public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r13-3218] Add define_insn_and_split to support general version of "kxnor".
@ 2022-10-11  9:23 hongtao Liu
  0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2022-10-11  9:23 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:498ad738690b3c464f901d63dcd4d0f49a50dd00

commit r13-3218-g498ad738690b3c464f901d63dcd4d0f49a50dd00
Author: liuhongt <hongtao.liu@intel.com>
Date:   Mon Oct 10 11:31:48 2022 +0800

    Add define_insn_and_split to support general version of "kxnor".
    
    For general_reg_operand, it will be split into xor + not.
    For mask_reg_operand, it will be split with UNSPEC_MASK_OP just
    like what we did for other logic operations.
    
    The patch will optimize xor+not to kxnor when possible.
    
    gcc/ChangeLog:
    
            PR target/107093
            * config/i386/i386.md (*notxor<mode>_1): New post_reload
            define_insn_and_split.
            (*notxorqi_1): Ditto.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/pr107093.c: New test.

Diff:
---
 gcc/config/i386/i386.md                  | 71 ++++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr107093.c | 38 +++++++++++++++++
 2 files changed, 109 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 947513701b9..9390dd5be88 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -10826,6 +10826,39 @@
    (set_attr "type" "alu, alu, msklog")
    (set_attr "mode" "<MODE>")])
 
+(define_insn_and_split "*notxor<mode>_1"
+  [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
+	(not:SWI248
+	  (xor:SWI248
+	    (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k")
+	    (match_operand:SWI248 2 "<general_operand>" "r<i>,<m>,k"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
+  "#"
+  "&& reload_completed"	;; Split only after reload.
+  [(parallel
+    [(set (match_dup 0)
+	  (xor:SWI248 (match_dup 1) (match_dup 2)))
+     (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 0)
+	(not:SWI248 (match_dup 0)))]
+{
+  if (mask_reg_operand (operands[0], <MODE>mode)) /* Operand 0 may be MEM.  */
+    {
+      emit_insn (gen_kxnor<mode> (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+}
+  [(set (attr "isa")
+	(cond [(eq_attr "alternative" "2")
+		 (if_then_else (eq_attr "mode" "SI,DI")
+		   (const_string "avx512bw")
+		   (const_string "avx512f"))
+	      ]
+	      (const_string "*")))
+   (set_attr "type" "alu, alu, msklog")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn_and_split "*iordi_1_bts"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
 	(ior:DI
@@ -10959,6 +10992,44 @@
 	      (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
 	   (symbol_ref "true")))])
 
+(define_insn_and_split "*notxorqi_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
+	(not:QI
+	  (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
+		  (match_operand:QI 2 "general_operand" "qn,m,rn,k"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (XOR, QImode, operands)"
+  "#"
+  "&& reload_completed"	;; Split only after reload.
+  [(parallel
+    [(set (match_dup 0)
+	  (xor:QI (match_dup 1) (match_dup 2)))
+     (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 0)
+	(not:QI (match_dup 0)))]
+{
+  if (mask_reg_operand (operands[0], QImode)) /* Mask dest: emit kxnor.  */
+    {
+      emit_insn (gen_kxnorqi (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+}
+  [(set_attr "isa" "*,*,*,avx512f")
+   (set_attr "type" "alu,alu,alu,msklog")
+   (set (attr "mode")
+	(cond [(eq_attr "alternative" "2")
+		 (const_string "SI")
+		(and (eq_attr "alternative" "3")
+		     (match_test "!TARGET_AVX512DQ"))
+		 (const_string "HI")
+	       ]
+	       (const_string "QI")))
+   ;; Potential partial reg stall on alternative 2.
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "2")
+	      (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+	   (symbol_ref "true")))])
+
 ;; Alternative 1 is needed to work around LRA limitation, see PR82524.
 (define_insn_and_split "*<code><mode>_1_slp"
   [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>,&<r>"))
diff --git a/gcc/testsuite/gcc.target/i386/pr107093.c b/gcc/testsuite/gcc.target/i386/pr107093.c
new file mode 100644
index 00000000000..23e30cbac0f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107093.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times {(?n)kxnor[bwqd]} 4 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times {(?n)kxnor[bwdq]} 3 { target ia32 } } }  */
+
+#include<immintrin.h>
+
+__m512i
+foo (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask32 k1 = _mm512_cmp_epi16_mask (a, b, 1);
+  __mmask32 k2 = _mm512_cmp_epi16_mask (c, d, 2);
+  return _mm512_mask_mov_epi16 (a, ~(k1 ^ k2), c);
+}
+
+__m512i
+foo1 (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask16 k1 = _mm512_cmp_epi32_mask (a, b, 1);
+  __mmask16 k2 = _mm512_cmp_epi32_mask (c, d, 2);
+  return _mm512_mask_mov_epi32 (a, ~(k1 ^ k2), c);
+}
+
+__m512i
+foo2 (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask64 k1 = _mm512_cmp_epi8_mask (a, b, 1);
+  __mmask64 k2 = _mm512_cmp_epi8_mask (c, d, 2);
+  return _mm512_mask_mov_epi8 (a, ~(k1 ^ k2), c);
+}
+
+__m512i
+foo3 (__m512i a, __m512i b, __m512i c, __m512i d)
+{
+  __mmask8 k1 = _mm512_cmp_epi64_mask (a, b, 1);
+  __mmask8 k2 = _mm512_cmp_epi64_mask (c, d, 2);
+  return _mm512_mask_mov_epi64 (a, ~(k1 ^ k2), c);
+}

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-10-11  9:23 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-11  9:23 [gcc r13-3218] Add define_insn_and_split to support general version of "kxnor" hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).