public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Break false dependence for vpternlog by inserting vpxor.
@ 2023-07-04  2:50 liuhongt
  2023-07-06 15:46 ` simonaytes.yan
  0 siblings, 1 reply; 7+ messages in thread
From: liuhongt @ 2023-07-04  2:50 UTC (permalink / raw)
  To: gcc-patches; +Cc: crazylht, hjl.tools

vpternlog is also used for optimizations that do not need any valid
input operand. In that case the destination register is used as an
input of the instruction, which creates a false dependence.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

	PR target/110438
	* config/i386/predicates.md
	(int_float_vector_all_ones_operand): New predicate.
	* config/i386/sse.md (*vmov<mode>_constm1_pternlog): New
	define_insn.
	(*<avx512>_cvtmask2<ssemodesuffix><mode>): Adjust to
	define_insn_and_split to avoid false dependence.
	(*<avx512>_cvtmask2<ssemodesuffix><mode>_pternlog): New
	define_insn.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr110438.c: New test.
---
 gcc/config/i386/predicates.md            |  8 ++-
 gcc/config/i386/sse.md                   | 69 +++++++++++++++++++-----
 gcc/testsuite/gcc.target/i386/pr110438.c | 30 +++++++++++
 3 files changed, 94 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110438.c

diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index fb07707dcba..df0d9e20def 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1192,12 +1192,18 @@ (define_predicate "float_vector_all_ones_operand"
     return false;
 })
 
-/* Return true if operand is a vector constant that is all ones. */
+/* Return true if operand is an integral vector constant that is all ones. */
 (define_predicate "vector_all_ones_operand"
   (and (match_code "const_vector")
        (match_test "INTEGRAL_MODE_P (GET_MODE (op))")
        (match_test "op == CONSTM1_RTX (GET_MODE (op))")))
 
+/* Return true if operand is a vector constant that is all ones. */
+(define_predicate "int_float_vector_all_ones_operand"
+  (ior (match_operand 0 "vector_all_ones_operand")
+       (match_operand 0 "float_vector_all_ones_operand")
+       (match_test "op == constm1_rtx")))
+
 /* Return true if operand is an 128/256bit all ones vector
    that zero-extends to 256/512bit.  */
 (define_predicate "vector_all_ones_zero_extend_half_operand"
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 812cfca4b92..93cdd844026 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1382,6 +1382,28 @@ (define_insn "mov<mode>_internal"
 	      ]
 	      (symbol_ref "true")))])
 
+; False dependency happens on destination register which is not really
+; used when moving all ones to vector register
+(define_split
+  [(set (match_operand:VMOVE 0 "register_operand")
+	(match_operand:VMOVE 1 "int_float_vector_all_ones_operand"))]
+  "TARGET_AVX512F && reload_completed
+  && (<MODE_SIZE> == 64 || EXT_REX_SSE_REG_P (operands[0]))"
+  [(set (match_dup 0) (match_dup 2))
+   (parallel
+     [(set (match_dup 0) (match_dup 1))
+      (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+  "operands[2] = CONST0_RTX (<MODE>mode);")
+
+(define_insn "*vmov<mode>_constm1_pternlog"
+  [(set (match_operand:VMOVE 0 "register_operand" "=v")
+	(match_operand:VMOVE 1 "int_float_vector_all_ones_operand" "<sseconstm1>"))
+   (unspec [(match_operand:VMOVE 2 "register_operand" "0")] UNSPEC_INSN_FALSE_DEP)]
+   "TARGET_AVX512VL || <MODE_SIZE> == 64"
+   "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}"
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix" "evex")])
+
 ;; If mem_addr points to a memory region with less than whole vector size bytes
 ;; of accessible memory and k is a mask that would prevent reading the inaccessible
 ;; bytes from mem_addr, add UNSPEC_MASKLOAD to prevent it to be transformed to vpblendd
@@ -9336,7 +9358,7 @@ (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
     operands[3] = CONST0_RTX (<MODE>mode);
   }")
 
-(define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
+(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>"
   [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v,v")
 	(vec_merge:VI48_AVX512VL
 	  (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
@@ -9345,12 +9367,35 @@ (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
   "TARGET_AVX512F"
   "@
    vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}
-   vpternlog<ssemodesuffix>\t{$0x81, %0, %0, %0%{%1%}%{z%}|%0%{%1%}%{z%}, %0, %0, 0x81}"
+   #"
+  "&& !TARGET_AVX512DQ && reload_completed"
+  [(set (match_dup 0) (match_dup 4))
+   (parallel
+    [(set (match_dup 0)
+	  (vec_merge:VI48_AVX512VL
+	    (match_dup 2)
+	    (match_dup 3)
+	    (match_dup 1)))
+     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+  "operands[4] = CONST0_RTX (<MODE>mode);"
   [(set_attr "isa" "avx512dq,*")
    (set_attr "length_immediate" "0,1")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>_pternlog"
+  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+	(vec_merge:VI48_AVX512VL
+	  (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
+	  (match_operand:VI48_AVX512VL 3 "const0_operand")
+	  (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))
+   (unspec [(match_operand:VI48_AVX512VL 4 "register_operand" "0")] UNSPEC_INSN_FALSE_DEP)]
+  "TARGET_AVX512F && !TARGET_AVX512DQ"
+  "vpternlog<ssemodesuffix>\t{$0x81, %0, %0, %0%{%1%}%{z%}|%0%{%1%}%{z%}, %0, %0, 0x81}"
+  [(set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_expand "extendv2sfv2df2"
   [(set (match_operand:V2DF 0 "register_operand")
 	(float_extend:V2DF
@@ -17164,32 +17209,32 @@ (define_expand "one_cmpl<mode>2"
 
   if (!TARGET_AVX512F)
     operands[2] = force_reg (<MODE>mode, operands[2]);
+  else
+    operands[1] = force_reg (<MODE>mode, operands[1]);
 })
 
 (define_insn "<mask_codefor>one_cmpl<mode>2<mask_name>"
-  [(set (match_operand:VI 0 "register_operand" "=v,v")
-	(xor:VI (match_operand:VI 1 "nonimmediate_operand" "v,m")
-		(match_operand:VI 2 "vector_all_ones_operand" "BC,BC")))]
+  [(set (match_operand:VI 0 "register_operand" "=v")
+	(xor:VI (match_operand:VI 1 "register_operand" "v")
+		(match_operand:VI 2 "vector_all_ones_operand" "BC")))]
   "TARGET_AVX512F
    && (!<mask_applied>
        || <ssescalarmode>mode == SImode
        || <ssescalarmode>mode == DImode)"
 {
+  /* Use vpternlog 0x55, %1, %1, %0 instead of
+     vpternlog 0x55, %1, %0, %0 to avoid false dependence on %0.  */
   if (TARGET_AVX512VL)
-    return "vpternlog<ternlogsuffix>\t{$0x55, %1, %0, %0<mask_operand3>|%0<mask_operand3>, %0, %1, 0x55}";
+    return "vpternlog<ternlogsuffix>\t{$0x55, %1, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %1, 0x55}";
   else
-    return "vpternlog<ternlogsuffix>\t{$0x55, %g1, %g0, %g0<mask_operand3>|%g0<mask_operand3>, %g0, %g1, 0x55}";
+    return "vpternlog<ternlogsuffix>\t{$0x55, %g1, %g1, %g0<mask_operand3>|%g0<mask_operand3>, %g1, %g1, 0x55}";
 }
   [(set_attr "type" "sselog")
    (set_attr "prefix" "evex")
    (set (attr "mode")
         (if_then_else (match_test "TARGET_AVX512VL")
 		      (const_string "<sseinsnmode>")
-		      (const_string "XI")))
-   (set (attr "enabled")
-	(if_then_else (eq_attr "alternative" "1")
-		      (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
-		      (const_int 1)))])
+		      (const_string "XI")))])
 
 (define_expand "<sse2_avx2>_andnot<mode>3"
   [(set (match_operand:VI_AVX2 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/i386/pr110438.c b/gcc/testsuite/gcc.target/i386/pr110438.c
new file mode 100644
index 00000000000..11b8cc59fd2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110438.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2 -ftree-vectorize -mno-avx512dq -dp -mprefer-vector-width=512" } */
+/* { dg-final { scan-assembler-times {cvtmask2.*_pternlog} "1" } } */
+/* { dg-final { scan-assembler-times {constm1_pternlog} "1" } } */
+/* { dg-final { scan-assembler-not {(?n)vpternlogd.*\(} } } */
+
+
+#include <immintrin.h>
+
+__m512i g(void)
+{
+  return (__m512i){ 0 } - 1;
+}
+
+__m512i g1(__m512i* a)
+{
+  return ~(*a);
+}
+
+void
+foo (int* a, int* __restrict b)
+{
+  for (int i = 0; i != 16; i++)
+    {
+      if (b[i])
+	a[i] = -1;
+      else
+	a[i] = 0;
+    }
+}
-- 
2.39.1.388.g2fc9e9ca3c


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-07-11  4:03 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-04  2:50 [PATCH] Break false dependence for vpternlog by inserting vpxor liuhongt
2023-07-06 15:46 ` simonaytes.yan
2023-07-07  6:50   ` Hongtao Liu
2023-07-10  1:17     ` [PATCH] Break false dependence for vpternlog by inserting vpxor or setting constraint of input operand to '0' liuhongt
2023-07-10 16:23       ` Alexander Monakov
2023-07-11  0:03         ` Hongtao Liu
2023-07-11  4:01           ` [PATCH v2] " liuhongt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).