public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v4, rs6000] Add V1TI into vector comparison expand [PR103316]
@ 2022-05-24  8:45 HAO CHEN GUI
  2022-05-26  3:22 ` Kewen.Lin
  0 siblings, 1 reply; 6+ messages in thread
From: HAO CHEN GUI @ 2022-05-24  8:45 UTC (permalink / raw)
  To: gcc-patches; +Cc: Segher Boessenkool, David, Kewen.Lin, Peter Bergner

Hi,
   This patch adds V1TI mode into a new mode iterator used in vector
comparison and rotation expands. Without the patch, the comparisons
between two vector __int128 are converted to scalar comparisons. The
code is suboptimal. The patch fixes the issue. Now all comparisons
between two vector __int128 generates P10 new comparison instructions.
Also the relative built-ins generate the same instructions after gimple
folding. So they're added back to the list.

  This patch also merges some vector comparison and rotation expands
for V1T1 and other vector integer modes as they have the same patterns.
The expands for V1TI only are removed.

   Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-05-24 Haochen Gui <guihaoc@linux.ibm.com>

gcc/
	PR target/103316
	* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Enable
	gimple folding for RS6000_BIF_VCMPEQUT, RS6000_BIF_VCMPNET,
	RS6000_BIF_CMPGE_1TI, RS6000_BIF_CMPGE_U1TI, RS6000_BIF_VCMPGTUT,
	RS6000_BIF_VCMPGTST, RS6000_BIF_CMPLE_1TI, RS6000_BIF_CMPLE_U1TI.
	* config/rs6000/vector.md (VEC_IC): Define.  Add support for new Power10
	V1TI instructions.
	(vec_cmp<mode><mode>): Set mode iterator to VEC_IC.
	(vec_cmpu<mode><mode>): Likewise.
	(vector_nlt<mode>): Set mode iterator to VEC_IC.
	(vector_nltv1ti): Remove.
	(vector_gtu<mode>): Set mode iterator to VEC_IC.
	(vector_gtuv1ti): Remove.
	(vector_nltu<mode>): Set mode iterator to VEC_IC.
	(vector_nltuv1ti): Remove.
	(vector_geu<mode>): Set mode iterator to VEC_IC.
	(vector_ngt<mode>): Likewise.
	(vector_ngtv1ti): Remove.
	(vector_ngtu<mode>): Set mode iterator to VEC_IC.
	(vector_ngtuv1ti): Remove.
	(vector_gtu_<mode>_p): Set mode iterator to VEC_IC.
	(vector_gtu_v1ti_p): Remove.
	(vrotl<mode>3): Set mode iterator to VEC_IC.  Emit insns for V1TI.
	(vrotlv1ti3): Remove.
	(vashr<mode>3): Set mode iterator to VEC_IC.  Emit insns for V1TI.
	(vashrv1ti3): Remove.

gcc/testsuite/
	PR target/103316
	* gcc.target/powerpc/pr103316.c: New.
	* gcc.target/powerpc/fold-vec-cmp-int128.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index e925ba9fad9..b67f4e066a8 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -2000,16 +2000,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case RS6000_BIF_VCMPEQUH:
     case RS6000_BIF_VCMPEQUW:
     case RS6000_BIF_VCMPEQUD:
-    /* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple
-       folding produces worse code for 128-bit compares.  */
+    case RS6000_BIF_VCMPEQUT:
       fold_compare_helper (gsi, EQ_EXPR, stmt);
       return true;

     case RS6000_BIF_VCMPNEB:
     case RS6000_BIF_VCMPNEH:
     case RS6000_BIF_VCMPNEW:
-    /* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple
-       folding produces worse code for 128-bit compares.  */
+    case RS6000_BIF_VCMPNET:
       fold_compare_helper (gsi, NE_EXPR, stmt);
       return true;

@@ -2021,9 +2019,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case RS6000_BIF_CMPGE_U4SI:
     case RS6000_BIF_CMPGE_2DI:
     case RS6000_BIF_CMPGE_U2DI:
-    /* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI
-       for now, because gimple folding produces worse code for 128-bit
-       compares.  */
+    case RS6000_BIF_CMPGE_1TI:
+    case RS6000_BIF_CMPGE_U1TI:
       fold_compare_helper (gsi, GE_EXPR, stmt);
       return true;

@@ -2035,9 +2032,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case RS6000_BIF_VCMPGTUW:
     case RS6000_BIF_VCMPGTUD:
     case RS6000_BIF_VCMPGTSD:
-    /* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST
-       for now, because gimple folding produces worse code for 128-bit
-       compares.  */
+    case RS6000_BIF_VCMPGTUT:
+    case RS6000_BIF_VCMPGTST:
       fold_compare_helper (gsi, GT_EXPR, stmt);
       return true;

@@ -2049,9 +2045,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case RS6000_BIF_CMPLE_U4SI:
     case RS6000_BIF_CMPLE_2DI:
     case RS6000_BIF_CMPLE_U2DI:
-    /* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI
-       for now, because gimple folding produces worse code for 128-bit
-       compares.  */
+    case RS6000_BIF_CMPLE_1TI:
+    case RS6000_BIF_CMPLE_U1TI:
       fold_compare_helper (gsi, LE_EXPR, stmt);
       return true;

diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 4d0797c48f8..3b7a272994f 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -26,6 +26,9 @@
 ;; Vector int modes
 (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])

+;; Vector int modes for comparison
+(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI (V1TI "TARGET_POWER10")])
+
 ;; 128-bit int modes
 (define_mode_iterator VEC_TI [V1TI TI])

@@ -533,10 +536,10 @@ (define_expand "vcond_mask_<mode><VEC_int>"

 ;; For signed integer vectors comparison.
 (define_expand "vec_cmp<mode><mode>"
-  [(set (match_operand:VEC_I 0 "vint_operand")
+  [(set (match_operand:VEC_IC 0 "vint_operand")
 	(match_operator 1 "signed_or_equality_comparison_operator"
-	  [(match_operand:VEC_I 2 "vint_operand")
-	   (match_operand:VEC_I 3 "vint_operand")]))]
+	  [(match_operand:VEC_IC 2 "vint_operand")
+	   (match_operand:VEC_IC 3 "vint_operand")]))]
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
 {
   enum rtx_code code = GET_CODE (operands[1]);
@@ -573,10 +576,10 @@ (define_expand "vec_cmp<mode><mode>"

 ;; For unsigned integer vectors comparison.
 (define_expand "vec_cmpu<mode><mode>"
-  [(set (match_operand:VEC_I 0 "vint_operand")
+  [(set (match_operand:VEC_IC 0 "vint_operand")
 	(match_operator 1 "unsigned_or_equality_comparison_operator"
-	  [(match_operand:VEC_I 2 "vint_operand")
-	   (match_operand:VEC_I 3 "vint_operand")]))]
+	  [(match_operand:VEC_IC 2 "vint_operand")
+	   (match_operand:VEC_IC 3 "vint_operand")]))]
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
 {
   enum rtx_code code = GET_CODE (operands[1]);
@@ -690,116 +693,65 @@ (define_expand "vector_gt<mode>"

 ; >= for integer vectors: swap operands and apply not-greater-than
 (define_expand "vector_nlt<mode>"
-  [(set (match_operand:VEC_I 3 "vlogical_operand")
-	(gt:VEC_I (match_operand:VEC_I 2 "vlogical_operand")
-		  (match_operand:VEC_I 1 "vlogical_operand")))
-   (set (match_operand:VEC_I 0 "vlogical_operand")
-        (not:VEC_I (match_dup 3)))]
+  [(set (match_operand:VEC_IC 3 "vlogical_operand")
+	(gt:VEC_IC (match_operand:VEC_IC 2 "vlogical_operand")
+		   (match_operand:VEC_IC 1 "vlogical_operand")))
+   (set (match_operand:VEC_IC 0 "vlogical_operand")
+	(not:VEC_IC (match_dup 3)))]
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
 {
   operands[3] = gen_reg_rtx_and_attrs (operands[0]);
 })

-(define_expand "vector_nltv1ti"
-  [(set (match_operand:V1TI 3 "vlogical_operand")
-	(gt:V1TI (match_operand:V1TI 2 "vlogical_operand")
-		 (match_operand:V1TI 1 "vlogical_operand")))
-   (set (match_operand:V1TI 0 "vlogical_operand")
-        (not:V1TI (match_dup 3)))]
-  "TARGET_POWER10"
-{
-  operands[3] = gen_reg_rtx_and_attrs (operands[0]);
-})
-
 (define_expand "vector_gtu<mode>"
-  [(set (match_operand:VEC_I 0 "vint_operand")
-	(gtu:VEC_I (match_operand:VEC_I 1 "vint_operand")
-		   (match_operand:VEC_I 2 "vint_operand")))]
+  [(set (match_operand:VEC_IC 0 "vint_operand")
+	(gtu:VEC_IC (match_operand:VEC_IC 1 "vint_operand")
+		    (match_operand:VEC_IC 2 "vint_operand")))]
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
   "")

-(define_expand "vector_gtuv1ti"
-  [(set (match_operand:V1TI 0 "altivec_register_operand")
-	(gtu:V1TI (match_operand:V1TI 1 "altivec_register_operand")
-		  (match_operand:V1TI 2 "altivec_register_operand")))]
-  "TARGET_POWER10"
-  "")
-
 ; >= for integer vectors: swap operands and apply not-greater-than
 (define_expand "vector_nltu<mode>"
-  [(set (match_operand:VEC_I 3 "vlogical_operand")
-	(gtu:VEC_I (match_operand:VEC_I 2 "vlogical_operand")
-	 	   (match_operand:VEC_I 1 "vlogical_operand")))
-   (set (match_operand:VEC_I 0 "vlogical_operand")
-        (not:VEC_I (match_dup 3)))]
+  [(set (match_operand:VEC_IC 3 "vlogical_operand")
+	(gtu:VEC_IC (match_operand:VEC_IC 2 "vlogical_operand")
+		    (match_operand:VEC_IC 1 "vlogical_operand")))
+   (set (match_operand:VEC_IC 0 "vlogical_operand")
+	(not:VEC_IC (match_dup 3)))]
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
 {
   operands[3] = gen_reg_rtx_and_attrs (operands[0]);
 })

-(define_expand "vector_nltuv1ti"
-  [(set (match_operand:V1TI 3 "vlogical_operand")
-	(gtu:V1TI (match_operand:V1TI 2 "vlogical_operand")
-		  (match_operand:V1TI 1 "vlogical_operand")))
-   (set (match_operand:V1TI 0 "vlogical_operand")
-	(not:V1TI (match_dup 3)))]
-  "TARGET_POWER10"
-{
-  operands[3] = gen_reg_rtx_and_attrs (operands[0]);
-})
-
 (define_expand "vector_geu<mode>"
-  [(set (match_operand:VEC_I 0 "vint_operand")
-	(geu:VEC_I (match_operand:VEC_I 1 "vint_operand")
-		   (match_operand:VEC_I 2 "vint_operand")))]
+  [(set (match_operand:VEC_IC 0 "vint_operand")
+	(geu:VEC_IC (match_operand:VEC_IC 1 "vint_operand")
+		    (match_operand:VEC_IC 2 "vint_operand")))]
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
   "")

 ; <= for integer vectors: apply not-greater-than
 (define_expand "vector_ngt<mode>"
-  [(set (match_operand:VEC_I 3 "vlogical_operand")
-	(gt:VEC_I (match_operand:VEC_I 1 "vlogical_operand")
-		  (match_operand:VEC_I 2 "vlogical_operand")))
-   (set (match_operand:VEC_I 0 "vlogical_operand")
-        (not:VEC_I (match_dup 3)))]
+  [(set (match_operand:VEC_IC 3 "vlogical_operand")
+	(gt:VEC_IC (match_operand:VEC_IC 1 "vlogical_operand")
+		   (match_operand:VEC_IC 2 "vlogical_operand")))
+   (set (match_operand:VEC_IC 0 "vlogical_operand")
+	(not:VEC_IC (match_dup 3)))]
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
 {
   operands[3] = gen_reg_rtx_and_attrs (operands[0]);
 })

-(define_expand "vector_ngtv1ti"
-  [(set (match_operand:V1TI 3 "vlogical_operand")
-	(gt:V1TI (match_operand:V1TI 1 "vlogical_operand")
-		 (match_operand:V1TI 2 "vlogical_operand")))
-   (set (match_operand:V1TI 0 "vlogical_operand")
-        (not:V1TI (match_dup 3)))]
-  "TARGET_POWER10"
-{
-  operands[3] = gen_reg_rtx_and_attrs (operands[0]);
-})
-
 (define_expand "vector_ngtu<mode>"
-  [(set (match_operand:VEC_I 3 "vlogical_operand")
-	(gtu:VEC_I (match_operand:VEC_I 1 "vlogical_operand")
-	 	   (match_operand:VEC_I 2 "vlogical_operand")))
-   (set (match_operand:VEC_I 0 "vlogical_operand")
-        (not:VEC_I (match_dup 3)))]
+  [(set (match_operand:VEC_IC 3 "vlogical_operand")
+	(gtu:VEC_IC (match_operand:VEC_IC 1 "vlogical_operand")
+		    (match_operand:VEC_IC 2 "vlogical_operand")))
+   (set (match_operand:VEC_IC 0 "vlogical_operand")
+	(not:VEC_IC (match_dup 3)))]
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
 {
   operands[3] = gen_reg_rtx_and_attrs (operands[0]);
 })

-(define_expand "vector_ngtuv1ti"
-  [(set (match_operand:V1TI 3 "vlogical_operand")
-	(gtu:V1TI (match_operand:V1TI 1 "vlogical_operand")
-		  (match_operand:V1TI 2 "vlogical_operand")))
-   (set (match_operand:V1TI 0 "vlogical_operand")
-        (not:V1TI (match_dup 3)))]
-  "TARGET_POWER10"
-{
-  operands[3] = gen_reg_rtx_and_attrs (operands[0]);
-})
-
 ; There are 14 possible vector FP comparison operators, gt and eq of them have
 ; been expanded above, so just support 12 remaining operators here.

@@ -1189,27 +1141,15 @@ (define_expand "vector_ge_<mode>_p"
 (define_expand "vector_gtu_<mode>_p"
   [(parallel
     [(set (reg:CC CR6_REGNO)
-	  (unspec:CC [(gtu:CC (match_operand:VEC_I 1 "vint_operand")
-			      (match_operand:VEC_I 2 "vint_operand"))]
+	  (unspec:CC [(gtu:CC (match_operand:VEC_IC 1 "vint_operand")
+			      (match_operand:VEC_IC 2 "vint_operand"))]
 		     UNSPEC_PREDICATE))
-     (set (match_operand:VEC_I 0 "vlogical_operand")
-	  (gtu:VEC_I (match_dup 1)
-		     (match_dup 2)))])]
+     (set (match_operand:VEC_IC 0 "vlogical_operand")
+	  (gtu:VEC_IC (match_dup 1)
+		      (match_dup 2)))])]
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
   "")

-(define_expand "vector_gtu_v1ti_p"
-  [(parallel
-    [(set (reg:CC CR6_REGNO)
-	  (unspec:CC [(gtu:CC (match_operand:V1TI 1 "altivec_register_operand")
-			      (match_operand:V1TI 2 "altivec_register_operand"))]
-		     UNSPEC_PREDICATE))
-     (set (match_operand:V1TI 0 "altivec_register_operand")
-	  (gtu:V1TI (match_dup 1)
-		    (match_dup 2)))])]
-  "TARGET_POWER10"
-  "")
-
 ;; AltiVec/VSX predicates.

 ;; This expansion is triggered during expansion of predicate built-in
@@ -1582,25 +1522,21 @@ (define_expand "vec_shr_<mode>"

 ;; Expanders for rotate each element in a vector
 (define_expand "vrotl<mode>3"
-  [(set (match_operand:VEC_I 0 "vint_operand")
-	(rotate:VEC_I (match_operand:VEC_I 1 "vint_operand")
-		      (match_operand:VEC_I 2 "vint_operand")))]
+  [(set (match_operand:VEC_IC 0 "vint_operand")
+	(rotate:VEC_IC (match_operand:VEC_IC 1 "vint_operand")
+		       (match_operand:VEC_IC 2 "vint_operand")))]
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
-  "")
-
-(define_expand "vrotlv1ti3"
-  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
-        (rotate:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
-                     (match_operand:V1TI 2 "vsx_register_operand" "v")))]
-  "TARGET_POWER10"
 {
-  /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */
-  rtx tmp = gen_reg_rtx (V1TImode);
+  /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2.  */
+  if (<MODE>mode == V1TImode)
+    {
+      rtx tmp = gen_reg_rtx (V1TImode);

-  emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
-  emit_insn (gen_altivec_vrlq (operands[0], operands[1], tmp));
-  DONE;
-})
+      emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
+      emit_insn (gen_altivec_vrlq (operands[0], operands[1], tmp));
+      DONE;
+    }
+ })

 ;; Expanders for rotatert to make use of vrotl
 (define_expand "vrotr<mode>3"
@@ -1663,25 +1599,20 @@ (define_expand "vlshr<mode>3"

 ;; Expanders for arithmetic shift right on each vector element
 (define_expand "vashr<mode>3"
-  [(set (match_operand:VEC_I 0 "vint_operand")
-	(ashiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand")
-			(match_operand:VEC_I 2 "vint_operand")))]
+  [(set (match_operand:VEC_IC 0 "vint_operand")
+	(ashiftrt:VEC_IC (match_operand:VEC_IC 1 "vint_operand")
+			 (match_operand:VEC_IC 2 "vint_operand")))]
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
-  "")
-
-;; No immediate version of this 128-bit instruction
-(define_expand "vashrv1ti3"
-  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
-	(ashiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
-		       (match_operand:V1TI 2 "vsx_register_operand" "v")))]
-  "TARGET_POWER10"
 {
-  /* Shift amount in needs to be put into bits[57:63] of 128-bit operand2. */
-  rtx tmp = gen_reg_rtx (V1TImode);
+  /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2.  */
+  if (<MODE>mode == V1TImode)
+    {
+      rtx tmp = gen_reg_rtx (V1TImode);

-  emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
-  emit_insn (gen_altivec_vsraq (operands[0], operands[1], tmp));
-  DONE;
+      emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
+      emit_insn (gen_altivec_vsraq (operands[0], operands[1], tmp));
+      DONE;
+    }
 })

 \f
diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c
new file mode 100644
index 00000000000..1a4db0f45d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c
@@ -0,0 +1,86 @@
+/* Verify that overloaded built-ins for vec_cmp with __int128
+   inputs produce the right code.  */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+vector bool __int128
+test3_eq (vector signed __int128 x, vector signed __int128 y)
+{
+  return vec_cmpeq (x, y);
+}
+
+vector bool __int128
+test6_eq (vector unsigned __int128 x, vector unsigned __int128 y)
+{
+  return vec_cmpeq (x, y);
+}
+
+vector bool __int128
+test3_ge (vector signed __int128 x, vector signed __int128 y)
+{
+  return vec_cmpge (x, y);
+}
+
+vector bool __int128
+test6_ge (vector unsigned __int128 x, vector unsigned __int128 y)
+{
+  return vec_cmpge (x, y);
+}
+
+vector bool __int128
+test3_gt (vector signed __int128 x, vector signed __int128 y)
+{
+  return vec_cmpgt (x, y);
+}
+
+vector bool __int128
+test6_gt (vector unsigned __int128 x, vector unsigned __int128 y)
+{
+  return vec_cmpgt (x, y);
+}
+
+vector bool __int128
+test3_le (vector signed __int128 x, vector signed __int128 y)
+{
+  return vec_cmple (x, y);
+}
+
+vector bool __int128
+test6_le (vector unsigned __int128 x, vector unsigned __int128 y)
+{
+  return vec_cmple (x, y);
+}
+
+vector bool __int128
+test3_lt (vector signed __int128 x, vector signed __int128 y)
+{
+  return vec_cmplt (x, y);
+}
+
+vector bool __int128
+test6_lt (vector unsigned __int128 x, vector unsigned __int128 y)
+{
+  return vec_cmplt (x, y);
+}
+
+vector bool __int128
+test3_ne (vector signed __int128 x, vector signed __int128 y)
+{
+  return vec_cmpne (x, y);
+}
+
+vector bool __int128
+test6_ne (vector unsigned __int128 x, vector unsigned __int128 y)
+{
+  return vec_cmpne (x, y);
+}
+
+/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */
+/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */
+/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */
+/* { dg-final { scan-assembler-times "xxlnor" 6 } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103316.c b/gcc/testsuite/gcc.target/powerpc/pr103316.c
new file mode 100644
index 00000000000..02f7dc5ca1b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103316.c
@@ -0,0 +1,80 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+vector bool __int128
+test_eq (vector signed __int128 a, vector signed __int128 b)
+{
+  return a == b;
+}
+
+vector bool __int128
+test_ne (vector signed __int128 a, vector signed __int128 b)
+{
+  return a != b;
+}
+
+vector bool __int128
+test_gt (vector signed __int128 a, vector signed __int128 b)
+{
+  return a > b;
+}
+
+vector bool __int128
+test_ge (vector signed __int128 a, vector signed __int128 b)
+{
+  return a >= b;
+}
+
+vector bool __int128
+test_lt (vector signed __int128 a, vector signed __int128 b)
+{
+  return a < b;
+}
+
+vector bool __int128
+test_le (vector signed __int128 a, vector signed __int128 b)
+{
+  return a <= b;
+}
+
+vector bool __int128
+testu_eq (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a == b;
+}
+
+vector bool __int128
+testu_ne (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a != b;
+}
+
+vector bool __int128
+testu_gt (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a > b;
+}
+
+vector bool __int128
+testu_ge (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a >= b;
+}
+
+vector bool __int128
+testu_lt (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a < b;
+}
+
+vector bool __int128
+testu_le (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a <= b;
+}
+
+/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */
+/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */
+/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */
+/* { dg-final { scan-assembler-times "xxlnor" 6 } } */

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v4, rs6000] Add V1TI into vector comparison expand [PR103316]
  2022-05-24  8:45 [PATCH v4, rs6000] Add V1TI into vector comparison expand [PR103316] HAO CHEN GUI
@ 2022-05-26  3:22 ` Kewen.Lin
  2022-05-26  5:30   ` HAO CHEN GUI
  0 siblings, 1 reply; 6+ messages in thread
From: Kewen.Lin @ 2022-05-26  3:22 UTC (permalink / raw)
  To: HAO CHEN GUI; +Cc: Segher Boessenkool, David, Peter Bergner, gcc-patches

Hi Haochen,

on 2022/5/24 16:45, HAO CHEN GUI wrote:
> Hi,
>    This patch adds V1TI mode into a new mode iterator used in vector
> comparison and rotation expands. Without the patch, the comparisons
> between two vector __int128 are converted to scalar comparisons. The
> code is suboptimal. The patch fixes the issue. Now all comparisons
> between two vector __int128 generates P10 new comparison instructions.
> Also the relative built-ins generate the same instructions after gimple
> folding. So they're added back to the list.
> 
>   This patch also merges some vector comparison and rotation expands
> for V1T1 and other vector integer modes as they have the same patterns.
> The expands for V1TI only are removed.
> 
>    Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> ChangeLog
> 2022-05-24 Haochen Gui <guihaoc@linux.ibm.com>
> 
> gcc/
> 	PR target/103316
> 	* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Enable
> 	gimple folding for RS6000_BIF_VCMPEQUT, RS6000_BIF_VCMPNET,
> 	RS6000_BIF_CMPGE_1TI, RS6000_BIF_CMPGE_U1TI, RS6000_BIF_VCMPGTUT,
> 	RS6000_BIF_VCMPGTST, RS6000_BIF_CMPLE_1TI, RS6000_BIF_CMPLE_U1TI.
> 	* config/rs6000/vector.md (VEC_IC): Define.  Add support for new Power10
> 	V1TI instructions.

Nit: Maybe "New mode iterator" is better than "Define".

> 	(vec_cmp<mode><mode>): Set mode iterator to VEC_IC.
> 	(vec_cmpu<mode><mode>): Likewise.
> 	(vector_nlt<mode>): Set mode iterator to VEC_IC.
> 	(vector_nltv1ti): Remove.
> 	(vector_gtu<mode>): Set mode iterator to VEC_IC.
> 	(vector_gtuv1ti): Remove.
> 	(vector_nltu<mode>): Set mode iterator to VEC_IC.
> 	(vector_nltuv1ti): Remove.
> 	(vector_geu<mode>): Set mode iterator to VEC_IC.
> 	(vector_ngt<mode>): Likewise.
> 	(vector_ngtv1ti): Remove.
> 	(vector_ngtu<mode>): Set mode iterator to VEC_IC.
> 	(vector_ngtuv1ti): Remove.
> 	(vector_gtu_<mode>_p): Set mode iterator to VEC_IC.
> 	(vector_gtu_v1ti_p): Remove.
> 	(vrotl<mode>3): Set mode iterator to VEC_IC.  Emit insns for V1TI.
> 	(vrotlv1ti3): Remove.
> 	(vashr<mode>3): Set mode iterator to VEC_IC.  Emit insns for V1TI.
> 	(vashrv1ti3): Remove.
> 
> gcc/testsuite/
> 	PR target/103316
> 	* gcc.target/powerpc/pr103316.c: New.
> 	* gcc.target/powerpc/fold-vec-cmp-int128.c: New.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
> index e925ba9fad9..b67f4e066a8 100644
> --- a/gcc/config/rs6000/rs6000-builtin.cc
> +++ b/gcc/config/rs6000/rs6000-builtin.cc
> @@ -2000,16 +2000,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_VCMPEQUH:
>      case RS6000_BIF_VCMPEQUW:
>      case RS6000_BIF_VCMPEQUD:
> -    /* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple
> -       folding produces worse code for 128-bit compares.  */
> +    case RS6000_BIF_VCMPEQUT:
>        fold_compare_helper (gsi, EQ_EXPR, stmt);
>        return true;
> 
>      case RS6000_BIF_VCMPNEB:
>      case RS6000_BIF_VCMPNEH:
>      case RS6000_BIF_VCMPNEW:
> -    /* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple
> -       folding produces worse code for 128-bit compares.  */
> +    case RS6000_BIF_VCMPNET:
>        fold_compare_helper (gsi, NE_EXPR, stmt);
>        return true;
> 
> @@ -2021,9 +2019,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_CMPGE_U4SI:
>      case RS6000_BIF_CMPGE_2DI:
>      case RS6000_BIF_CMPGE_U2DI:
> -    /* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI
> -       for now, because gimple folding produces worse code for 128-bit
> -       compares.  */
> +    case RS6000_BIF_CMPGE_1TI:
> +    case RS6000_BIF_CMPGE_U1TI:
>        fold_compare_helper (gsi, GE_EXPR, stmt);
>        return true;
> 
> @@ -2035,9 +2032,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_VCMPGTUW:
>      case RS6000_BIF_VCMPGTUD:
>      case RS6000_BIF_VCMPGTSD:
> -    /* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST
> -       for now, because gimple folding produces worse code for 128-bit
> -       compares.  */
> +    case RS6000_BIF_VCMPGTUT:
> +    case RS6000_BIF_VCMPGTST:
>        fold_compare_helper (gsi, GT_EXPR, stmt);
>        return true;
> 
> @@ -2049,9 +2045,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_CMPLE_U4SI:
>      case RS6000_BIF_CMPLE_2DI:
>      case RS6000_BIF_CMPLE_U2DI:
> -    /* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI
> -       for now, because gimple folding produces worse code for 128-bit
> -       compares.  */
> +    case RS6000_BIF_CMPLE_1TI:
> +    case RS6000_BIF_CMPLE_U1TI:
>        fold_compare_helper (gsi, LE_EXPR, stmt);
>        return true;
> 
> diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
> index 4d0797c48f8..3b7a272994f 100644
> --- a/gcc/config/rs6000/vector.md
> +++ b/gcc/config/rs6000/vector.md
> @@ -26,6 +26,9 @@
>  ;; Vector int modes
>  (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])
> 
> +;; Vector int modes for comparison

Nit: This comment line doesn't perfectly match the usage below since it's also
used for shift and rotation in this patch.  Maybe it's better with:

"Vector int modes for comparison, shift and rotation"

> +(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI (V1TI "TARGET_POWER10")])
> +
>  ;; 128-bit int modes
>  (define_mode_iterator VEC_TI [V1TI TI])
> 
> @@ -533,10 +536,10 @@ (define_expand "vcond_mask_<mode><VEC_int>"
> 
>  ;; For signed integer vectors comparison.
>  (define_expand "vec_cmp<mode><mode>"
> -  [(set (match_operand:VEC_I 0 "vint_operand")
> +  [(set (match_operand:VEC_IC 0 "vint_operand")
>  	(match_operator 1 "signed_or_equality_comparison_operator"
> -	  [(match_operand:VEC_I 2 "vint_operand")
> -	   (match_operand:VEC_I 3 "vint_operand")]))]
> +	  [(match_operand:VEC_IC 2 "vint_operand")
> +	   (match_operand:VEC_IC 3 "vint_operand")]))]
>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>  {
>    enum rtx_code code = GET_CODE (operands[1]);
> @@ -573,10 +576,10 @@ (define_expand "vec_cmp<mode><mode>"
> 
>  ;; For unsigned integer vectors comparison.
>  (define_expand "vec_cmpu<mode><mode>"
> -  [(set (match_operand:VEC_I 0 "vint_operand")
> +  [(set (match_operand:VEC_IC 0 "vint_operand")
>  	(match_operator 1 "unsigned_or_equality_comparison_operator"
> -	  [(match_operand:VEC_I 2 "vint_operand")
> -	   (match_operand:VEC_I 3 "vint_operand")]))]
> +	  [(match_operand:VEC_IC 2 "vint_operand")
> +	   (match_operand:VEC_IC 3 "vint_operand")]))]
>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>  {
>    enum rtx_code code = GET_CODE (operands[1]);
> @@ -690,116 +693,65 @@ (define_expand "vector_gt<mode>"
> 
>  ; >= for integer vectors: swap operands and apply not-greater-than
>  (define_expand "vector_nlt<mode>"
> -  [(set (match_operand:VEC_I 3 "vlogical_operand")
> -	(gt:VEC_I (match_operand:VEC_I 2 "vlogical_operand")
> -		  (match_operand:VEC_I 1 "vlogical_operand")))
> -   (set (match_operand:VEC_I 0 "vlogical_operand")
> -        (not:VEC_I (match_dup 3)))]
> +  [(set (match_operand:VEC_IC 3 "vlogical_operand")
> +	(gt:VEC_IC (match_operand:VEC_IC 2 "vlogical_operand")
> +		   (match_operand:VEC_IC 1 "vlogical_operand")))
> +   (set (match_operand:VEC_IC 0 "vlogical_operand")
> +	(not:VEC_IC (match_dup 3)))]
>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>  {
>    operands[3] = gen_reg_rtx_and_attrs (operands[0]);
>  })
> 
> -(define_expand "vector_nltv1ti"
> -  [(set (match_operand:V1TI 3 "vlogical_operand")
> -	(gt:V1TI (match_operand:V1TI 2 "vlogical_operand")
> -		 (match_operand:V1TI 1 "vlogical_operand")))
> -   (set (match_operand:V1TI 0 "vlogical_operand")
> -        (not:V1TI (match_dup 3)))]
> -  "TARGET_POWER10"
> -{
> -  operands[3] = gen_reg_rtx_and_attrs (operands[0]);
> -})
> -
>  (define_expand "vector_gtu<mode>"
> -  [(set (match_operand:VEC_I 0 "vint_operand")
> -	(gtu:VEC_I (match_operand:VEC_I 1 "vint_operand")
> -		   (match_operand:VEC_I 2 "vint_operand")))]
> +  [(set (match_operand:VEC_IC 0 "vint_operand")
> +	(gtu:VEC_IC (match_operand:VEC_IC 1 "vint_operand")
> +		    (match_operand:VEC_IC 2 "vint_operand")))]
>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>    "")
> 
> -(define_expand "vector_gtuv1ti"
> -  [(set (match_operand:V1TI 0 "altivec_register_operand")
> -	(gtu:V1TI (match_operand:V1TI 1 "altivec_register_operand")
> -		  (match_operand:V1TI 2 "altivec_register_operand")))]
> -  "TARGET_POWER10"
> -  "")
> -
>  ; >= for integer vectors: swap operands and apply not-greater-than
>  (define_expand "vector_nltu<mode>"
> -  [(set (match_operand:VEC_I 3 "vlogical_operand")
> -	(gtu:VEC_I (match_operand:VEC_I 2 "vlogical_operand")
> -	 	   (match_operand:VEC_I 1 "vlogical_operand")))
> -   (set (match_operand:VEC_I 0 "vlogical_operand")
> -        (not:VEC_I (match_dup 3)))]
> +  [(set (match_operand:VEC_IC 3 "vlogical_operand")
> +	(gtu:VEC_IC (match_operand:VEC_IC 2 "vlogical_operand")
> +		    (match_operand:VEC_IC 1 "vlogical_operand")))
> +   (set (match_operand:VEC_IC 0 "vlogical_operand")
> +	(not:VEC_IC (match_dup 3)))]
>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>  {
>    operands[3] = gen_reg_rtx_and_attrs (operands[0]);
>  })
> 
> -(define_expand "vector_nltuv1ti"
> -  [(set (match_operand:V1TI 3 "vlogical_operand")
> -	(gtu:V1TI (match_operand:V1TI 2 "vlogical_operand")
> -		  (match_operand:V1TI 1 "vlogical_operand")))
> -   (set (match_operand:V1TI 0 "vlogical_operand")
> -	(not:V1TI (match_dup 3)))]
> -  "TARGET_POWER10"
> -{
> -  operands[3] = gen_reg_rtx_and_attrs (operands[0]);
> -})
> -
>  (define_expand "vector_geu<mode>"
> -  [(set (match_operand:VEC_I 0 "vint_operand")
> -	(geu:VEC_I (match_operand:VEC_I 1 "vint_operand")
> -		   (match_operand:VEC_I 2 "vint_operand")))]
> +  [(set (match_operand:VEC_IC 0 "vint_operand")
> +	(geu:VEC_IC (match_operand:VEC_IC 1 "vint_operand")
> +		    (match_operand:VEC_IC 2 "vint_operand")))]
>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>    "")
> 
>  ; <= for integer vectors: apply not-greater-than
>  (define_expand "vector_ngt<mode>"
> -  [(set (match_operand:VEC_I 3 "vlogical_operand")
> -	(gt:VEC_I (match_operand:VEC_I 1 "vlogical_operand")
> -		  (match_operand:VEC_I 2 "vlogical_operand")))
> -   (set (match_operand:VEC_I 0 "vlogical_operand")
> -        (not:VEC_I (match_dup 3)))]
> +  [(set (match_operand:VEC_IC 3 "vlogical_operand")
> +	(gt:VEC_IC (match_operand:VEC_IC 1 "vlogical_operand")
> +		   (match_operand:VEC_IC 2 "vlogical_operand")))
> +   (set (match_operand:VEC_IC 0 "vlogical_operand")
> +	(not:VEC_IC (match_dup 3)))]
>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>  {
>    operands[3] = gen_reg_rtx_and_attrs (operands[0]);
>  })
> 
> -(define_expand "vector_ngtv1ti"
> -  [(set (match_operand:V1TI 3 "vlogical_operand")
> -	(gt:V1TI (match_operand:V1TI 1 "vlogical_operand")
> -		 (match_operand:V1TI 2 "vlogical_operand")))
> -   (set (match_operand:V1TI 0 "vlogical_operand")
> -        (not:V1TI (match_dup 3)))]
> -  "TARGET_POWER10"
> -{
> -  operands[3] = gen_reg_rtx_and_attrs (operands[0]);
> -})
> -
>  (define_expand "vector_ngtu<mode>"
> -  [(set (match_operand:VEC_I 3 "vlogical_operand")
> -	(gtu:VEC_I (match_operand:VEC_I 1 "vlogical_operand")
> -	 	   (match_operand:VEC_I 2 "vlogical_operand")))
> -   (set (match_operand:VEC_I 0 "vlogical_operand")
> -        (not:VEC_I (match_dup 3)))]
> +  [(set (match_operand:VEC_IC 3 "vlogical_operand")
> +	(gtu:VEC_IC (match_operand:VEC_IC 1 "vlogical_operand")
> +		    (match_operand:VEC_IC 2 "vlogical_operand")))
> +   (set (match_operand:VEC_IC 0 "vlogical_operand")
> +	(not:VEC_IC (match_dup 3)))]
>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>  {
>    operands[3] = gen_reg_rtx_and_attrs (operands[0]);
>  })
> 
> -(define_expand "vector_ngtuv1ti"
> -  [(set (match_operand:V1TI 3 "vlogical_operand")
> -	(gtu:V1TI (match_operand:V1TI 1 "vlogical_operand")
> -		  (match_operand:V1TI 2 "vlogical_operand")))
> -   (set (match_operand:V1TI 0 "vlogical_operand")
> -        (not:V1TI (match_dup 3)))]
> -  "TARGET_POWER10"
> -{
> -  operands[3] = gen_reg_rtx_and_attrs (operands[0]);
> -})
> -
>  ; There are 14 possible vector FP comparison operators, gt and eq of them have
>  ; been expanded above, so just support 12 remaining operators here.
> 
> @@ -1189,27 +1141,15 @@ (define_expand "vector_ge_<mode>_p"
>  (define_expand "vector_gtu_<mode>_p"
>    [(parallel
>      [(set (reg:CC CR6_REGNO)
> -	  (unspec:CC [(gtu:CC (match_operand:VEC_I 1 "vint_operand")
> -			      (match_operand:VEC_I 2 "vint_operand"))]
> +	  (unspec:CC [(gtu:CC (match_operand:VEC_IC 1 "vint_operand")
> +			      (match_operand:VEC_IC 2 "vint_operand"))]
>  		     UNSPEC_PREDICATE))
> -     (set (match_operand:VEC_I 0 "vlogical_operand")
> -	  (gtu:VEC_I (match_dup 1)
> -		     (match_dup 2)))])]
> +     (set (match_operand:VEC_IC 0 "vlogical_operand")
> +	  (gtu:VEC_IC (match_dup 1)
> +		      (match_dup 2)))])]
>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>    "")
> 
> -(define_expand "vector_gtu_v1ti_p"
> -  [(parallel
> -    [(set (reg:CC CR6_REGNO)
> -	  (unspec:CC [(gtu:CC (match_operand:V1TI 1 "altivec_register_operand")
> -			      (match_operand:V1TI 2 "altivec_register_operand"))]
> -		     UNSPEC_PREDICATE))
> -     (set (match_operand:V1TI 0 "altivec_register_operand")
> -	  (gtu:V1TI (match_dup 1)
> -		    (match_dup 2)))])]
> -  "TARGET_POWER10"
> -  "")
> -
>  ;; AltiVec/VSX predicates.
> 
>  ;; This expansion is triggered during expansion of predicate built-in
> @@ -1582,25 +1522,21 @@ (define_expand "vec_shr_<mode>"
> 
>  ;; Expanders for rotate each element in a vector
>  (define_expand "vrotl<mode>3"
> -  [(set (match_operand:VEC_I 0 "vint_operand")
> -	(rotate:VEC_I (match_operand:VEC_I 1 "vint_operand")
> -		      (match_operand:VEC_I 2 "vint_operand")))]
> +  [(set (match_operand:VEC_IC 0 "vint_operand")
> +	(rotate:VEC_IC (match_operand:VEC_IC 1 "vint_operand")
> +		       (match_operand:VEC_IC 2 "vint_operand")))]
>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
> -  "")
> -
> -(define_expand "vrotlv1ti3"
> -  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
> -        (rotate:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
> -                     (match_operand:V1TI 2 "vsx_register_operand" "v")))]
> -  "TARGET_POWER10"
>  {
> -  /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */
> -  rtx tmp = gen_reg_rtx (V1TImode);
> +  /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2.  */
> +  if (<MODE>mode == V1TImode)
> +    {
> +      rtx tmp = gen_reg_rtx (V1TImode);
> 
> -  emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
> -  emit_insn (gen_altivec_vrlq (operands[0], operands[1], tmp));
> -  DONE;
> -})
> +      emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
> +      emit_insn (gen_altivec_vrlq (operands[0], operands[1], tmp));
> +      DONE;
> +    }
> + })
> 
>  ;; Expanders for rotatert to make use of vrotl
>  (define_expand "vrotr<mode>3"
> @@ -1663,25 +1599,20 @@ (define_expand "vlshr<mode>3"
> 
>  ;; Expanders for arithmetic shift right on each vector element
>  (define_expand "vashr<mode>3"
> -  [(set (match_operand:VEC_I 0 "vint_operand")
> -	(ashiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand")
> -			(match_operand:VEC_I 2 "vint_operand")))]
> +  [(set (match_operand:VEC_IC 0 "vint_operand")
> +	(ashiftrt:VEC_IC (match_operand:VEC_IC 1 "vint_operand")
> +			 (match_operand:VEC_IC 2 "vint_operand")))]
>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
> -  "")
> -
> -;; No immediate version of this 128-bit instruction
> -(define_expand "vashrv1ti3"
> -  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
> -	(ashiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
> -		       (match_operand:V1TI 2 "vsx_register_operand" "v")))]
> -  "TARGET_POWER10"
>  {
> -  /* Shift amount in needs to be put into bits[57:63] of 128-bit operand2. */
> -  rtx tmp = gen_reg_rtx (V1TImode);
> +  /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2.  */
> +  if (<MODE>mode == V1TImode)
> +    {
> +      rtx tmp = gen_reg_rtx (V1TImode);
> 
> -  emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
> -  emit_insn (gen_altivec_vsraq (operands[0], operands[1], tmp));
> -  DONE;
> +      emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
> +      emit_insn (gen_altivec_vsraq (operands[0], operands[1], tmp));
> +      DONE;
> +    }
>  })
> 
>  \f
> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c
> new file mode 100644
> index 00000000000..1a4db0f45d4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c
> @@ -0,0 +1,86 @@
> +/* Verify that overloaded built-ins for vec_cmp with __int128
> +   inputs produce the right code.  */
> +
> +/* { dg-do compile } */

Need /* { dg-require-effective-target int128 } */

> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +#include <altivec.h>
> +
> +vector bool __int128
> +test3_eq (vector signed __int128 x, vector signed __int128 y)
> +{
> +  return vec_cmpeq (x, y);
> +}
> +
> +vector bool __int128
> +test6_eq (vector unsigned __int128 x, vector unsigned __int128 y)
> +{
> +  return vec_cmpeq (x, y);
> +}
> +

Nit: The function names test6 and test3 seems copied from other cases somewhere.
Maybe it's more meaningful with s/3// (or s/3/s/) and s/6/u/.

> +vector bool __int128
> +test3_ge (vector signed __int128 x, vector signed __int128 y)
> +{
> +  return vec_cmpge (x, y);
> +}
> +
> +vector bool __int128
> +test6_ge (vector unsigned __int128 x, vector unsigned __int128 y)
> +{
> +  return vec_cmpge (x, y);
> +}
> +
> +vector bool __int128
> +test3_gt (vector signed __int128 x, vector signed __int128 y)
> +{
> +  return vec_cmpgt (x, y);
> +}
> +
> +vector bool __int128
> +test6_gt (vector unsigned __int128 x, vector unsigned __int128 y)
> +{
> +  return vec_cmpgt (x, y);
> +}
> +
> +vector bool __int128
> +test3_le (vector signed __int128 x, vector signed __int128 y)
> +{
> +  return vec_cmple (x, y);
> +}
> +
> +vector bool __int128
> +test6_le (vector unsigned __int128 x, vector unsigned __int128 y)
> +{
> +  return vec_cmple (x, y);
> +}
> +
> +vector bool __int128
> +test3_lt (vector signed __int128 x, vector signed __int128 y)
> +{
> +  return vec_cmplt (x, y);
> +}
> +
> +vector bool __int128
> +test6_lt (vector unsigned __int128 x, vector unsigned __int128 y)
> +{
> +  return vec_cmplt (x, y);
> +}
> +
> +vector bool __int128
> +test3_ne (vector signed __int128 x, vector signed __int128 y)
> +{
> +  return vec_cmpne (x, y);
> +}
> +
> +vector bool __int128
> +test6_ne (vector unsigned __int128 x, vector unsigned __int128 y)
> +{
> +  return vec_cmpne (x, y);
> +}
> +
> +/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */
> +/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */
> +/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */
> +/* { dg-final { scan-assembler-times "xxlnor" 6 } } */
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103316.c b/gcc/testsuite/gcc.target/powerpc/pr103316.c
> new file mode 100644
> index 00000000000..02f7dc5ca1b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr103316.c
> @@ -0,0 +1,80 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target power10_ok } */

Need /* { dg-require-effective-target int128 } */ too.

BR,
Kewen

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v4, rs6000] Add V1TI into vector comparison expand [PR103316]
  2022-05-26  3:22 ` Kewen.Lin
@ 2022-05-26  5:30   ` HAO CHEN GUI
  2022-05-26  5:52     ` Kewen.Lin
  0 siblings, 1 reply; 6+ messages in thread
From: HAO CHEN GUI @ 2022-05-26  5:30 UTC (permalink / raw)
  To: Kewen.Lin; +Cc: Segher Boessenkool, David, Peter Bergner, gcc-patches

Kewen,
  Thanks so much for your advice. Just one question about effective-target.

  For the test cases, it needs both power10_ok and int128 support. I saw some
existing test cases have these two checks as well. But I wonder if power10_ok
already covers int128 on powerpc targets? Can we save one check then?

On 26/5/2022 上午 11:22, Kewen.Lin wrote:
> Hi Haochen,
> 
> on 2022/5/24 16:45, HAO CHEN GUI wrote:
>> Hi,
>>    This patch adds V1TI mode into a new mode iterator used in vector
>> comparison and rotation expands. Without the patch, the comparisons
>> between two vector __int128 are converted to scalar comparisons. The
>> code is suboptimal. The patch fixes the issue. Now all comparisons
>> between two vector __int128 generates P10 new comparison instructions.
>> Also the relative built-ins generate the same instructions after gimple
>> folding. So they're added back to the list.
>>
>>   This patch also merges some vector comparison and rotation expands
>> for V1T1 and other vector integer modes as they have the same patterns.
>> The expands for V1TI only are removed.
>>
>>    Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
>> Is this okay for trunk? Any recommendations? Thanks a lot.
>>
>> ChangeLog
>> 2022-05-24 Haochen Gui <guihaoc@linux.ibm.com>
>>
>> gcc/
>> 	PR target/103316
>> 	* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Enable
>> 	gimple folding for RS6000_BIF_VCMPEQUT, RS6000_BIF_VCMPNET,
>> 	RS6000_BIF_CMPGE_1TI, RS6000_BIF_CMPGE_U1TI, RS6000_BIF_VCMPGTUT,
>> 	RS6000_BIF_VCMPGTST, RS6000_BIF_CMPLE_1TI, RS6000_BIF_CMPLE_U1TI.
>> 	* config/rs6000/vector.md (VEC_IC): Define.  Add support for new Power10
>> 	V1TI instructions.
> 
> Nit: Maybe "New mode iterator" is better than "Define".
> 
>> 	(vec_cmp<mode><mode>): Set mode iterator to VEC_IC.
>> 	(vec_cmpu<mode><mode>): Likewise.
>> 	(vector_nlt<mode>): Set mode iterator to VEC_IC.
>> 	(vector_nltv1ti): Remove.
>> 	(vector_gtu<mode>): Set mode iterator to VEC_IC.
>> 	(vector_gtuv1ti): Remove.
>> 	(vector_nltu<mode>): Set mode iterator to VEC_IC.
>> 	(vector_nltuv1ti): Remove.
>> 	(vector_geu<mode>): Set mode iterator to VEC_IC.
>> 	(vector_ngt<mode>): Likewise.
>> 	(vector_ngtv1ti): Remove.
>> 	(vector_ngtu<mode>): Set mode iterator to VEC_IC.
>> 	(vector_ngtuv1ti): Remove.
>> 	(vector_gtu_<mode>_p): Set mode iterator to VEC_IC.
>> 	(vector_gtu_v1ti_p): Remove.
>> 	(vrotl<mode>3): Set mode iterator to VEC_IC.  Emit insns for V1TI.
>> 	(vrotlv1ti3): Remove.
>> 	(vashr<mode>3): Set mode iterator to VEC_IC.  Emit insns for V1TI.
>> 	(vashrv1ti3): Remove.
>>
>> gcc/testsuite/
>> 	PR target/103316
>> 	* gcc.target/powerpc/pr103316.c: New.
>> 	* gcc.target/powerpc/fold-vec-cmp-int128.c: New.
>>
>> patch.diff
>> diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
>> index e925ba9fad9..b67f4e066a8 100644
>> --- a/gcc/config/rs6000/rs6000-builtin.cc
>> +++ b/gcc/config/rs6000/rs6000-builtin.cc
>> @@ -2000,16 +2000,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>>      case RS6000_BIF_VCMPEQUH:
>>      case RS6000_BIF_VCMPEQUW:
>>      case RS6000_BIF_VCMPEQUD:
>> -    /* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple
>> -       folding produces worse code for 128-bit compares.  */
>> +    case RS6000_BIF_VCMPEQUT:
>>        fold_compare_helper (gsi, EQ_EXPR, stmt);
>>        return true;
>>
>>      case RS6000_BIF_VCMPNEB:
>>      case RS6000_BIF_VCMPNEH:
>>      case RS6000_BIF_VCMPNEW:
>> -    /* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple
>> -       folding produces worse code for 128-bit compares.  */
>> +    case RS6000_BIF_VCMPNET:
>>        fold_compare_helper (gsi, NE_EXPR, stmt);
>>        return true;
>>
>> @@ -2021,9 +2019,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>>      case RS6000_BIF_CMPGE_U4SI:
>>      case RS6000_BIF_CMPGE_2DI:
>>      case RS6000_BIF_CMPGE_U2DI:
>> -    /* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI
>> -       for now, because gimple folding produces worse code for 128-bit
>> -       compares.  */
>> +    case RS6000_BIF_CMPGE_1TI:
>> +    case RS6000_BIF_CMPGE_U1TI:
>>        fold_compare_helper (gsi, GE_EXPR, stmt);
>>        return true;
>>
>> @@ -2035,9 +2032,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>>      case RS6000_BIF_VCMPGTUW:
>>      case RS6000_BIF_VCMPGTUD:
>>      case RS6000_BIF_VCMPGTSD:
>> -    /* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST
>> -       for now, because gimple folding produces worse code for 128-bit
>> -       compares.  */
>> +    case RS6000_BIF_VCMPGTUT:
>> +    case RS6000_BIF_VCMPGTST:
>>        fold_compare_helper (gsi, GT_EXPR, stmt);
>>        return true;
>>
>> @@ -2049,9 +2045,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>>      case RS6000_BIF_CMPLE_U4SI:
>>      case RS6000_BIF_CMPLE_2DI:
>>      case RS6000_BIF_CMPLE_U2DI:
>> -    /* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI
>> -       for now, because gimple folding produces worse code for 128-bit
>> -       compares.  */
>> +    case RS6000_BIF_CMPLE_1TI:
>> +    case RS6000_BIF_CMPLE_U1TI:
>>        fold_compare_helper (gsi, LE_EXPR, stmt);
>>        return true;
>>
>> diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
>> index 4d0797c48f8..3b7a272994f 100644
>> --- a/gcc/config/rs6000/vector.md
>> +++ b/gcc/config/rs6000/vector.md
>> @@ -26,6 +26,9 @@
>>  ;; Vector int modes
>>  (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])
>>
>> +;; Vector int modes for comparison
> 
> Nit: This comment line doesn't perfectly match the usage below since it's also
> used for shift and rotation in this patch.  Maybe it's better with:
> 
> "Vector int modes for comparison, shift and rotation"
> 
>> +(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI (V1TI "TARGET_POWER10")])
>> +
>>  ;; 128-bit int modes
>>  (define_mode_iterator VEC_TI [V1TI TI])
>>
>> @@ -533,10 +536,10 @@ (define_expand "vcond_mask_<mode><VEC_int>"
>>
>>  ;; For signed integer vectors comparison.
>>  (define_expand "vec_cmp<mode><mode>"
>> -  [(set (match_operand:VEC_I 0 "vint_operand")
>> +  [(set (match_operand:VEC_IC 0 "vint_operand")
>>  	(match_operator 1 "signed_or_equality_comparison_operator"
>> -	  [(match_operand:VEC_I 2 "vint_operand")
>> -	   (match_operand:VEC_I 3 "vint_operand")]))]
>> +	  [(match_operand:VEC_IC 2 "vint_operand")
>> +	   (match_operand:VEC_IC 3 "vint_operand")]))]
>>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>>  {
>>    enum rtx_code code = GET_CODE (operands[1]);
>> @@ -573,10 +576,10 @@ (define_expand "vec_cmp<mode><mode>"
>>
>>  ;; For unsigned integer vectors comparison.
>>  (define_expand "vec_cmpu<mode><mode>"
>> -  [(set (match_operand:VEC_I 0 "vint_operand")
>> +  [(set (match_operand:VEC_IC 0 "vint_operand")
>>  	(match_operator 1 "unsigned_or_equality_comparison_operator"
>> -	  [(match_operand:VEC_I 2 "vint_operand")
>> -	   (match_operand:VEC_I 3 "vint_operand")]))]
>> +	  [(match_operand:VEC_IC 2 "vint_operand")
>> +	   (match_operand:VEC_IC 3 "vint_operand")]))]
>>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>>  {
>>    enum rtx_code code = GET_CODE (operands[1]);
>> @@ -690,116 +693,65 @@ (define_expand "vector_gt<mode>"
>>
>>  ; >= for integer vectors: swap operands and apply not-greater-than
>>  (define_expand "vector_nlt<mode>"
>> -  [(set (match_operand:VEC_I 3 "vlogical_operand")
>> -	(gt:VEC_I (match_operand:VEC_I 2 "vlogical_operand")
>> -		  (match_operand:VEC_I 1 "vlogical_operand")))
>> -   (set (match_operand:VEC_I 0 "vlogical_operand")
>> -        (not:VEC_I (match_dup 3)))]
>> +  [(set (match_operand:VEC_IC 3 "vlogical_operand")
>> +	(gt:VEC_IC (match_operand:VEC_IC 2 "vlogical_operand")
>> +		   (match_operand:VEC_IC 1 "vlogical_operand")))
>> +   (set (match_operand:VEC_IC 0 "vlogical_operand")
>> +	(not:VEC_IC (match_dup 3)))]
>>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>>  {
>>    operands[3] = gen_reg_rtx_and_attrs (operands[0]);
>>  })
>>
>> -(define_expand "vector_nltv1ti"
>> -  [(set (match_operand:V1TI 3 "vlogical_operand")
>> -	(gt:V1TI (match_operand:V1TI 2 "vlogical_operand")
>> -		 (match_operand:V1TI 1 "vlogical_operand")))
>> -   (set (match_operand:V1TI 0 "vlogical_operand")
>> -        (not:V1TI (match_dup 3)))]
>> -  "TARGET_POWER10"
>> -{
>> -  operands[3] = gen_reg_rtx_and_attrs (operands[0]);
>> -})
>> -
>>  (define_expand "vector_gtu<mode>"
>> -  [(set (match_operand:VEC_I 0 "vint_operand")
>> -	(gtu:VEC_I (match_operand:VEC_I 1 "vint_operand")
>> -		   (match_operand:VEC_I 2 "vint_operand")))]
>> +  [(set (match_operand:VEC_IC 0 "vint_operand")
>> +	(gtu:VEC_IC (match_operand:VEC_IC 1 "vint_operand")
>> +		    (match_operand:VEC_IC 2 "vint_operand")))]
>>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>>    "")
>>
>> -(define_expand "vector_gtuv1ti"
>> -  [(set (match_operand:V1TI 0 "altivec_register_operand")
>> -	(gtu:V1TI (match_operand:V1TI 1 "altivec_register_operand")
>> -		  (match_operand:V1TI 2 "altivec_register_operand")))]
>> -  "TARGET_POWER10"
>> -  "")
>> -
>>  ; >= for integer vectors: swap operands and apply not-greater-than
>>  (define_expand "vector_nltu<mode>"
>> -  [(set (match_operand:VEC_I 3 "vlogical_operand")
>> -	(gtu:VEC_I (match_operand:VEC_I 2 "vlogical_operand")
>> -	 	   (match_operand:VEC_I 1 "vlogical_operand")))
>> -   (set (match_operand:VEC_I 0 "vlogical_operand")
>> -        (not:VEC_I (match_dup 3)))]
>> +  [(set (match_operand:VEC_IC 3 "vlogical_operand")
>> +	(gtu:VEC_IC (match_operand:VEC_IC 2 "vlogical_operand")
>> +		    (match_operand:VEC_IC 1 "vlogical_operand")))
>> +   (set (match_operand:VEC_IC 0 "vlogical_operand")
>> +	(not:VEC_IC (match_dup 3)))]
>>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>>  {
>>    operands[3] = gen_reg_rtx_and_attrs (operands[0]);
>>  })
>>
>> -(define_expand "vector_nltuv1ti"
>> -  [(set (match_operand:V1TI 3 "vlogical_operand")
>> -	(gtu:V1TI (match_operand:V1TI 2 "vlogical_operand")
>> -		  (match_operand:V1TI 1 "vlogical_operand")))
>> -   (set (match_operand:V1TI 0 "vlogical_operand")
>> -	(not:V1TI (match_dup 3)))]
>> -  "TARGET_POWER10"
>> -{
>> -  operands[3] = gen_reg_rtx_and_attrs (operands[0]);
>> -})
>> -
>>  (define_expand "vector_geu<mode>"
>> -  [(set (match_operand:VEC_I 0 "vint_operand")
>> -	(geu:VEC_I (match_operand:VEC_I 1 "vint_operand")
>> -		   (match_operand:VEC_I 2 "vint_operand")))]
>> +  [(set (match_operand:VEC_IC 0 "vint_operand")
>> +	(geu:VEC_IC (match_operand:VEC_IC 1 "vint_operand")
>> +		    (match_operand:VEC_IC 2 "vint_operand")))]
>>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>>    "")
>>
>>  ; <= for integer vectors: apply not-greater-than
>>  (define_expand "vector_ngt<mode>"
>> -  [(set (match_operand:VEC_I 3 "vlogical_operand")
>> -	(gt:VEC_I (match_operand:VEC_I 1 "vlogical_operand")
>> -		  (match_operand:VEC_I 2 "vlogical_operand")))
>> -   (set (match_operand:VEC_I 0 "vlogical_operand")
>> -        (not:VEC_I (match_dup 3)))]
>> +  [(set (match_operand:VEC_IC 3 "vlogical_operand")
>> +	(gt:VEC_IC (match_operand:VEC_IC 1 "vlogical_operand")
>> +		   (match_operand:VEC_IC 2 "vlogical_operand")))
>> +   (set (match_operand:VEC_IC 0 "vlogical_operand")
>> +	(not:VEC_IC (match_dup 3)))]
>>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>>  {
>>    operands[3] = gen_reg_rtx_and_attrs (operands[0]);
>>  })
>>
>> -(define_expand "vector_ngtv1ti"
>> -  [(set (match_operand:V1TI 3 "vlogical_operand")
>> -	(gt:V1TI (match_operand:V1TI 1 "vlogical_operand")
>> -		 (match_operand:V1TI 2 "vlogical_operand")))
>> -   (set (match_operand:V1TI 0 "vlogical_operand")
>> -        (not:V1TI (match_dup 3)))]
>> -  "TARGET_POWER10"
>> -{
>> -  operands[3] = gen_reg_rtx_and_attrs (operands[0]);
>> -})
>> -
>>  (define_expand "vector_ngtu<mode>"
>> -  [(set (match_operand:VEC_I 3 "vlogical_operand")
>> -	(gtu:VEC_I (match_operand:VEC_I 1 "vlogical_operand")
>> -	 	   (match_operand:VEC_I 2 "vlogical_operand")))
>> -   (set (match_operand:VEC_I 0 "vlogical_operand")
>> -        (not:VEC_I (match_dup 3)))]
>> +  [(set (match_operand:VEC_IC 3 "vlogical_operand")
>> +	(gtu:VEC_IC (match_operand:VEC_IC 1 "vlogical_operand")
>> +		    (match_operand:VEC_IC 2 "vlogical_operand")))
>> +   (set (match_operand:VEC_IC 0 "vlogical_operand")
>> +	(not:VEC_IC (match_dup 3)))]
>>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>>  {
>>    operands[3] = gen_reg_rtx_and_attrs (operands[0]);
>>  })
>>
>> -(define_expand "vector_ngtuv1ti"
>> -  [(set (match_operand:V1TI 3 "vlogical_operand")
>> -	(gtu:V1TI (match_operand:V1TI 1 "vlogical_operand")
>> -		  (match_operand:V1TI 2 "vlogical_operand")))
>> -   (set (match_operand:V1TI 0 "vlogical_operand")
>> -        (not:V1TI (match_dup 3)))]
>> -  "TARGET_POWER10"
>> -{
>> -  operands[3] = gen_reg_rtx_and_attrs (operands[0]);
>> -})
>> -
>>  ; There are 14 possible vector FP comparison operators, gt and eq of them have
>>  ; been expanded above, so just support 12 remaining operators here.
>>
>> @@ -1189,27 +1141,15 @@ (define_expand "vector_ge_<mode>_p"
>>  (define_expand "vector_gtu_<mode>_p"
>>    [(parallel
>>      [(set (reg:CC CR6_REGNO)
>> -	  (unspec:CC [(gtu:CC (match_operand:VEC_I 1 "vint_operand")
>> -			      (match_operand:VEC_I 2 "vint_operand"))]
>> +	  (unspec:CC [(gtu:CC (match_operand:VEC_IC 1 "vint_operand")
>> +			      (match_operand:VEC_IC 2 "vint_operand"))]
>>  		     UNSPEC_PREDICATE))
>> -     (set (match_operand:VEC_I 0 "vlogical_operand")
>> -	  (gtu:VEC_I (match_dup 1)
>> -		     (match_dup 2)))])]
>> +     (set (match_operand:VEC_IC 0 "vlogical_operand")
>> +	  (gtu:VEC_IC (match_dup 1)
>> +		      (match_dup 2)))])]
>>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>>    "")
>>
>> -(define_expand "vector_gtu_v1ti_p"
>> -  [(parallel
>> -    [(set (reg:CC CR6_REGNO)
>> -	  (unspec:CC [(gtu:CC (match_operand:V1TI 1 "altivec_register_operand")
>> -			      (match_operand:V1TI 2 "altivec_register_operand"))]
>> -		     UNSPEC_PREDICATE))
>> -     (set (match_operand:V1TI 0 "altivec_register_operand")
>> -	  (gtu:V1TI (match_dup 1)
>> -		    (match_dup 2)))])]
>> -  "TARGET_POWER10"
>> -  "")
>> -
>>  ;; AltiVec/VSX predicates.
>>
>>  ;; This expansion is triggered during expansion of predicate built-in
>> @@ -1582,25 +1522,21 @@ (define_expand "vec_shr_<mode>"
>>
>>  ;; Expanders for rotate each element in a vector
>>  (define_expand "vrotl<mode>3"
>> -  [(set (match_operand:VEC_I 0 "vint_operand")
>> -	(rotate:VEC_I (match_operand:VEC_I 1 "vint_operand")
>> -		      (match_operand:VEC_I 2 "vint_operand")))]
>> +  [(set (match_operand:VEC_IC 0 "vint_operand")
>> +	(rotate:VEC_IC (match_operand:VEC_IC 1 "vint_operand")
>> +		       (match_operand:VEC_IC 2 "vint_operand")))]
>>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>> -  "")
>> -
>> -(define_expand "vrotlv1ti3"
>> -  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
>> -        (rotate:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
>> -                     (match_operand:V1TI 2 "vsx_register_operand" "v")))]
>> -  "TARGET_POWER10"
>>  {
>> -  /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */
>> -  rtx tmp = gen_reg_rtx (V1TImode);
>> +  /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2.  */
>> +  if (<MODE>mode == V1TImode)
>> +    {
>> +      rtx tmp = gen_reg_rtx (V1TImode);
>>
>> -  emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
>> -  emit_insn (gen_altivec_vrlq (operands[0], operands[1], tmp));
>> -  DONE;
>> -})
>> +      emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
>> +      emit_insn (gen_altivec_vrlq (operands[0], operands[1], tmp));
>> +      DONE;
>> +    }
>> + })
>>
>>  ;; Expanders for rotatert to make use of vrotl
>>  (define_expand "vrotr<mode>3"
>> @@ -1663,25 +1599,20 @@ (define_expand "vlshr<mode>3"
>>
>>  ;; Expanders for arithmetic shift right on each vector element
>>  (define_expand "vashr<mode>3"
>> -  [(set (match_operand:VEC_I 0 "vint_operand")
>> -	(ashiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand")
>> -			(match_operand:VEC_I 2 "vint_operand")))]
>> +  [(set (match_operand:VEC_IC 0 "vint_operand")
>> +	(ashiftrt:VEC_IC (match_operand:VEC_IC 1 "vint_operand")
>> +			 (match_operand:VEC_IC 2 "vint_operand")))]
>>    "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
>> -  "")
>> -
>> -;; No immediate version of this 128-bit instruction
>> -(define_expand "vashrv1ti3"
>> -  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
>> -	(ashiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
>> -		       (match_operand:V1TI 2 "vsx_register_operand" "v")))]
>> -  "TARGET_POWER10"
>>  {
>> -  /* Shift amount in needs to be put into bits[57:63] of 128-bit operand2. */
>> -  rtx tmp = gen_reg_rtx (V1TImode);
>> +  /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2.  */
>> +  if (<MODE>mode == V1TImode)
>> +    {
>> +      rtx tmp = gen_reg_rtx (V1TImode);
>>
>> -  emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
>> -  emit_insn (gen_altivec_vsraq (operands[0], operands[1], tmp));
>> -  DONE;
>> +      emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
>> +      emit_insn (gen_altivec_vsraq (operands[0], operands[1], tmp));
>> +      DONE;
>> +    }
>>  })
>>
>>  \f
>> diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c
>> new file mode 100644
>> index 00000000000..1a4db0f45d4
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int128.c
>> @@ -0,0 +1,86 @@
>> +/* Verify that overloaded built-ins for vec_cmp with __int128
>> +   inputs produce the right code.  */
>> +
>> +/* { dg-do compile } */
> 
> Need /* { dg-require-effective-target int128 } */
> 
>> +/* { dg-require-effective-target power10_ok } */
>> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
>> +
>> +#include <altivec.h>
>> +
>> +vector bool __int128
>> +test3_eq (vector signed __int128 x, vector signed __int128 y)
>> +{
>> +  return vec_cmpeq (x, y);
>> +}
>> +
>> +vector bool __int128
>> +test6_eq (vector unsigned __int128 x, vector unsigned __int128 y)
>> +{
>> +  return vec_cmpeq (x, y);
>> +}
>> +
> 
> Nit: The function names test6 and test3 seems copied from other cases somewhere.
> Maybe it's more meaningful with s/3// (or s/3/s/) and s/6/u/.
> 
>> +vector bool __int128
>> +test3_ge (vector signed __int128 x, vector signed __int128 y)
>> +{
>> +  return vec_cmpge (x, y);
>> +}
>> +
>> +vector bool __int128
>> +test6_ge (vector unsigned __int128 x, vector unsigned __int128 y)
>> +{
>> +  return vec_cmpge (x, y);
>> +}
>> +
>> +vector bool __int128
>> +test3_gt (vector signed __int128 x, vector signed __int128 y)
>> +{
>> +  return vec_cmpgt (x, y);
>> +}
>> +
>> +vector bool __int128
>> +test6_gt (vector unsigned __int128 x, vector unsigned __int128 y)
>> +{
>> +  return vec_cmpgt (x, y);
>> +}
>> +
>> +vector bool __int128
>> +test3_le (vector signed __int128 x, vector signed __int128 y)
>> +{
>> +  return vec_cmple (x, y);
>> +}
>> +
>> +vector bool __int128
>> +test6_le (vector unsigned __int128 x, vector unsigned __int128 y)
>> +{
>> +  return vec_cmple (x, y);
>> +}
>> +
>> +vector bool __int128
>> +test3_lt (vector signed __int128 x, vector signed __int128 y)
>> +{
>> +  return vec_cmplt (x, y);
>> +}
>> +
>> +vector bool __int128
>> +test6_lt (vector unsigned __int128 x, vector unsigned __int128 y)
>> +{
>> +  return vec_cmplt (x, y);
>> +}
>> +
>> +vector bool __int128
>> +test3_ne (vector signed __int128 x, vector signed __int128 y)
>> +{
>> +  return vec_cmpne (x, y);
>> +}
>> +
>> +vector bool __int128
>> +test6_ne (vector unsigned __int128 x, vector unsigned __int128 y)
>> +{
>> +  return vec_cmpne (x, y);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times "vcmpequq" 4 } } */
>> +/* { dg-final { scan-assembler-times "vcmpgtsq" 4 } } */
>> +/* { dg-final { scan-assembler-times "vcmpgtuq" 4 } } */
>> +/* { dg-final { scan-assembler-times "xxlnor" 6 } } */
>> +
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103316.c b/gcc/testsuite/gcc.target/powerpc/pr103316.c
>> new file mode 100644
>> index 00000000000..02f7dc5ca1b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr103316.c
>> @@ -0,0 +1,80 @@
>> +/* { dg-do compile } */
>> +/* { dg-require-effective-target power10_ok } */
> 
> Need /* { dg-require-effective-target int128 } */ too.
> 
> BR,
> Kewen

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v4, rs6000] Add V1TI into vector comparison expand [PR103316]
  2022-05-26  5:30   ` HAO CHEN GUI
@ 2022-05-26  5:52     ` Kewen.Lin
  2022-05-26 13:16       ` David Edelsohn
  0 siblings, 1 reply; 6+ messages in thread
From: Kewen.Lin @ 2022-05-26  5:52 UTC (permalink / raw)
  To: HAO CHEN GUI; +Cc: Segher Boessenkool, David, Peter Bergner, gcc-patches

Hi Haochen,

on 2022/5/26 13:30, HAO CHEN GUI wrote:
> Kewen,
>   Thanks so much for your advice. Just one question about effective-target.
> 
>   For the test cases, it needs both power10_ok and int128 support. I saw some
> existing test cases have these two checks as well. But I wonder if power10_ok
> already covers int128 on powerpc targets? Can we save one check then?
> 

Good question, IMHO the checks are orthogonal, it's doable to disable int128
support by hacking the compiler, the int128 effective-target check then fails
due to missing defined __SIZEOF_INT128__, but power10_ok check isn't able to
catch that, the test case could end up with possible unexpected result without
the explicit int128 check.

To me, the int128 check is to ensure int128 type is available and the
power10_ok check is to ensure the power10 specific instructions are supported. 

BR,
Kewen

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v4, rs6000] Add V1TI into vector comparison expand [PR103316]
  2022-05-26  5:52     ` Kewen.Lin
@ 2022-05-26 13:16       ` David Edelsohn
  2022-05-26 17:56         ` Segher Boessenkool
  0 siblings, 1 reply; 6+ messages in thread
From: David Edelsohn @ 2022-05-26 13:16 UTC (permalink / raw)
  To: Kewen.Lin; +Cc: HAO CHEN GUI, Segher Boessenkool, Peter Bergner, gcc-patches

On Thu, May 26, 2022 at 1:52 AM Kewen.Lin <linkw@linux.ibm.com> wrote:
>
> Hi Haochen,
>
> on 2022/5/26 13:30, HAO CHEN GUI wrote:
> > Kewen,
> >   Thanks so much for your advice. Just one question about effective-target.
> >
> >   For the test cases, it needs both power10_ok and int128 support. I saw some
> > existing test cases have these two checks as well. But I wonder if power10_ok
> > already covers int128 on powerpc targets? Can we save one check then?
> >
>
> Good question, IMHO the checks are orthogonal, it's doable to disable int128
> support by hacking the compiler, the int128 effective-target check then fails
> due to missing defined __SIZEOF_INT128__, but power10_ok check isn't able to
> catch that, the test case could end up with possible unexpected result without
> the explicit int128 check.
>
> To me, the int128 check is to ensure int128 type is available and the
> power10_ok check is to ensure the power10 specific instructions are supported.

Does Power10 fully support int128 in 32 bit mode?  I would expect no,
so the additional test is required.

Thanks, David

>
> BR,
> Kewen

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v4, rs6000] Add V1TI into vector comparison expand [PR103316]
  2022-05-26 13:16       ` David Edelsohn
@ 2022-05-26 17:56         ` Segher Boessenkool
  0 siblings, 0 replies; 6+ messages in thread
From: Segher Boessenkool @ 2022-05-26 17:56 UTC (permalink / raw)
  To: David Edelsohn; +Cc: Kewen.Lin, HAO CHEN GUI, Peter Bergner, gcc-patches

On Thu, May 26, 2022 at 09:16:59AM -0400, David Edelsohn wrote:
> Does Power10 fully support int128 in 32 bit mode?  I would expect no,
> so the additional test is required.

It isn't supported on any 32-bit ABI, Power or not.  It would simplify
many things if it was though!


Segher

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-05-26 17:57 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-24  8:45 [PATCH v4, rs6000] Add V1TI into vector comparison expand [PR103316] HAO CHEN GUI
2022-05-26  3:22 ` Kewen.Lin
2022-05-26  5:30   ` HAO CHEN GUI
2022-05-26  5:52     ` Kewen.Lin
2022-05-26 13:16       ` David Edelsohn
2022-05-26 17:56         ` Segher Boessenkool

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).