PATCH, rs6000] Add V1TI into vector comparison expand [PR103316]

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* PATCH, rs6000] Add V1TI into vector comparison expand [PR103316]
@ 2022-03-10  6:31 HAO CHEN GUI
  2022-03-15  2:05 ` Ping^1 [PATCH, " HAO CHEN GUI
  2022-03-15  8:59 ` PATCH, " Kewen.Lin
  0 siblings, 2 replies; 3+ messages in thread
From: HAO CHEN GUI @ 2022-03-10  6:31 UTC (permalink / raw)
  To: gcc-patches; +Cc: Segher Boessenkool, David, Peter Bergner

Hi,
   This patch adds V1TI mode into mode iterator used in vector comparison
expands.With the patch, both built-ins and direct comparison could generate
P10 new V1TI comparison instructions.

   Bootstrapped and tested on ppc64 Linux BE and LE with no regressions. Is
this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-03-09 Haochen Gui <guihaoc@linux.ibm.com>

gcc/
	PR target/103316
	* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Enable
	gimple folding for RS6000_BIF_VCMPEQUT, RS6000_BIF_VCMPNET,
	RS6000_BIF_CMPGE_1TI, RS6000_BIF_CMPGE_U1TI, RS6000_BIF_VCMPGTUT,
	RS6000_BIF_VCMPGTST, RS6000_BIF_CMPLE_1TI, RS6000_BIF_CMPLE_U1TI.
	* config/rs6000/vector.md (VEC_IC): Define. Add support for new Power10
	V1TI instructions.
	(vec_cmp<mode><mode>): Set mode iterator to VEC_IC.
	(vec_cmpu<mode><mode>): Likewise.

gcc/testsuite/
	PR target/103316
	* gcc.target/powerpc/pr103316.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index 5d34c1bcfc9..143effa89bf 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -1994,6 +1994,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case RS6000_BIF_VCMPEQUH:
     case RS6000_BIF_VCMPEQUW:
     case RS6000_BIF_VCMPEQUD:
+    case RS6000_BIF_VCMPEQUT:
     /* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple
        folding produces worse code for 128-bit compares.  */
       fold_compare_helper (gsi, EQ_EXPR, stmt);
@@ -2002,6 +2003,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case RS6000_BIF_VCMPNEB:
     case RS6000_BIF_VCMPNEH:
     case RS6000_BIF_VCMPNEW:
+    case RS6000_BIF_VCMPNET:
     /* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple
        folding produces worse code for 128-bit compares.  */
       fold_compare_helper (gsi, NE_EXPR, stmt);
@@ -2015,6 +2017,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case RS6000_BIF_CMPGE_U4SI:
     case RS6000_BIF_CMPGE_2DI:
     case RS6000_BIF_CMPGE_U2DI:
+    case RS6000_BIF_CMPGE_1TI:
+    case RS6000_BIF_CMPGE_U1TI:
     /* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI
        for now, because gimple folding produces worse code for 128-bit
        compares.  */
@@ -2029,6 +2033,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case RS6000_BIF_VCMPGTUW:
     case RS6000_BIF_VCMPGTUD:
     case RS6000_BIF_VCMPGTSD:
+    case RS6000_BIF_VCMPGTUT:
+    case RS6000_BIF_VCMPGTST:
     /* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST
        for now, because gimple folding produces worse code for 128-bit
        compares.  */
@@ -2043,6 +2049,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case RS6000_BIF_CMPLE_U4SI:
     case RS6000_BIF_CMPLE_2DI:
     case RS6000_BIF_CMPLE_U2DI:
+    case RS6000_BIF_CMPLE_1TI:
+    case RS6000_BIF_CMPLE_U1TI:
     /* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI
        for now, because gimple folding produces worse code for 128-bit
        compares.  */
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index b87a742cca8..1afb8a6d786 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -26,6 +26,9 @@
 ;; Vector int modes
 (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])

+;; Vector int modes for comparison
+(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI V1TI])
+
 ;; 128-bit int modes
 (define_mode_iterator VEC_TI [V1TI TI])

@@ -533,11 +536,12 @@ (define_expand "vcond_mask_<mode><VEC_int>"

 ;; For signed integer vectors comparison.
 (define_expand "vec_cmp<mode><mode>"
-  [(set (match_operand:VEC_I 0 "vint_operand")
+  [(set (match_operand:VEC_IC 0 "vint_operand")
 	(match_operator 1 "signed_or_equality_comparison_operator"
-	  [(match_operand:VEC_I 2 "vint_operand")
-	   (match_operand:VEC_I 3 "vint_operand")]))]
-  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+	  [(match_operand:VEC_IC 2 "vint_operand")
+	   (match_operand:VEC_IC 3 "vint_operand")]))]
+  "(VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && <MODE>mode!= V1TImode)
+   || (<MODE>mode == V1TImode && TARGET_POWER10)"
 {
   enum rtx_code code = GET_CODE (operands[1]);
   rtx tmp = gen_reg_rtx (<MODE>mode);
@@ -573,11 +577,12 @@ (define_expand "vec_cmp<mode><mode>"

 ;; For unsigned integer vectors comparison.
 (define_expand "vec_cmpu<mode><mode>"
-  [(set (match_operand:VEC_I 0 "vint_operand")
+  [(set (match_operand:VEC_IC 0 "vint_operand")
 	(match_operator 1 "unsigned_or_equality_comparison_operator"
-	  [(match_operand:VEC_I 2 "vint_operand")
-	   (match_operand:VEC_I 3 "vint_operand")]))]
-  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+	  [(match_operand:VEC_IC 2 "vint_operand")
+	   (match_operand:VEC_IC 3 "vint_operand")]))]
+  "(VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && <MODE>mode != V1TImode)
+   || (<MODE>mode == V1TImode && TARGET_POWER10)"
 {
   enum rtx_code code = GET_CODE (operands[1]);
   rtx tmp = gen_reg_rtx (<MODE>mode);
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103316.c b/gcc/testsuite/gcc.target/powerpc/pr103316.c
new file mode 100644
index 00000000000..da2121a30de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103316.c
@@ -0,0 +1,79 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+vector bool __int128
+test_eq (vector signed __int128 a, vector signed __int128 b)
+{
+  return a == b;
+}
+
+vector bool __int128
+test_ne (vector signed __int128 a, vector signed __int128 b)
+{
+  return a != b;
+}
+
+vector bool __int128
+test_gt (vector signed __int128 a, vector signed __int128 b)
+{
+  return a > b;
+}
+
+vector bool __int128
+test_ge (vector signed __int128 a, vector signed __int128 b)
+{
+  return a >= b;
+}
+
+vector bool __int128
+test_lt (vector signed __int128 a, vector signed __int128 b)
+{
+  return a < b;
+}
+
+vector bool __int128
+test_le (vector signed __int128 a, vector signed __int128 b)
+{
+  return a <= b;
+}
+
+vector bool __int128
+testu_eq (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a == b;
+}
+
+vector bool __int128
+testu_ne (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a != b;
+}
+
+vector bool __int128
+testu_gt (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a > b;
+}
+
+vector bool __int128
+testu_ge (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a >= b;
+}
+
+vector bool __int128
+testu_lt (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a < b;
+}
+
+vector bool __int128
+testu_le (vector unsigned __int128 a, vector unsigned __int128 b)
+{
+  return a <= b;
+}
+
+/* { dg-final { scan-assembler-times "vcmp" 12 } } */

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Ping^1 [PATCH, rs6000] Add V1TI into vector comparison expand [PR103316]
  2022-03-10  6:31 PATCH, rs6000] Add V1TI into vector comparison expand [PR103316] HAO CHEN GUI
@ 2022-03-15  2:05 ` HAO CHEN GUI
  2022-03-15  8:59 ` PATCH, " Kewen.Lin
  1 sibling, 0 replies; 3+ messages in thread
From: HAO CHEN GUI @ 2022-03-15  2:05 UTC (permalink / raw)
  To: gcc-patches; +Cc: Segher Boessenkool, David, Peter Bergner

Hi,
  Gentle ping this:
  https://gcc.gnu.org/pipermail/gcc-patches/2022-March/591507.html
Thanks

On 10/3/2022 下午 2:31, HAO CHEN GUI wrote:
> Hi,
>    This patch adds V1TI mode into mode iterator used in vector comparison
> expands.With the patch, both built-ins and direct comparison could generate
> P10 new V1TI comparison instructions.
> 
>    Bootstrapped and tested on ppc64 Linux BE and LE with no regressions. Is
> this okay for trunk? Any recommendations? Thanks a lot.
> 
> ChangeLog
> 2022-03-09 Haochen Gui <guihaoc@linux.ibm.com>
> 
> gcc/
> 	PR target/103316
> 	* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Enable
> 	gimple folding for RS6000_BIF_VCMPEQUT, RS6000_BIF_VCMPNET,
> 	RS6000_BIF_CMPGE_1TI, RS6000_BIF_CMPGE_U1TI, RS6000_BIF_VCMPGTUT,
> 	RS6000_BIF_VCMPGTST, RS6000_BIF_CMPLE_1TI, RS6000_BIF_CMPLE_U1TI.
> 	* config/rs6000/vector.md (VEC_IC): Define. Add support for new Power10
> 	V1TI instructions.
> 	(vec_cmp<mode><mode>): Set mode iterator to VEC_IC.
> 	(vec_cmpu<mode><mode>): Likewise.
> 
> gcc/testsuite/
> 	PR target/103316
> 	* gcc.target/powerpc/pr103316.c: New.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
> index 5d34c1bcfc9..143effa89bf 100644
> --- a/gcc/config/rs6000/rs6000-builtin.cc
> +++ b/gcc/config/rs6000/rs6000-builtin.cc
> @@ -1994,6 +1994,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_VCMPEQUH:
>      case RS6000_BIF_VCMPEQUW:
>      case RS6000_BIF_VCMPEQUD:
> +    case RS6000_BIF_VCMPEQUT:
>      /* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple
>         folding produces worse code for 128-bit compares.  */
>        fold_compare_helper (gsi, EQ_EXPR, stmt);
> @@ -2002,6 +2003,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_VCMPNEB:
>      case RS6000_BIF_VCMPNEH:
>      case RS6000_BIF_VCMPNEW:
> +    case RS6000_BIF_VCMPNET:
>      /* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple
>         folding produces worse code for 128-bit compares.  */
>        fold_compare_helper (gsi, NE_EXPR, stmt);
> @@ -2015,6 +2017,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_CMPGE_U4SI:
>      case RS6000_BIF_CMPGE_2DI:
>      case RS6000_BIF_CMPGE_U2DI:
> +    case RS6000_BIF_CMPGE_1TI:
> +    case RS6000_BIF_CMPGE_U1TI:
>      /* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI
>         for now, because gimple folding produces worse code for 128-bit
>         compares.  */
> @@ -2029,6 +2033,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_VCMPGTUW:
>      case RS6000_BIF_VCMPGTUD:
>      case RS6000_BIF_VCMPGTSD:
> +    case RS6000_BIF_VCMPGTUT:
> +    case RS6000_BIF_VCMPGTST:
>      /* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST
>         for now, because gimple folding produces worse code for 128-bit
>         compares.  */
> @@ -2043,6 +2049,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_CMPLE_U4SI:
>      case RS6000_BIF_CMPLE_2DI:
>      case RS6000_BIF_CMPLE_U2DI:
> +    case RS6000_BIF_CMPLE_1TI:
> +    case RS6000_BIF_CMPLE_U1TI:
>      /* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI
>         for now, because gimple folding produces worse code for 128-bit
>         compares.  */
> diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
> index b87a742cca8..1afb8a6d786 100644
> --- a/gcc/config/rs6000/vector.md
> +++ b/gcc/config/rs6000/vector.md
> @@ -26,6 +26,9 @@
>  ;; Vector int modes
>  (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])
> 
> +;; Vector int modes for comparison
> +(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI V1TI])
> +
>  ;; 128-bit int modes
>  (define_mode_iterator VEC_TI [V1TI TI])
> 
> @@ -533,11 +536,12 @@ (define_expand "vcond_mask_<mode><VEC_int>"
> 
>  ;; For signed integer vectors comparison.
>  (define_expand "vec_cmp<mode><mode>"
> -  [(set (match_operand:VEC_I 0 "vint_operand")
> +  [(set (match_operand:VEC_IC 0 "vint_operand")
>  	(match_operator 1 "signed_or_equality_comparison_operator"
> -	  [(match_operand:VEC_I 2 "vint_operand")
> -	   (match_operand:VEC_I 3 "vint_operand")]))]
> -  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
> +	  [(match_operand:VEC_IC 2 "vint_operand")
> +	   (match_operand:VEC_IC 3 "vint_operand")]))]
> +  "(VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && <MODE>mode!= V1TImode)
> +   || (<MODE>mode == V1TImode && TARGET_POWER10)"
>  {
>    enum rtx_code code = GET_CODE (operands[1]);
>    rtx tmp = gen_reg_rtx (<MODE>mode);
> @@ -573,11 +577,12 @@ (define_expand "vec_cmp<mode><mode>"
> 
>  ;; For unsigned integer vectors comparison.
>  (define_expand "vec_cmpu<mode><mode>"
> -  [(set (match_operand:VEC_I 0 "vint_operand")
> +  [(set (match_operand:VEC_IC 0 "vint_operand")
>  	(match_operator 1 "unsigned_or_equality_comparison_operator"
> -	  [(match_operand:VEC_I 2 "vint_operand")
> -	   (match_operand:VEC_I 3 "vint_operand")]))]
> -  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
> +	  [(match_operand:VEC_IC 2 "vint_operand")
> +	   (match_operand:VEC_IC 3 "vint_operand")]))]
> +  "(VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && <MODE>mode != V1TImode)
> +   || (<MODE>mode == V1TImode && TARGET_POWER10)"
>  {
>    enum rtx_code code = GET_CODE (operands[1]);
>    rtx tmp = gen_reg_rtx (<MODE>mode);
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103316.c b/gcc/testsuite/gcc.target/powerpc/pr103316.c
> new file mode 100644
> index 00000000000..da2121a30de
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr103316.c
> @@ -0,0 +1,79 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +#include <altivec.h>
> +
> +vector bool __int128
> +test_eq (vector signed __int128 a, vector signed __int128 b)
> +{
> +  return a == b;
> +}
> +
> +vector bool __int128
> +test_ne (vector signed __int128 a, vector signed __int128 b)
> +{
> +  return a != b;
> +}
> +
> +vector bool __int128
> +test_gt (vector signed __int128 a, vector signed __int128 b)
> +{
> +  return a > b;
> +}
> +
> +vector bool __int128
> +test_ge (vector signed __int128 a, vector signed __int128 b)
> +{
> +  return a >= b;
> +}
> +
> +vector bool __int128
> +test_lt (vector signed __int128 a, vector signed __int128 b)
> +{
> +  return a < b;
> +}
> +
> +vector bool __int128
> +test_le (vector signed __int128 a, vector signed __int128 b)
> +{
> +  return a <= b;
> +}
> +
> +vector bool __int128
> +testu_eq (vector unsigned __int128 a, vector unsigned __int128 b)
> +{
> +  return a == b;
> +}
> +
> +vector bool __int128
> +testu_ne (vector unsigned __int128 a, vector unsigned __int128 b)
> +{
> +  return a != b;
> +}
> +
> +vector bool __int128
> +testu_gt (vector unsigned __int128 a, vector unsigned __int128 b)
> +{
> +  return a > b;
> +}
> +
> +vector bool __int128
> +testu_ge (vector unsigned __int128 a, vector unsigned __int128 b)
> +{
> +  return a >= b;
> +}
> +
> +vector bool __int128
> +testu_lt (vector unsigned __int128 a, vector unsigned __int128 b)
> +{
> +  return a < b;
> +}
> +
> +vector bool __int128
> +testu_le (vector unsigned __int128 a, vector unsigned __int128 b)
> +{
> +  return a <= b;
> +}
> +
> +/* { dg-final { scan-assembler-times "vcmp" 12 } } */

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: PATCH, rs6000] Add V1TI into vector comparison expand [PR103316]
  2022-03-10  6:31 PATCH, rs6000] Add V1TI into vector comparison expand [PR103316] HAO CHEN GUI
  2022-03-15  2:05 ` Ping^1 [PATCH, " HAO CHEN GUI
@ 2022-03-15  8:59 ` Kewen.Lin
  1 sibling, 0 replies; 3+ messages in thread
From: Kewen.Lin @ 2022-03-15  8:59 UTC (permalink / raw)
  To: HAO CHEN GUI; +Cc: Peter Bergner, David, Segher Boessenkool, gcc-patches

Hi Haochen,

Some minor comments are inlined.

on 2022/3/10 2:31 PM, HAO CHEN GUI via Gcc-patches wrote:
> Hi,
>    This patch adds V1TI mode into mode iterator used in vector comparison
> expands.With the patch, both built-ins and direct comparison could generate
> P10 new V1TI comparison instructions.
> 
>    Bootstrapped and tested on ppc64 Linux BE and LE with no regressions. Is
> this okay for trunk? Any recommendations? Thanks a lot.
> 
> ChangeLog
> 2022-03-09 Haochen Gui <guihaoc@linux.ibm.com>
> 
> gcc/
> 	PR target/103316
> 	* config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_builtin): Enable
> 	gimple folding for RS6000_BIF_VCMPEQUT, RS6000_BIF_VCMPNET,
> 	RS6000_BIF_CMPGE_1TI, RS6000_BIF_CMPGE_U1TI, RS6000_BIF_VCMPGTUT,
> 	RS6000_BIF_VCMPGTST, RS6000_BIF_CMPLE_1TI, RS6000_BIF_CMPLE_U1TI.
> 	* config/rs6000/vector.md (VEC_IC): Define. Add support for new Power10
> 	V1TI instructions.
> 	(vec_cmp<mode><mode>): Set mode iterator to VEC_IC.
> 	(vec_cmpu<mode><mode>): Likewise.
> 
> gcc/testsuite/
> 	PR target/103316
> 	* gcc.target/powerpc/pr103316.c: New.
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
> index 5d34c1bcfc9..143effa89bf 100644
> --- a/gcc/config/rs6000/rs6000-builtin.cc
> +++ b/gcc/config/rs6000/rs6000-builtin.cc
> @@ -1994,6 +1994,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_VCMPEQUH:
>      case RS6000_BIF_VCMPEQUW:
>      case RS6000_BIF_VCMPEQUD:
> +    case RS6000_BIF_VCMPEQUT:
>      /* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple
>         folding produces worse code for 128-bit compares.  */

The comment above is saying why there is no RS6000_BIF_VCMPEQUT before, IIUC the point
doesn't hold any more with your patch.  So could you remove it to avoid possible
confusions?  Also some similar places ...

>        fold_compare_helper (gsi, EQ_EXPR, stmt);
> @@ -2002,6 +2003,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_VCMPNEB:
>      case RS6000_BIF_VCMPNEH:
>      case RS6000_BIF_VCMPNEW:
> +    case RS6000_BIF_VCMPNET:
>      /* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple
>         folding produces worse code for 128-bit compares.  */

here ...

>        fold_compare_helper (gsi, NE_EXPR, stmt);
> @@ -2015,6 +2017,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_CMPGE_U4SI:
>      case RS6000_BIF_CMPGE_2DI:
>      case RS6000_BIF_CMPGE_U2DI:
> +    case RS6000_BIF_CMPGE_1TI:
> +    case RS6000_BIF_CMPGE_U1TI:
>      /* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI
>         for now, because gimple folding produces worse code for 128-bit
>         compares.  */

here...

> @@ -2029,6 +2033,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_VCMPGTUW:
>      case RS6000_BIF_VCMPGTUD:
>      case RS6000_BIF_VCMPGTSD:
> +    case RS6000_BIF_VCMPGTUT:
> +    case RS6000_BIF_VCMPGTST:
>      /* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST
>         for now, because gimple folding produces worse code for 128-bit
>         compares.  */

here...

> @@ -2043,6 +2049,8 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>      case RS6000_BIF_CMPLE_U4SI:
>      case RS6000_BIF_CMPLE_2DI:
>      case RS6000_BIF_CMPLE_U2DI:
> +    case RS6000_BIF_CMPLE_1TI:
> +    case RS6000_BIF_CMPLE_U1TI:
>      /* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI
>         for now, because gimple folding produces worse code for 128-bit
>         compares.  */

here...

> diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
> index b87a742cca8..1afb8a6d786 100644
> --- a/gcc/config/rs6000/vector.md
> +++ b/gcc/config/rs6000/vector.md
> @@ -26,6 +26,9 @@
>  ;; Vector int modes
>  (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])
> 
> +;; Vector int modes for comparison
> +(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI V1TI])
> +

Maybe we can make this define be like:

(define_mode_iterator VEC_IC [V16QI V8HI V4SI V2DI (V1TI "TARGET_POWER10")])

...

>  ;; 128-bit int modes
>  (define_mode_iterator VEC_TI [V1TI TI])
> 
> @@ -533,11 +536,12 @@ (define_expand "vcond_mask_<mode><VEC_int>"
> 
>  ;; For signed integer vectors comparison.
>  (define_expand "vec_cmp<mode><mode>"
> -  [(set (match_operand:VEC_I 0 "vint_operand")
> +  [(set (match_operand:VEC_IC 0 "vint_operand")
>  	(match_operator 1 "signed_or_equality_comparison_operator"
> -	  [(match_operand:VEC_I 2 "vint_operand")
> -	   (match_operand:VEC_I 3 "vint_operand")]))]
> -  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
> +	  [(match_operand:VEC_IC 2 "vint_operand")
> +	   (match_operand:VEC_IC 3 "vint_operand")]))]
> +  "(VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && <MODE>mode!= V1TImode)
> +   || (<MODE>mode == V1TImode && TARGET_POWER10)"

and this condition can be kept as simple with VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)?


>  {
>    enum rtx_code code = GET_CODE (operands[1]);
>    rtx tmp = gen_reg_rtx (<MODE>mode);
> @@ -573,11 +577,12 @@ (define_expand "vec_cmp<mode><mode>"
> 
>  ;; For unsigned integer vectors comparison.
>  (define_expand "vec_cmpu<mode><mode>"
> -  [(set (match_operand:VEC_I 0 "vint_operand")
> +  [(set (match_operand:VEC_IC 0 "vint_operand")
>  	(match_operator 1 "unsigned_or_equality_comparison_operator"
> -	  [(match_operand:VEC_I 2 "vint_operand")
> -	   (match_operand:VEC_I 3 "vint_operand")]))]
> -  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
> +	  [(match_operand:VEC_IC 2 "vint_operand")
> +	   (match_operand:VEC_IC 3 "vint_operand")]))]
> +  "(VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && <MODE>mode != V1TImode)
> +   || (<MODE>mode == V1TImode && TARGET_POWER10)"

same as above.

>  {
>    enum rtx_code code = GET_CODE (operands[1]);
>    rtx tmp = gen_reg_rtx (<MODE>mode);
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103316.c b/gcc/testsuite/gcc.target/powerpc/pr103316.c

It seems better to have another test case with explicit builtin function calls like vec_cmp{gt, eq, ge ... }
to cover the changes in rs6000_gimple_fold_builtin?

> new file mode 100644
> index 00000000000..da2121a30de
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr103316.c
> @@ -0,0 +1,79 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +#include <altivec.h>

Nit: I guess this include is useless?

BR,
Kewen

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-03-15  9:00 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-10  6:31 PATCH, rs6000] Add V1TI into vector comparison expand [PR103316] HAO CHEN GUI
2022-03-15  2:05 ` Ping^1 [PATCH, " HAO CHEN GUI
2022-03-15  8:59 ` PATCH, " Kewen.Lin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).