public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fold _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple ABSU_EXPR + VCE.
@ 2023-06-06  4:31 liuhongt
  2023-06-06  4:31 ` [PATCH] Don't fold _mm{,256}_blendv_epi8 into (mask < 0 ? src1 : src2) when -funsigned-char liuhongt
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: liuhongt @ 2023-06-06  4:31 UTC (permalink / raw)
  To: gcc-patches; +Cc: crazylht, hjl.tools

r14-1145 folds the intrinsics into gimple ABS_EXPR, which has UB for
TYPE_MIN, but PABSB stores the unsigned result into dst. The patch
uses ABSU_EXPR + VCE instead of ABS_EXPR.

Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
vector absm2 is guarded with TARGET_MMX_WITH_SSE.

Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
Ok for trunk?


gcc/ChangeLog:

	PR target/110108
	* config/i386/i386.cc (ix86_gimple_fold_builtin): Fold
	_mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple
	ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o
	TARGET_64BIT.
	* config/i386/i386-builtin.def: Replace CODE_FOR_nothing with
	real codename for __builtin_ia32_pabs{b,w,d}.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr110108.c: New test.
---
 gcc/config/i386/i386-builtin.def         |  6 ++--
 gcc/config/i386/i386.cc                  | 44 ++++++++++++++++++++----
 gcc/testsuite/gcc.target/i386/pr110108.c | 16 +++++++++
 3 files changed, 56 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 383b68a9bb8..7ba5b6a9d11 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd"
 
 /* SSSE3 */
 BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
 BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
 BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
 
 BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
 BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index d4ff56ee8dd..b09b3c79e99 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -18433,6 +18433,7 @@ bool
 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 {
   gimple *stmt = gsi_stmt (*gsi), *g;
+  gimple_seq stmts = NULL;
   tree fndecl = gimple_call_fndecl (stmt);
   gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
   int n_args = gimple_call_num_args (stmt);
@@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	{
 	  loc = gimple_location (stmt);
 	  tree type = TREE_TYPE (arg2);
-	  gimple_seq stmts = NULL;
 	  if (VECTOR_FLOAT_TYPE_P (type))
 	    {
 	      tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
@@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	  tree zero_vec = build_zero_cst (type);
 	  tree minus_one_vec = build_minus_one_cst (type);
 	  tree cmp_type = truth_type_for (type);
-	  gimple_seq stmts = NULL;
 	  tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
 	  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
 	  g = gimple_build_assign (gimple_call_lhs (stmt),
@@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       break;
 
     case IX86_BUILTIN_PABSB:
+    case IX86_BUILTIN_PABSW:
+    case IX86_BUILTIN_PABSD:
+      /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE.  */
+      if (!TARGET_64BIT)
+	break;
+      /* FALLTHRU.  */
     case IX86_BUILTIN_PABSB128:
     case IX86_BUILTIN_PABSB256:
     case IX86_BUILTIN_PABSB512:
-    case IX86_BUILTIN_PABSW:
     case IX86_BUILTIN_PABSW128:
     case IX86_BUILTIN_PABSW256:
     case IX86_BUILTIN_PABSW512:
-    case IX86_BUILTIN_PABSD:
     case IX86_BUILTIN_PABSD128:
     case IX86_BUILTIN_PABSD256:
     case IX86_BUILTIN_PABSD512:
@@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       if (n_args > 1
 	  && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
 	break;
-      loc = gimple_location (stmt);
-      g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0);
-      gsi_replace (gsi, g, false);
+      {
+	tree utype, ures, vce;
+	switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0))))
+	  {
+	  case E_QImode:
+	    utype = unsigned_intQI_type_node;
+	    break;
+	  case E_HImode:
+	    utype = unsigned_intHI_type_node;
+	    break;
+	  case E_SImode:
+	    utype = unsigned_intSI_type_node;
+	    break;
+	  case E_DImode:
+	    utype = long_long_unsigned_type_node;
+	    break;
+	  default:
+	    gcc_unreachable ();
+	  }
+	utype = get_same_sized_vectype (utype, TREE_TYPE (arg0));
+	/* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
+	   instead of ABS_EXPR to handle the overflow case (TYPE_MIN).  */
+	ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
+	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	loc = gimple_location (stmt);
+	vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
+	g = gimple_build_assign (gimple_call_lhs (stmt),
+				 VIEW_CONVERT_EXPR, vce);
+	gsi_replace (gsi, g, false);
+      }
       return true;
 
     default:
diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c
new file mode 100644
index 00000000000..cd05763b9bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110108.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2" } */
+/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
+#include <immintrin.h>
+
+__m128i do_stuff_128(__m128i X0, __m128i X1) {
+  __m128i AbsX0 = _mm_abs_epi8(X0);
+  __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0);
+  return Result;
+}
+
+__m256i do_stuff_256(__m256i X0, __m256i X1) {
+  __m256i AbsX0 = _mm256_abs_epi8(X0);
+  __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0);
+  return Result;
+}
-- 
2.39.1.388.g2fc9e9ca3c


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH] Don't fold _mm{,256}_blendv_epi8 into (mask < 0 ? src1 : src2) when -funsigned-char.
  2023-06-06  4:31 [PATCH] Fold _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple ABSU_EXPR + VCE liuhongt
@ 2023-06-06  4:31 ` liuhongt
  2023-06-06  4:46   ` [PATCH] Don't fold _mm{, 256}_blendv_epi8 " Andrew Pinski
  2023-06-06  4:49 ` [PATCH] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE Andrew Pinski
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 14+ messages in thread
From: liuhongt @ 2023-06-06  4:31 UTC (permalink / raw)
  To: gcc-patches; +Cc: crazylht, hjl.tools

When -funsigned-char is in effect, mask < 0 is always false, but
vpblendvb needs to check the most significant bit.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk and backport to GCC12/GCC13 release branch?

gcc/ChangeLog:

	PR target/110108
	* config/i386/i386-builtin.def (BDESC): Replace
	CODE_FOR_nothing with real code name for blendvb builtins.
	* config/i386/i386.cc (ix86_gimple_fold_builtin): Don't fold
	_mm{,256}_blendv_epi8 into (mask < 0 ? src1 : src2) when
	-funsigned-char.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr110108-2.c: New test.
---
 gcc/config/i386/i386-builtin.def           |  4 ++--
 gcc/config/i386/i386.cc                    |  7 +++++++
 gcc/testsuite/gcc.target/i386/pr110108-2.c | 14 ++++++++++++++
 3 files changed, 23 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110108-2.c

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 7ba5b6a9d11..b4c99ff62a2 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -944,7 +944,7 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", I
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_insertps_v4sf, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT)
-BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_nothing, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI)
+BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI)
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT)
 
 BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI)
@@ -1198,7 +1198,7 @@ BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256",  IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256",  IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
-BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI)
+BDESC (OPTION_MASK_ISA_AVX2, 0,  CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b09b3c79e99..f8f6c26c8eb 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -18548,6 +18548,13 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       /* FALLTHRU.  */
     case IX86_BUILTIN_PBLENDVB128:
     case IX86_BUILTIN_BLENDVPS:
+      /* Don't fold PBLENDVB under -funsigned-char since mask < 0
+	 will always be false at the gimple level.  */
+      if ((fn_code == IX86_BUILTIN_PBLENDVB128
+	   || fn_code == IX86_BUILTIN_PBLENDVB256)
+	  && !flag_signed_char)
+	break;
+
       gcc_assert (n_args == 3);
       arg0 = gimple_call_arg (stmt, 0);
       arg1 = gimple_call_arg (stmt, 1);
diff --git a/gcc/testsuite/gcc.target/i386/pr110108-2.c b/gcc/testsuite/gcc.target/i386/pr110108-2.c
new file mode 100644
index 00000000000..2d1d2fd4991
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110108-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2 -funsigned-char" } */
+/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
+
+#include <immintrin.h>
+__m128i do_stuff_128(__m128i X0, __m128i X1, __m128i X2) {
+  __m128i Result = _mm_blendv_epi8(X0, X1, X2);
+  return Result;
+}
+
+__m256i do_stuff_256(__m256i X0, __m256i X1, __m256i X2) {
+  __m256i Result = _mm256_blendv_epi8(X0, X1, X2);
+  return Result;
+}
-- 
2.39.1.388.g2fc9e9ca3c


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Don't fold _mm{, 256}_blendv_epi8 into (mask < 0 ? src1 : src2) when -funsigned-char.
  2023-06-06  4:31 ` [PATCH] Don't fold _mm{,256}_blendv_epi8 into (mask < 0 ? src1 : src2) when -funsigned-char liuhongt
@ 2023-06-06  4:46   ` Andrew Pinski
  2023-06-06  8:21     ` [PATCH v2] Explicitly view_convert_expr mask to signed type when folding pblendvb builtins liuhongt
  0 siblings, 1 reply; 14+ messages in thread
From: Andrew Pinski @ 2023-06-06  4:46 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches, crazylht, hjl.tools

On Mon, Jun 5, 2023 at 9:34 PM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Since mask < 0 will be always false when -funsigned-char, but
> vpblendvb needs to check the most significant bit.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk and backport to GCC12/GCC13 release branch?

I think the following is a better patch: it will always be correct and
will still get folded (correctly) at the gimple level:
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index d4ff56ee8dd..02bf5ba93a5 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -18561,8 +18561,10 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
              tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
                ? intSI_type_node : intDI_type_node;
              type = get_same_sized_vectype (itype, type);
-             arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
            }
+         else
+           type = signed_type_for (type);
+         arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
          tree zero_vec = build_zero_cst (type);
          tree cmp_type = truth_type_for (type);
          tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);


Thanks,
Andrew Pinski


>
> gcc/ChangeLog:
>
>         PR target/110108
>         * config/i386/i386-builtin.def (BDESC): Replace
>         CODE_FOR_nothing with real code name for blendvb builtins.
>         * config/i386/i386.cc (ix86_gimple_fold_builtin): Don't fold
>         _mm{,256}_blendv_epi8 into (mask < 0 ? src1 : src2) when
>         -funsigned-char.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr110108-2.c: New test.
> ---
>  gcc/config/i386/i386-builtin.def           |  4 ++--
>  gcc/config/i386/i386.cc                    |  7 +++++++
>  gcc/testsuite/gcc.target/i386/pr110108-2.c | 14 ++++++++++++++
>  3 files changed, 23 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr110108-2.c
>
> diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> index 7ba5b6a9d11..b4c99ff62a2 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -944,7 +944,7 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", I
>  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
>  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_insertps_v4sf, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
>  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT)
> -BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_nothing, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI)
> +BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI)
>  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT)
>
>  BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI)
> @@ -1198,7 +1198,7 @@ BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI)
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256",  IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256",  IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
> -BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI)
> +BDESC (OPTION_MASK_ISA_AVX2, 0,  CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI)
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT)
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI)
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index b09b3c79e99..f8f6c26c8eb 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -18548,6 +18548,13 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>        /* FALLTHRU.  */
>      case IX86_BUILTIN_PBLENDVB128:
>      case IX86_BUILTIN_BLENDVPS:
> +      /* Don't fold PBLENDVB when funsigned-char since mask < 0
> +        will always be false in the gimple level.  */
> +      if ((fn_code == IX86_BUILTIN_PBLENDVB128
> +          || fn_code == IX86_BUILTIN_PBLENDVB256)
> +         && !flag_signed_char)
> +       break;
> +
>        gcc_assert (n_args == 3);
>        arg0 = gimple_call_arg (stmt, 0);
>        arg1 = gimple_call_arg (stmt, 1);
> diff --git a/gcc/testsuite/gcc.target/i386/pr110108-2.c b/gcc/testsuite/gcc.target/i386/pr110108-2.c
> new file mode 100644
> index 00000000000..2d1d2fd4991
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110108-2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx2 -O2 -funsigned-char" } */
> +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
> +
> +#include <immintrin.h>
> +__m128i do_stuff_128(__m128i X0, __m128i X1, __m128i X2) {
> +  __m128i Result = _mm_blendv_epi8(X0, X1, X2);
> +  return Result;
> +}
> +
> +__m256i do_stuff_256(__m256i X0, __m256i X1, __m256i X2) {
> +  __m256i Result = _mm256_blendv_epi8(X0, X1, X2);
> +  return Result;
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE.
  2023-06-06  4:31 [PATCH] Fold _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple ABSU_EXPR + VCE liuhongt
  2023-06-06  4:31 ` [PATCH] Don't fold _mm{,256}_blendv_epi8 into (mask < 0 ? src1 : src2) when -funsigned-char liuhongt
@ 2023-06-06  4:49 ` Andrew Pinski
  2023-06-06  8:15   ` Hongtao Liu
  2023-06-06  8:35   ` [PATCH 1/2] Fold _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} " liuhongt
  2023-06-06  9:08 ` [PATCH] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} " Uros Bizjak
  2023-06-06  9:11 ` Uros Bizjak
  3 siblings, 2 replies; 14+ messages in thread
From: Andrew Pinski @ 2023-06-06  4:49 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches, crazylht, hjl.tools

On Mon, Jun 5, 2023 at 9:34 PM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
> TYPE_MIN, but PABSB will store unsigned result into dst. The patch
> uses ABSU_EXPR + VCE instead of ABS_EXPR.
>
> Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
> vector absm2 is guarded with TARGET_MMX_WITH_SSE.
>
> Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> Ok for trunk?
>
>
> gcc/ChangeLog:
>
>         PR target/110108
>         * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold
>         _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple
>         ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o
>         TARGET_64BIT.
>         * config/i386/i386-builtin.def: Replace CODE_FOR_nothing with
>         real codename for __builtin_ia32_pabs{b,w,d}.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr110108.c: New test.
> ---
>  gcc/config/i386/i386-builtin.def         |  6 ++--
>  gcc/config/i386/i386.cc                  | 44 ++++++++++++++++++++----
>  gcc/testsuite/gcc.target/i386/pr110108.c | 16 +++++++++
>  3 files changed, 56 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c
>
> diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> index 383b68a9bb8..7ba5b6a9d11 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd"
>
>  /* SSSE3 */
>  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
>  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
>  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
>
>  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
>  BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index d4ff56ee8dd..b09b3c79e99 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -18433,6 +18433,7 @@ bool
>  ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>  {
>    gimple *stmt = gsi_stmt (*gsi), *g;
> +  gimple_seq stmts = NULL;
>    tree fndecl = gimple_call_fndecl (stmt);
>    gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
>    int n_args = gimple_call_num_args (stmt);
> @@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>         {
>           loc = gimple_location (stmt);
>           tree type = TREE_TYPE (arg2);
> -         gimple_seq stmts = NULL;
>           if (VECTOR_FLOAT_TYPE_P (type))
>             {
>               tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
> @@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>           tree zero_vec = build_zero_cst (type);
>           tree minus_one_vec = build_minus_one_cst (type);
>           tree cmp_type = truth_type_for (type);
> -         gimple_seq stmts = NULL;
>           tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
>           gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>           g = gimple_build_assign (gimple_call_lhs (stmt),
> @@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>        break;
>
>      case IX86_BUILTIN_PABSB:
> +    case IX86_BUILTIN_PABSW:
> +    case IX86_BUILTIN_PABSD:
> +      /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE.  */
> +      if (!TARGET_64BIT)
> +       break;
> +      /* FALLTHRU.  */
>      case IX86_BUILTIN_PABSB128:
>      case IX86_BUILTIN_PABSB256:
>      case IX86_BUILTIN_PABSB512:
> -    case IX86_BUILTIN_PABSW:
>      case IX86_BUILTIN_PABSW128:
>      case IX86_BUILTIN_PABSW256:
>      case IX86_BUILTIN_PABSW512:
> -    case IX86_BUILTIN_PABSD:
>      case IX86_BUILTIN_PABSD128:
>      case IX86_BUILTIN_PABSD256:
>      case IX86_BUILTIN_PABSD512:
> @@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>        if (n_args > 1
>           && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
>         break;
> -      loc = gimple_location (stmt);
> -      g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0);
> -      gsi_replace (gsi, g, false);
> +      {
> +       tree utype, ures, vce;
> +       switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0))))
> +         {
> +         case E_QImode:
> +           utype = unsigned_intQI_type_node;
> +           break;
> +         case E_HImode:
> +           utype = unsigned_intHI_type_node;
> +           break;
> +         case E_SImode:
> +           utype = unsigned_intSI_type_node;
> +           break;
> +         case E_DImode:
> +           utype = long_long_unsigned_type_node;
> +           break;
> +         default:
> +           gcc_unreachable ();
> +         }
> +       utype = get_same_sized_vectype (utype, TREE_TYPE (arg0));

The above switch can be replaced with simply
utype = unsigned_type_for (TREE_TYPE (arg0));

> +       /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
> +          instead of ABS_EXPR to hanlde overflow case(TYPE_MIN).  */
> +       ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
> +       gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> +       loc = gimple_location (stmt);
> +       vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
> +       g = gimple_build_assign (gimple_call_lhs (stmt),
> +                                VIEW_CONVERT_EXPR, vce);
> +       gsi_replace (gsi, g, false);
> +      }
>        return true;
>
>      default:
> diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c
> new file mode 100644
> index 00000000000..cd05763b9bf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110108.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx2 -O2" } */
> +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
> +#include <immintrin.h>
> +
> +__m128i do_stuff_128(__m128i X0, __m128i X1) {
> +  __m128i AbsX0 = _mm_abs_epi8(X0);
> +  __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0);
> +  return Result;
> +}
> +
> +__m256i do_stuff_256(__m256i X0, __m256i X1) {
> +  __m256i AbsX0 = _mm256_abs_epi8(X0);
> +  __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0);
> +  return Result;
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE.
  2023-06-06  4:49 ` [PATCH] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE Andrew Pinski
@ 2023-06-06  8:15   ` Hongtao Liu
  2023-06-06  8:35   ` [PATCH 1/2] Fold _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} " liuhongt
  1 sibling, 0 replies; 14+ messages in thread
From: Hongtao Liu @ 2023-06-06  8:15 UTC (permalink / raw)
  To: Andrew Pinski; +Cc: liuhongt, gcc-patches, hjl.tools

On Tue, Jun 6, 2023 at 12:49 PM Andrew Pinski <pinskia@gmail.com> wrote:
>
> On Mon, Jun 5, 2023 at 9:34 PM liuhongt via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
> > TYPE_MIN, but PABSB will store unsigned result into dst. The patch
> > uses ABSU_EXPR + VCE instead of ABS_EXPR.
> >
> > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
> > vector absm2 is guarded with TARGET_MMX_WITH_SSE.
> >
> > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> > Ok for trunk?
> >
> >
> > gcc/ChangeLog:
> >
> >         PR target/110108
> >         * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold
> >         _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple
> >         ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o
> >         TARGET_64BIT.
> >         * config/i386/i386-builtin.def: Replace CODE_FOR_nothing with
> >         real codename for __builtin_ia32_pabs{b,w,d}.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/i386/pr110108.c: New test.
> > ---
> >  gcc/config/i386/i386-builtin.def         |  6 ++--
> >  gcc/config/i386/i386.cc                  | 44 ++++++++++++++++++++----
> >  gcc/testsuite/gcc.target/i386/pr110108.c | 16 +++++++++
> >  3 files changed, 56 insertions(+), 10 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c
> >
> > diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> > index 383b68a9bb8..7ba5b6a9d11 100644
> > --- a/gcc/config/i386/i386-builtin.def
> > +++ b/gcc/config/i386/i386-builtin.def
> > @@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd"
> >
> >  /* SSSE3 */
> >  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
> > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> >  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
> > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> >  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
> > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
> > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
> >
> >  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
> >  BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index d4ff56ee8dd..b09b3c79e99 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -18433,6 +18433,7 @@ bool
> >  ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> >  {
> >    gimple *stmt = gsi_stmt (*gsi), *g;
> > +  gimple_seq stmts = NULL;
> >    tree fndecl = gimple_call_fndecl (stmt);
> >    gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
> >    int n_args = gimple_call_num_args (stmt);
> > @@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> >         {
> >           loc = gimple_location (stmt);
> >           tree type = TREE_TYPE (arg2);
> > -         gimple_seq stmts = NULL;
> >           if (VECTOR_FLOAT_TYPE_P (type))
> >             {
> >               tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
> > @@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> >           tree zero_vec = build_zero_cst (type);
> >           tree minus_one_vec = build_minus_one_cst (type);
> >           tree cmp_type = truth_type_for (type);
> > -         gimple_seq stmts = NULL;
> >           tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
> >           gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> >           g = gimple_build_assign (gimple_call_lhs (stmt),
> > @@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> >        break;
> >
> >      case IX86_BUILTIN_PABSB:
> > +    case IX86_BUILTIN_PABSW:
> > +    case IX86_BUILTIN_PABSD:
> > +      /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE.  */
> > +      if (!TARGET_64BIT)
> > +       break;
> > +      /* FALLTHRU.  */
> >      case IX86_BUILTIN_PABSB128:
> >      case IX86_BUILTIN_PABSB256:
> >      case IX86_BUILTIN_PABSB512:
> > -    case IX86_BUILTIN_PABSW:
> >      case IX86_BUILTIN_PABSW128:
> >      case IX86_BUILTIN_PABSW256:
> >      case IX86_BUILTIN_PABSW512:
> > -    case IX86_BUILTIN_PABSD:
> >      case IX86_BUILTIN_PABSD128:
> >      case IX86_BUILTIN_PABSD256:
> >      case IX86_BUILTIN_PABSD512:
> > @@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> >        if (n_args > 1
> >           && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
> >         break;
> > -      loc = gimple_location (stmt);
> > -      g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0);
> > -      gsi_replace (gsi, g, false);
> > +      {
> > +       tree utype, ures, vce;
> > +       switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0))))
> > +         {
> > +         case E_QImode:
> > +           utype = unsigned_intQI_type_node;
> > +           break;
> > +         case E_HImode:
> > +           utype = unsigned_intHI_type_node;
> > +           break;
> > +         case E_SImode:
> > +           utype = unsigned_intSI_type_node;
> > +           break;
> > +         case E_DImode:
> > +           utype = long_long_unsigned_type_node;
> > +           break;
> > +         default:
> > +           gcc_unreachable ();
> > +         }
> > +       utype = get_same_sized_vectype (utype, TREE_TYPE (arg0));
>
> The above switch can be replaced with just simply
> utype = unsigned_type_for (TREE_TYPE (arg0));
Yes, thanks.
>
> > +       /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
> > +          instead of ABS_EXPR to hanlde overflow case(TYPE_MIN).  */
> > +       ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
> > +       gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> > +       loc = gimple_location (stmt);
> > +       vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
> > +       g = gimple_build_assign (gimple_call_lhs (stmt),
> > +                                VIEW_CONVERT_EXPR, vce);
> > +       gsi_replace (gsi, g, false);
> > +      }
> >        return true;
> >
> >      default:
> > diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c
> > new file mode 100644
> > index 00000000000..cd05763b9bf
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr110108.c
> > @@ -0,0 +1,16 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-mavx2 -O2" } */
> > +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
> > +#include <immintrin.h>
> > +
> > +__m128i do_stuff_128(__m128i X0, __m128i X1) {
> > +  __m128i AbsX0 = _mm_abs_epi8(X0);
> > +  __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0);
> > +  return Result;
> > +}
> > +
> > +__m256i do_stuff_256(__m256i X0, __m256i X1) {
> > +  __m256i AbsX0 = _mm256_abs_epi8(X0);
> > +  __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0);
> > +  return Result;
> > +}
> > --
> > 2.39.1.388.g2fc9e9ca3c
> >



-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2] Explicitly view_convert_expr mask to signed type when folding pblendvb builtins.
  2023-06-06  4:46   ` [PATCH] Don't fold _mm{, 256}_blendv_epi8 " Andrew Pinski
@ 2023-06-06  8:21     ` liuhongt
  2023-06-09  1:49       ` Hongtao Liu
  0 siblings, 1 reply; 14+ messages in thread
From: liuhongt @ 2023-06-06  8:21 UTC (permalink / raw)
  To: gcc-patches; +Cc: crazylht, hjl.tools

> I think this is a better patch and will always be correct and still
> get folded at the gimple level (correctly):
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index d4ff56ee8dd..02bf5ba93a5 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -18561,8 +18561,10 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>               tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
>                 ? intSI_type_node : intDI_type_node;
>               type = get_same_sized_vectype (itype, type);
> -             arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
>             }
> +         else
> +           type = signed_type_for (type);
> +         arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
>           tree zero_vec = build_zero_cst (type);
>           tree cmp_type = truth_type_for (type);
>           tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
>
>

Yes, thanks.

Here's the updated patch:

With -funsigned-char, mask < 0 is always false for a vector of char,
but vpblendvb needs to check the most significant bit. The patch
explicitly view-converts (VCE) the mask to a vector of signed char.


gcc/ChangeLog:

	PR target/110108
	* config/i386/i386.cc (ix86_gimple_fold_builtin): Explicitly
	view_convert_expr mask to signed type when folding pblendvb
	builtins.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr110108-2.c: New test.
---
 gcc/config/i386/i386.cc                    |  4 +++-
 gcc/testsuite/gcc.target/i386/pr110108-2.c | 14 ++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110108-2.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index da20c2c49de..4e594a9c88e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -18561,8 +18561,10 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	      tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
 		? intSI_type_node : intDI_type_node;
 	      type = get_same_sized_vectype (itype, type);
-	      arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
 	    }
+	  else
+	    type = signed_type_for (type);
+	  arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
 	  tree zero_vec = build_zero_cst (type);
 	  tree cmp_type = truth_type_for (type);
 	  tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
diff --git a/gcc/testsuite/gcc.target/i386/pr110108-2.c b/gcc/testsuite/gcc.target/i386/pr110108-2.c
new file mode 100644
index 00000000000..2d1d2fd4991
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110108-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2 -funsigned-char" } */
+/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
+
+#include <immintrin.h>
+__m128i do_stuff_128(__m128i X0, __m128i X1, __m128i X2) {
+  __m128i Result = _mm_blendv_epi8(X0, X1, X2);
+  return Result;
+}
+
+__m256i do_stuff_256(__m256i X0, __m256i X1, __m256i X2) {
+  __m256i Result = _mm256_blendv_epi8(X0, X1, X2);
+  return Result;
+}
-- 
2.39.1.388.g2fc9e9ca3c


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/2] Fold _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple ABSU_EXPR + VCE.
  2023-06-06  4:49 ` [PATCH] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE Andrew Pinski
  2023-06-06  8:15   ` Hongtao Liu
@ 2023-06-06  8:35   ` liuhongt
  1 sibling, 0 replies; 14+ messages in thread
From: liuhongt @ 2023-06-06  8:35 UTC (permalink / raw)
  To: gcc-patches; +Cc: crazylht, hjl.tools

r14-1145 folds the intrinsics into gimple ABS_EXPR, which has UB for
TYPE_MIN, but PABSB stores an unsigned result into dst. The patch
uses ABSU_EXPR + VCE instead of ABS_EXPR.

Also don't fold _mm_abs_{pi8,pi16,pi32} without TARGET_64BIT, since the
64-bit vector absm2 is guarded with TARGET_MMX_WITH_SSE.

gcc/ChangeLog:

	PR target/110108
	* config/i386/i386.cc (ix86_gimple_fold_builtin): Fold
	_mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple
	ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o
	TARGET_64BIT.
	* config/i386/i386-builtin.def: Replace CODE_FOR_nothing with
	real codename for __builtin_ia32_pabs{b,w,d}.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr110108.c: New test.
	* gcc.target/i386/pr110108-3.c: New test.
---
 gcc/config/i386/i386-builtin.def           |  6 ++---
 gcc/config/i386/i386.cc                    | 27 ++++++++++++++++------
 gcc/testsuite/gcc.target/i386/pr109900.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr110108-3.c | 22 ++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr110108.c   | 16 +++++++++++++
 5 files changed, 62 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110108-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 383b68a9bb8..7ba5b6a9d11 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd"
 
 /* SSSE3 */
 BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
 BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
 BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
 
 BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
 BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index d4ff56ee8dd..da20c2c49de 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -18433,6 +18433,7 @@ bool
 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 {
   gimple *stmt = gsi_stmt (*gsi), *g;
+  gimple_seq stmts = NULL;
   tree fndecl = gimple_call_fndecl (stmt);
   gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
   int n_args = gimple_call_num_args (stmt);
@@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	{
 	  loc = gimple_location (stmt);
 	  tree type = TREE_TYPE (arg2);
-	  gimple_seq stmts = NULL;
 	  if (VECTOR_FLOAT_TYPE_P (type))
 	    {
 	      tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
@@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	  tree zero_vec = build_zero_cst (type);
 	  tree minus_one_vec = build_minus_one_cst (type);
 	  tree cmp_type = truth_type_for (type);
-	  gimple_seq stmts = NULL;
 	  tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
 	  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
 	  g = gimple_build_assign (gimple_call_lhs (stmt),
@@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       break;
 
     case IX86_BUILTIN_PABSB:
+    case IX86_BUILTIN_PABSW:
+    case IX86_BUILTIN_PABSD:
+      /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE.  */
+      if (!TARGET_64BIT)
+	break;
+      /* FALLTHRU.  */
     case IX86_BUILTIN_PABSB128:
     case IX86_BUILTIN_PABSB256:
     case IX86_BUILTIN_PABSB512:
-    case IX86_BUILTIN_PABSW:
     case IX86_BUILTIN_PABSW128:
     case IX86_BUILTIN_PABSW256:
     case IX86_BUILTIN_PABSW512:
-    case IX86_BUILTIN_PABSD:
     case IX86_BUILTIN_PABSD128:
     case IX86_BUILTIN_PABSD256:
     case IX86_BUILTIN_PABSD512:
@@ -18933,9 +18936,19 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       if (n_args > 1
 	  && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
 	break;
-      loc = gimple_location (stmt);
-      g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0);
-      gsi_replace (gsi, g, false);
+      {
+	tree utype, ures, vce;
+	utype = unsigned_type_for (TREE_TYPE (arg0));
+	/* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
+	   instead of ABS_EXPR to hanlde overflow case(TYPE_MIN).  */
+	ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
+	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	loc = gimple_location (stmt);
+	vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
+	g = gimple_build_assign (gimple_call_lhs (stmt),
+				 VIEW_CONVERT_EXPR, vce);
+	gsi_replace (gsi, g, false);
+      }
       return true;
 
     default:
diff --git a/gcc/testsuite/gcc.target/i386/pr109900.c b/gcc/testsuite/gcc.target/i386/pr109900.c
index f87e8cc11de..db67aac380c 100644
--- a/gcc/testsuite/gcc.target/i386/pr109900.c
+++ b/gcc/testsuite/gcc.target/i386/pr109900.c
@@ -1,7 +1,7 @@
 #include <immintrin.h>
 /* { dg-do compile } */
 /* { dg-options "-mavx512bw -O2 -mavx512vl -fdump-tree-optimized" } */
-/* { dg-final { scan-tree-dump-not "builtin_ia32_pabs" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "builtin_ia32_pabs" "optimized" { target { ! ia32 } } } } */
 
 
 __m64
diff --git a/gcc/testsuite/gcc.target/i386/pr110108-3.c b/gcc/testsuite/gcc.target/i386/pr110108-3.c
new file mode 100644
index 00000000000..566f744d80c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110108-3.c
@@ -0,0 +1,22 @@
+#include <immintrin.h>
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-times "pabs" 3 } } */
+
+__m64
+absb_64 (__m64 a)
+{
+  return _mm_abs_pi8(a);
+}
+
+__m64
+absw_64 (__m64 a)
+{
+  return _mm_abs_pi16(a);
+}
+
+__m64
+absd_64 (__m64 a)
+{
+  return _mm_abs_pi32(a);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c
new file mode 100644
index 00000000000..cd05763b9bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110108.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2" } */
+/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
+#include <immintrin.h>
+
+__m128i do_stuff_128(__m128i X0, __m128i X1) {
+  __m128i AbsX0 = _mm_abs_epi8(X0);
+  __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0);
+  return Result;
+}
+
+__m256i do_stuff_256(__m256i X0, __m256i X1) {
+  __m256i AbsX0 = _mm256_abs_epi8(X0);
+  __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0);
+  return Result;
+}
-- 
2.39.1.388.g2fc9e9ca3c


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE.
  2023-06-06  4:31 [PATCH] Fold _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple ABSU_EXPR + VCE liuhongt
  2023-06-06  4:31 ` [PATCH] Don't fold _mm{,256}_blendv_epi8 into (mask < 0 ? src1 : src2) when -funsigned-char liuhongt
  2023-06-06  4:49 ` [PATCH] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE Andrew Pinski
@ 2023-06-06  9:08 ` Uros Bizjak
  2023-06-06  9:11 ` Uros Bizjak
  3 siblings, 0 replies; 14+ messages in thread
From: Uros Bizjak @ 2023-06-06  9:08 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches, crazylht, hjl.tools

On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
> TYPE_MIN, but PABSB will store unsigned result into dst. The patch
> uses ABSU_EXPR + VCE instead of ABS_EXPR.
>
> Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
> vector absm2 is guarded with TARGET_MMX_WITH_SSE.
>
> Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> Ok for trunk?
>
>
> gcc/ChangeLog:
>
>         PR target/110108
>         * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold
>         _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple
>         ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o
>         TARGET_64BIT.
>         * config/i386/i386-builtin.def: Replace CODE_FOR_nothing with
>         real codename for __builtin_ia32_pabs{b,w,d}.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr110108.c: New test.
> ---
>  gcc/config/i386/i386-builtin.def         |  6 ++--
>  gcc/config/i386/i386.cc                  | 44 ++++++++++++++++++++----
>  gcc/testsuite/gcc.target/i386/pr110108.c | 16 +++++++++
>  3 files changed, 56 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c
>
> diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> index 383b68a9bb8..7ba5b6a9d11 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd"
>
>  /* SSSE3 */
>  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
>  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
>  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
>
>  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
>  BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index d4ff56ee8dd..b09b3c79e99 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -18433,6 +18433,7 @@ bool
>  ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>  {
>    gimple *stmt = gsi_stmt (*gsi), *g;
> +  gimple_seq stmts = NULL;
>    tree fndecl = gimple_call_fndecl (stmt);
>    gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
>    int n_args = gimple_call_num_args (stmt);
> @@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>         {
>           loc = gimple_location (stmt);
>           tree type = TREE_TYPE (arg2);
> -         gimple_seq stmts = NULL;
>           if (VECTOR_FLOAT_TYPE_P (type))
>             {
>               tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
> @@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>           tree zero_vec = build_zero_cst (type);
>           tree minus_one_vec = build_minus_one_cst (type);
>           tree cmp_type = truth_type_for (type);
> -         gimple_seq stmts = NULL;
>           tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
>           gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>           g = gimple_build_assign (gimple_call_lhs (stmt),
> @@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>        break;
>
>      case IX86_BUILTIN_PABSB:
> +    case IX86_BUILTIN_PABSW:
> +    case IX86_BUILTIN_PABSD:
> +      /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE.  */
> +      if (!TARGET_64BIT)

This should be !TARGET_MMX_WITH_SSE. TARGET_64BIT is not enough; see
the definition of TARGET_MMX_WITH_SSE in i386.h. OTOH, these builtins are
available for TARGET_MMX, so I'm not sure if the above check is needed
at all.

Uros.

> +       break;
> +      /* FALLTHRU.  */
>      case IX86_BUILTIN_PABSB128:
>      case IX86_BUILTIN_PABSB256:
>      case IX86_BUILTIN_PABSB512:
> -    case IX86_BUILTIN_PABSW:
>      case IX86_BUILTIN_PABSW128:
>      case IX86_BUILTIN_PABSW256:
>      case IX86_BUILTIN_PABSW512:
> -    case IX86_BUILTIN_PABSD:
>      case IX86_BUILTIN_PABSD128:
>      case IX86_BUILTIN_PABSD256:
>      case IX86_BUILTIN_PABSD512:
> @@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>        if (n_args > 1
>           && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
>         break;
> -      loc = gimple_location (stmt);
> -      g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0);
> -      gsi_replace (gsi, g, false);
> +      {
> +       tree utype, ures, vce;
> +       switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0))))
> +         {
> +         case E_QImode:
> +           utype = unsigned_intQI_type_node;
> +           break;
> +         case E_HImode:
> +           utype = unsigned_intHI_type_node;
> +           break;
> +         case E_SImode:
> +           utype = unsigned_intSI_type_node;
> +           break;
> +         case E_DImode:
> +           utype = long_long_unsigned_type_node;
> +           break;
> +         default:
> +           gcc_unreachable ();
> +         }
> +       utype = get_same_sized_vectype (utype, TREE_TYPE (arg0));
> +       /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
> +          instead of ABS_EXPR to hanlde overflow case(TYPE_MIN).  */
> +       ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
> +       gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> +       loc = gimple_location (stmt);
> +       vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
> +       g = gimple_build_assign (gimple_call_lhs (stmt),
> +                                VIEW_CONVERT_EXPR, vce);
> +       gsi_replace (gsi, g, false);
> +      }
>        return true;
>
>      default:
> diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c
> new file mode 100644
> index 00000000000..cd05763b9bf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110108.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx2 -O2" } */
> +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
> +#include <immintrin.h>
> +
> +__m128i do_stuff_128(__m128i X0, __m128i X1) {
> +  __m128i AbsX0 = _mm_abs_epi8(X0);
> +  __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0);
> +  return Result;
> +}
> +
> +__m256i do_stuff_256(__m256i X0, __m256i X1) {
> +  __m256i AbsX0 = _mm256_abs_epi8(X0);
> +  __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0);
> +  return Result;
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE.
  2023-06-06  4:31 [PATCH] Fold _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple ABSU_EXPR + VCE liuhongt
                   ` (2 preceding siblings ...)
  2023-06-06  9:08 ` [PATCH] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} " Uros Bizjak
@ 2023-06-06  9:11 ` Uros Bizjak
  2023-06-06 11:42   ` Hongtao Liu
  3 siblings, 1 reply; 14+ messages in thread
From: Uros Bizjak @ 2023-06-06  9:11 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches, crazylht, hjl.tools

On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
> TYPE_MIN, but PABSB will store unsigned result into dst. The patch
> uses ABSU_EXPR + VCE instead of ABS_EXPR.
>
> Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
> vector absm2 is guarded with TARGET_MMX_WITH_SSE.

Please note that we are using builtins here, so we should not fold to
absm2, but to ssse3_absm2, which is also available with TARGET_MMX.

Uros.

>
> Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> Ok for trunk?
>
>
> gcc/ChangeLog:
>
>         PR target/110108
>         * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold
>         _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple
>         ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o
>         TARGET_64BIT.
>         * config/i386/i386-builtin.def: Replace CODE_FOR_nothing with
>         real codename for __builtin_ia32_pabs{b,w,d}.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr110108.c: New test.
> ---
>  gcc/config/i386/i386-builtin.def         |  6 ++--
>  gcc/config/i386/i386.cc                  | 44 ++++++++++++++++++++----
>  gcc/testsuite/gcc.target/i386/pr110108.c | 16 +++++++++
>  3 files changed, 56 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c
>
> diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> index 383b68a9bb8..7ba5b6a9d11 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd"
>
>  /* SSSE3 */
>  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
>  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
>  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
>
>  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
>  BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index d4ff56ee8dd..b09b3c79e99 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -18433,6 +18433,7 @@ bool
>  ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>  {
>    gimple *stmt = gsi_stmt (*gsi), *g;
> +  gimple_seq stmts = NULL;
>    tree fndecl = gimple_call_fndecl (stmt);
>    gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
>    int n_args = gimple_call_num_args (stmt);
> @@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>         {
>           loc = gimple_location (stmt);
>           tree type = TREE_TYPE (arg2);
> -         gimple_seq stmts = NULL;
>           if (VECTOR_FLOAT_TYPE_P (type))
>             {
>               tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
> @@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>           tree zero_vec = build_zero_cst (type);
>           tree minus_one_vec = build_minus_one_cst (type);
>           tree cmp_type = truth_type_for (type);
> -         gimple_seq stmts = NULL;
>           tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
>           gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>           g = gimple_build_assign (gimple_call_lhs (stmt),
> @@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>        break;
>
>      case IX86_BUILTIN_PABSB:
> +    case IX86_BUILTIN_PABSW:
> +    case IX86_BUILTIN_PABSD:
> +      /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE.  */
> +      if (!TARGET_64BIT)
> +       break;
> +      /* FALLTHRU.  */
>      case IX86_BUILTIN_PABSB128:
>      case IX86_BUILTIN_PABSB256:
>      case IX86_BUILTIN_PABSB512:
> -    case IX86_BUILTIN_PABSW:
>      case IX86_BUILTIN_PABSW128:
>      case IX86_BUILTIN_PABSW256:
>      case IX86_BUILTIN_PABSW512:
> -    case IX86_BUILTIN_PABSD:
>      case IX86_BUILTIN_PABSD128:
>      case IX86_BUILTIN_PABSD256:
>      case IX86_BUILTIN_PABSD512:
> @@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>        if (n_args > 1
>           && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
>         break;
> -      loc = gimple_location (stmt);
> -      g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0);
> -      gsi_replace (gsi, g, false);
> +      {
> +       tree utype, ures, vce;
> +       switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0))))
> +         {
> +         case E_QImode:
> +           utype = unsigned_intQI_type_node;
> +           break;
> +         case E_HImode:
> +           utype = unsigned_intHI_type_node;
> +           break;
> +         case E_SImode:
> +           utype = unsigned_intSI_type_node;
> +           break;
> +         case E_DImode:
> +           utype = long_long_unsigned_type_node;
> +           break;
> +         default:
> +           gcc_unreachable ();
> +         }
> +       utype = get_same_sized_vectype (utype, TREE_TYPE (arg0));
> +       /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
> +          instead of ABS_EXPR to handle the overflow case (TYPE_MIN).  */
> +       ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
> +       gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> +       loc = gimple_location (stmt);
> +       vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
> +       g = gimple_build_assign (gimple_call_lhs (stmt),
> +                                VIEW_CONVERT_EXPR, vce);
> +       gsi_replace (gsi, g, false);
> +      }
>        return true;
>
>      default:
> diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c
> new file mode 100644
> index 00000000000..cd05763b9bf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110108.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx2 -O2" } */
> +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
> +#include <immintrin.h>
> +
> +__m128i do_stuff_128(__m128i X0, __m128i X1) {
> +  __m128i AbsX0 = _mm_abs_epi8(X0);
> +  __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0);
> +  return Result;
> +}
> +
> +__m256i do_stuff_256(__m256i X0, __m256i X1) {
> +  __m256i AbsX0 = _mm256_abs_epi8(X0);
> +  __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0);
> +  return Result;
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE.
  2023-06-06  9:11 ` Uros Bizjak
@ 2023-06-06 11:42   ` Hongtao Liu
  2023-06-06 14:36     ` Uros Bizjak
  0 siblings, 1 reply; 14+ messages in thread
From: Hongtao Liu @ 2023-06-06 11:42 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: liuhongt, gcc-patches, hjl.tools

On Tue, Jun 6, 2023 at 5:11 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
> > TYPE_MIN, but PABSB will store unsigned result into dst. The patch
> > uses ABSU_EXPR + VCE instead of ABS_EXPR.
> >
> > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
> > vector absm2 is guarded with TARGET_MMX_WITH_SSE.
>
> This should be !TARGET_MMX_WITH_SSE. TARGET_64BIT is not enough, see
> the definition of T_M_W_S in i386.h. OTOH, these builtins are
> available for TARGET_MMX, so I'm not sure if the above check is needed
> at all.
BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0,
CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB,
UNKNOWN, (int) V8QI_FTYPE_V8QI)

The ISA requirement (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX) will be
checked by ix86_check_builtin_isa_match, which is at the beginning of
ix86_gimple_fold_builtin.
Here, we're folding those builtins into gimple ABSU_EXPR, and
ABSU_EXPR<vector> will be lowered by the vec_lower pass when the backend
doesn't support the corresponding absm2_optab; that's why I only check
TARGET_64BIT here.

> Please note that we are using builtins here, so we should not fold to
> absm2, but to ssse3_absm2, which is also available with TARGET_MMX.
Yes, that's exactly why I checked TARGET_64BIT here: w/ TARGET_64BIT, the
backend supports absm2_optab, which exactly matches ssse3_absm2.
W/o TARGET_64BIT, the builtin shouldn't be folded into gimple ABSU_EXPR;
instead, the backend should expand it to ssse3_absm2.

>
> Uros.
>
> >
> > Bootstrapped and regtested on x86_64-linux-gnu{-m32,}.
> > Ok for trunk?
> >
> >
> > gcc/ChangeLog:
> >
> >         PR target/110108
> >         * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold
> >         _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple
> >         ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o
> >         TARGET_64BIT.
> >         * config/i386/i386-builtin.def: Replace CODE_FOR_nothing with
> >         real codename for __builtin_ia32_pabs{b,w,d}.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/i386/pr110108.c: New test.
> > ---
> >  gcc/config/i386/i386-builtin.def         |  6 ++--
> >  gcc/config/i386/i386.cc                  | 44 ++++++++++++++++++++----
> >  gcc/testsuite/gcc.target/i386/pr110108.c | 16 +++++++++
> >  3 files changed, 56 insertions(+), 10 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr110108.c
> >
> > diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> > index 383b68a9bb8..7ba5b6a9d11 100644
> > --- a/gcc/config/i386/i386-builtin.def
> > +++ b/gcc/config/i386/i386-builtin.def
> > @@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd"
> >
> >  /* SSSE3 */
> >  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
> > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> >  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
> > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> >  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
> > -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
> > +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
> >
> >  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
> >  BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index d4ff56ee8dd..b09b3c79e99 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -18433,6 +18433,7 @@ bool
> >  ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> >  {
> >    gimple *stmt = gsi_stmt (*gsi), *g;
> > +  gimple_seq stmts = NULL;
> >    tree fndecl = gimple_call_fndecl (stmt);
> >    gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
> >    int n_args = gimple_call_num_args (stmt);
> > @@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> >         {
> >           loc = gimple_location (stmt);
> >           tree type = TREE_TYPE (arg2);
> > -         gimple_seq stmts = NULL;
> >           if (VECTOR_FLOAT_TYPE_P (type))
> >             {
> >               tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
> > @@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> >           tree zero_vec = build_zero_cst (type);
> >           tree minus_one_vec = build_minus_one_cst (type);
> >           tree cmp_type = truth_type_for (type);
> > -         gimple_seq stmts = NULL;
> >           tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
> >           gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> >           g = gimple_build_assign (gimple_call_lhs (stmt),
> > @@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> >        break;
> >
> >      case IX86_BUILTIN_PABSB:
> > +    case IX86_BUILTIN_PABSW:
> > +    case IX86_BUILTIN_PABSD:
> > +      /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE.  */
> > +      if (!TARGET_64BIT)
> > +       break;
> > +      /* FALLTHRU.  */
> >      case IX86_BUILTIN_PABSB128:
> >      case IX86_BUILTIN_PABSB256:
> >      case IX86_BUILTIN_PABSB512:
> > -    case IX86_BUILTIN_PABSW:
> >      case IX86_BUILTIN_PABSW128:
> >      case IX86_BUILTIN_PABSW256:
> >      case IX86_BUILTIN_PABSW512:
> > -    case IX86_BUILTIN_PABSD:
> >      case IX86_BUILTIN_PABSD128:
> >      case IX86_BUILTIN_PABSD256:
> >      case IX86_BUILTIN_PABSD512:
> > @@ -18933,9 +18936,36 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> >        if (n_args > 1
> >           && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
> >         break;
> > -      loc = gimple_location (stmt);
> > -      g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0);
> > -      gsi_replace (gsi, g, false);
> > +      {
> > +       tree utype, ures, vce;
> > +       switch (GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0))))
> > +         {
> > +         case E_QImode:
> > +           utype = unsigned_intQI_type_node;
> > +           break;
> > +         case E_HImode:
> > +           utype = unsigned_intHI_type_node;
> > +           break;
> > +         case E_SImode:
> > +           utype = unsigned_intSI_type_node;
> > +           break;
> > +         case E_DImode:
> > +           utype = long_long_unsigned_type_node;
> > +           break;
> > +         default:
> > +           gcc_unreachable ();
> > +         }
> > +       utype = get_same_sized_vectype (utype, TREE_TYPE (arg0));
> > +       /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
> > +          instead of ABS_EXPR to hanlde overflow case(TYPE_MIN).  */
> > +       ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
> > +       gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> > +       loc = gimple_location (stmt);
> > +       vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
> > +       g = gimple_build_assign (gimple_call_lhs (stmt),
> > +                                VIEW_CONVERT_EXPR, vce);
> > +       gsi_replace (gsi, g, false);
> > +      }
> >        return true;
> >
> >      default:
> > diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c
> > new file mode 100644
> > index 00000000000..cd05763b9bf
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr110108.c
> > @@ -0,0 +1,16 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-mavx2 -O2" } */
> > +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
> > +#include <immintrin.h>
> > +
> > +__m128i do_stuff_128(__m128i X0, __m128i X1) {
> > +  __m128i AbsX0 = _mm_abs_epi8(X0);
> > +  __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0);
> > +  return Result;
> > +}
> > +
> > +__m256i do_stuff_256(__m256i X0, __m256i X1) {
> > +  __m256i AbsX0 = _mm256_abs_epi8(X0);
> > +  __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0);
> > +  return Result;
> > +}
> > --
> > 2.39.1.388.g2fc9e9ca3c
> >



-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE.
  2023-06-06 11:42   ` Hongtao Liu
@ 2023-06-06 14:36     ` Uros Bizjak
  2023-06-07  0:31       ` Hongtao Liu
  0 siblings, 1 reply; 14+ messages in thread
From: Uros Bizjak @ 2023-06-06 14:36 UTC (permalink / raw)
  To: Hongtao Liu; +Cc: liuhongt, gcc-patches, hjl.tools

On Tue, Jun 6, 2023 at 1:42 PM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Tue, Jun 6, 2023 at 5:11 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
> > > TYPE_MIN, but PABSB will store unsigned result into dst. The patch
> > > uses ABSU_EXPR + VCE instead of ABS_EXPR.
> > >
> > > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
> > > vector absm2 is guarded with TARGET_MMX_WITH_SSE.
> >
> >This should be !TARGET_MMX_WITH_SSE. TARGET_64BIT is not enough, see
> >the definition of T_M_W_S in i386.h. OTOH, these builtins are
> >available for TARGET_MMX, so I'm not sure if the above check is needed
> >at all.
> BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0,
> CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB,
> UNKNOWN, (int) V8QI_FTYPE_V8QI)
>
> ISA requirement(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX) will be
> checked by ix86_check_builtin_isa_match which is at the beginning of
> ix86_gimple_fold_builtin.
> Here, we're folding those builtin into gimple ABSU_EXPR, and
> ABSU_EXPR<vector> will be lowered by vec_lower pass when backend
> doesn't support corressponding absm2_optab, that's why i only check
> TARGET_64BIT here.
>
> > Please note that we are using builtins here, so we should not fold to
> > absm2, but to ssse3_absm2, which is also available with TARGET_MMX.
> Yes, that exactly why I checked TARGET_64BIT here, w/ TARGET_64BIT,
> backend suppport absm2_optab which exactly matches ssse3_absm2.
> w/o TARGET_64BIT, the builtin shouldn't folding into gimple ABSU_EXPR,
> but let backend expanded to ssse3_absm2.

Thanks for the explanation, but for consistency, I'd recommend
checking TARGET_MMX_WITH_SSE (= TARGET_64BIT && TARGET_SSE2) here. The
macro is self-explanatory, while the usage of TARGET_64BIT is not that
descriptive.

Uros.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE.
  2023-06-06 14:36     ` Uros Bizjak
@ 2023-06-07  0:31       ` Hongtao Liu
  2023-06-09  1:47         ` Hongtao Liu
  0 siblings, 1 reply; 14+ messages in thread
From: Hongtao Liu @ 2023-06-07  0:31 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: liuhongt, gcc-patches, hjl.tools

On Tue, Jun 6, 2023 at 10:36 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Tue, Jun 6, 2023 at 1:42 PM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > On Tue, Jun 6, 2023 at 5:11 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > >
> > > On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches
> > > <gcc-patches@gcc.gnu.org> wrote:
> > > >
> > > > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
> > > > TYPE_MIN, but PABSB will store unsigned result into dst. The patch
> > > > uses ABSU_EXPR + VCE instead of ABS_EXPR.
> > > >
> > > > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
> > > > vector absm2 is guarded with TARGET_MMX_WITH_SSE.
> > >
> > >This should be !TARGET_MMX_WITH_SSE. TARGET_64BIT is not enough, see
> > >the definition of T_M_W_S in i386.h. OTOH, these builtins are
> > >available for TARGET_MMX, so I'm not sure if the above check is needed
> > >at all.
> > BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0,
> > CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB,
> > UNKNOWN, (int) V8QI_FTYPE_V8QI)
> >
> > ISA requirement(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX) will be
> > checked by ix86_check_builtin_isa_match which is at the beginning of
> > ix86_gimple_fold_builtin.
> > Here, we're folding those builtin into gimple ABSU_EXPR, and
> > ABSU_EXPR<vector> will be lowered by vec_lower pass when backend
> > doesn't support corressponding absm2_optab, that's why i only check
> > TARGET_64BIT here.
> >
> > > Please note that we are using builtins here, so we should not fold to
> > > absm2, but to ssse3_absm2, which is also available with TARGET_MMX.
> > Yes, that exactly why I checked TARGET_64BIT here, w/ TARGET_64BIT,
> > backend suppport absm2_optab which exactly matches ssse3_absm2.
> > w/o TARGET_64BIT, the builtin shouldn't folding into gimple ABSU_EXPR,
> > but let backend expanded to ssse3_absm2.
>
> Thanks for the explanation, but for consistency, I'd recommend
> checking TARGET_MMX_WITH_SSE (= TARGET_64BIT && TARGET_SSE2) here. The
> macro is self-explanatory, while the usage of TARGET_64BIT is not that
> descriptive.
Sure.
>
> Uros.



-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE.
  2023-06-07  0:31       ` Hongtao Liu
@ 2023-06-09  1:47         ` Hongtao Liu
  0 siblings, 0 replies; 14+ messages in thread
From: Hongtao Liu @ 2023-06-09  1:47 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: liuhongt, gcc-patches, hjl.tools

On Wed, Jun 7, 2023 at 8:31 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Tue, Jun 6, 2023 at 10:36 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Tue, Jun 6, 2023 at 1:42 PM Hongtao Liu <crazylht@gmail.com> wrote:
> > >
> > > On Tue, Jun 6, 2023 at 5:11 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > > >
> > > > On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches
> > > > <gcc-patches@gcc.gnu.org> wrote:
> > > > >
> > > > > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
> > > > > TYPE_MIN, but PABSB will store unsigned result into dst. The patch
> > > > > uses ABSU_EXPR + VCE instead of ABS_EXPR.
> > > > >
> > > > > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
> > > > > vector absm2 is guarded with TARGET_MMX_WITH_SSE.
> > > >
> > > >This should be !TARGET_MMX_WITH_SSE. TARGET_64BIT is not enough, see
> > > >the definition of T_M_W_S in i386.h. OTOH, these builtins are
> > > >available for TARGET_MMX, so I'm not sure if the above check is needed
> > > >at all.
> > > BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0,
> > > CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB,
> > > UNKNOWN, (int) V8QI_FTYPE_V8QI)
> > >
> > > ISA requirement(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX) will be
> > > checked by ix86_check_builtin_isa_match which is at the beginning of
> > > ix86_gimple_fold_builtin.
> > > Here, we're folding those builtin into gimple ABSU_EXPR, and
> > > ABSU_EXPR<vector> will be lowered by vec_lower pass when backend
> > > doesn't support corressponding absm2_optab, that's why i only check
> > > TARGET_64BIT here.
> > >
> > > > Please note that we are using builtins here, so we should not fold to
> > > > absm2, but to ssse3_absm2, which is also available with TARGET_MMX.
> > > Yes, that exactly why I checked TARGET_64BIT here, w/ TARGET_64BIT,
> > > backend suppport absm2_optab which exactly matches ssse3_absm2.
> > > w/o TARGET_64BIT, the builtin shouldn't folding into gimple ABSU_EXPR,
> > > but let backend expanded to ssse3_absm2.
> >
> > Thanks for the explanation, but for consistency, I'd recommend
> > checking TARGET_MMX_WITH_SSE (= TARGET_64BIT && TARGET_SSE2) here. The
> > macro is self-explanatory, while the usage of TARGET_64BIT is not that
> > descriptive.
> Sure.
Pushed to trunk.
> >
> > Uros.
>
>
>
> --
> BR,
> Hongtao



-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] Explicitly view_convert_expr mask to signed type when folding pblendvb builtins.
  2023-06-06  8:21     ` [PATCH v2] Explicitly view_convert_expr mask to signed type when folding pblendvb builtins liuhongt
@ 2023-06-09  1:49       ` Hongtao Liu
  0 siblings, 0 replies; 14+ messages in thread
From: Hongtao Liu @ 2023-06-09  1:49 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches, hjl.tools

On Tue, Jun 6, 2023 at 4:23 PM liuhongt <hongtao.liu@intel.com> wrote:
>
> > I think this is a better patch and will always be correct and still
> > get folded at the gimple level (correctly):
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index d4ff56ee8dd..02bf5ba93a5 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -18561,8 +18561,10 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> >               tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
> >                 ? intSI_type_node : intDI_type_node;
> >               type = get_same_sized_vectype (itype, type);
> > -             arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
> >             }
> > +         else
> > +           type = signed_type_for (type);
> > +         arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
> >           tree zero_vec = build_zero_cst (type);
> >           tree cmp_type = truth_type_for (type);
> >           tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
> >
> >
>
> Yes, thanks.
>
> Here's the updated patch:
>
> Since mask < 0 will be always false for vector char when
> -funsigned-char, but vpblendvb needs to check the most significant
> bit. The patch explicitly VCE to vector signed char.
>
Pushed to trunk and backported to the GCC-13/GCC-12 release branches. (No need
for GCC-11 and earlier since the bug was introduced in GCC 12.)
>
> gcc/ChangeLog:
>
>         PR target/110108
>         * config/i386/i386.cc (ix86_gimple_fold_builtin): Explicitly
>         view_convert_expr mask to signed type when folding pblendvb
>         builtins.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr110108-2.c: New test.
> ---
>  gcc/config/i386/i386.cc                    |  4 +++-
>  gcc/testsuite/gcc.target/i386/pr110108-2.c | 14 ++++++++++++++
>  2 files changed, 17 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr110108-2.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index da20c2c49de..4e594a9c88e 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -18561,8 +18561,10 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>               tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
>                 ? intSI_type_node : intDI_type_node;
>               type = get_same_sized_vectype (itype, type);
> -             arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
>             }
> +         else
> +           type = signed_type_for (type);
> +         arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
>           tree zero_vec = build_zero_cst (type);
>           tree cmp_type = truth_type_for (type);
>           tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
> diff --git a/gcc/testsuite/gcc.target/i386/pr110108-2.c b/gcc/testsuite/gcc.target/i386/pr110108-2.c
> new file mode 100644
> index 00000000000..2d1d2fd4991
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110108-2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx2 -O2 -funsigned-char" } */
> +/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
> +
> +#include <immintrin.h>
> +__m128i do_stuff_128(__m128i X0, __m128i X1, __m128i X2) {
> +  __m128i Result = _mm_blendv_epi8(X0, X1, X2);
> +  return Result;
> +}
> +
> +__m256i do_stuff_256(__m256i X0, __m256i X1, __m256i X2) {
> +  __m256i Result = _mm256_blendv_epi8(X0, X1, X2);
> +  return Result;
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>


-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2023-06-09  1:49 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-06  4:31 [PATCH] Fold _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple ABSU_EXPR + VCE liuhongt
2023-06-06  4:31 ` [PATCH] Don't fold _mm{,256}_blendv_epi8 into (mask < 0 ? src1 : src2) when -funsigned-char liuhongt
2023-06-06  4:46   ` [PATCH] Don't fold _mm{, 256}_blendv_epi8 " Andrew Pinski
2023-06-06  8:21     ` [PATCH v2] Explicitly view_convert_expr mask to signed type when folding pblendvb builtins liuhongt
2023-06-09  1:49       ` Hongtao Liu
2023-06-06  4:49 ` [PATCH] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE Andrew Pinski
2023-06-06  8:15   ` Hongtao Liu
2023-06-06  8:35   ` [PATCH 1/2] Fold _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} " liuhongt
2023-06-06  9:08 ` [PATCH] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} " Uros Bizjak
2023-06-06  9:11 ` Uros Bizjak
2023-06-06 11:42   ` Hongtao Liu
2023-06-06 14:36     ` Uros Bizjak
2023-06-07  0:31       ` Hongtao Liu
2023-06-09  1:47         ` Hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).