public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fold _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple ABS_EXPR.
@ 2023-05-22  7:35 liuhongt
  2023-05-23  2:29 ` Hongtao Liu
  0 siblings, 1 reply; 2+ messages in thread
From: liuhongt @ 2023-05-22  7:35 UTC (permalink / raw)
  To: gcc-patches; +Cc: crazylht, hjl.tools

Also for 64-bit vector abs intrinsics _mm_abs_{pi8,pi16,pi32}.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

	PR target/109900
	* config/i386/i386.cc (ix86_gimple_fold_builtin): Fold
	_mm{,256,512}_abs_{epi8,epi16,epi32,epi64} and
	_mm_abs_{pi8,pi16,pi32} into gimple ABS_EXPR.
	(ix86_masked_all_ones): Handle 64-bit mask.
	* config/i386/i386-builtin.def: Replace icode of related
	non-mask simd abs builtins with CODE_FOR_nothing.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr109900.c: New test.
---
 gcc/config/i386/i386-builtin.def         | 18 ++---
 gcc/config/i386/i386.cc                  | 86 +++++++++++++++------
 gcc/testsuite/gcc.target/i386/pr109900.c | 95 ++++++++++++++++++++++++
 3 files changed, 166 insertions(+), 33 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr109900.c

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index f7b10a6ab1e..c91e3809c75 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -899,12 +899,12 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps"
 BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
 
 /* SSSE3 */
-BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
-BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
-BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
+BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
+BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
+BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
 
 BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
 BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
@@ -1178,9 +1178,9 @@ BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_
 
 /* AVX2 */
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT)
-BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI)
-BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI)
-BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI)
+BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI)
+BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI)
+BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256",  IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256",  IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI)
 BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256",  IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI)
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 6a4b3326219..7d1d1bb0760 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -17928,6 +17928,8 @@ ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
     return false;
 
   unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
+  if (elems == HOST_BITS_PER_WIDE_INT)
+    return  mask == HOST_WIDE_INT_M1U;
   if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
     return false;
 
@@ -18407,7 +18409,7 @@ ix86_fold_builtin (tree fndecl, int n_args,
 bool
 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 {
-  gimple *stmt = gsi_stmt (*gsi);
+  gimple *stmt = gsi_stmt (*gsi), *g;
   tree fndecl = gimple_call_fndecl (stmt);
   gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
   int n_args = gimple_call_num_args (stmt);
@@ -18420,6 +18422,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   unsigned HOST_WIDE_INT count;
   bool is_vshift;
   unsigned HOST_WIDE_INT elems;
+  location_t loc;
 
   /* Don't fold when there's isa mismatch.  */
   if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
@@ -18455,8 +18458,8 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	  if (!expr_not_equal_to (arg0, wi::zero (prec)))
 	    return false;
 
-	  location_t loc = gimple_location (stmt);
-	  gimple *g = gimple_build_call (decl, 1, arg0);
+	  loc = gimple_location (stmt);
+	  g = gimple_build_call (decl, 1, arg0);
 	  gimple_set_location (g, loc);
 	  tree lhs = make_ssa_name (integer_type_node);
 	  gimple_call_set_lhs (g, lhs);
@@ -18478,8 +18481,8 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	  arg0 = gimple_call_arg (stmt, 0);
 	  if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
 	    break;
-	  location_t loc = gimple_location (stmt);
-	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
+	  loc = gimple_location (stmt);
+	  g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
 	  gimple_set_location (g, loc);
 	  gsi_replace (gsi, g, false);
 	  return true;
@@ -18494,9 +18497,9 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       arg1 = gimple_call_arg (stmt, 1);
       if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
 	{
-	  location_t loc = gimple_location (stmt);
+	  loc = gimple_location (stmt);
 	  arg0 = gimple_call_arg (stmt, 0);
-	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
+	  g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
 	  gimple_set_location (g, loc);
 	  gsi_replace (gsi, g, false);
 	  return true;
@@ -18527,7 +18530,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       arg2 = gimple_call_arg (stmt, 2);
       if (gimple_call_lhs (stmt))
 	{
-	  location_t loc = gimple_location (stmt);
+	  loc = gimple_location (stmt);
 	  tree type = TREE_TYPE (arg2);
 	  gimple_seq stmts = NULL;
 	  if (VECTOR_FLOAT_TYPE_P (type))
@@ -18541,9 +18544,9 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	  tree cmp_type = truth_type_for (type);
 	  tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
 	  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
-	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
-					   VEC_COND_EXPR, cmp,
-					   arg1, arg0);
+	  g = gimple_build_assign (gimple_call_lhs (stmt),
+				   VEC_COND_EXPR, cmp,
+				   arg1, arg0);
 	  gimple_set_location (g, loc);
 	  gsi_replace (gsi, g, false);
 	}
@@ -18579,7 +18582,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       arg1 = gimple_call_arg (stmt, 1);
       if (gimple_call_lhs (stmt))
 	{
-	  location_t loc = gimple_location (stmt);
+	  loc = gimple_location (stmt);
 	  tree type = TREE_TYPE (arg0);
 	  tree zero_vec = build_zero_cst (type);
 	  tree minus_one_vec = build_minus_one_cst (type);
@@ -18587,9 +18590,9 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	  gimple_seq stmts = NULL;
 	  tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
 	  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
-	  gimple* g = gimple_build_assign (gimple_call_lhs (stmt),
-					   VEC_COND_EXPR, cmp,
-					   minus_one_vec, zero_vec);
+	  g = gimple_build_assign (gimple_call_lhs (stmt),
+				   VEC_COND_EXPR, cmp,
+				   minus_one_vec, zero_vec);
 	  gimple_set_location (g, loc);
 	  gsi_replace (gsi, g, false);
 	}
@@ -18794,8 +18797,8 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       if (count == 0)
 	{
 	  /* Just return the first argument for shift by 0.  */
-	  location_t loc = gimple_location (stmt);
-	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
+	  loc = gimple_location (stmt);
+	  g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
 	  gimple_set_location (g, loc);
 	  gsi_replace (gsi, g, false);
 	  return true;
@@ -18805,9 +18808,9 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	{
 	  /* For shift counts equal or greater than precision, except for
 	     arithmetic right shift the result is zero.  */
-	  location_t loc = gimple_location (stmt);
-	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
-					   build_zero_cst (TREE_TYPE (arg0)));
+	  loc = gimple_location (stmt);
+	  g = gimple_build_assign (gimple_call_lhs (stmt),
+				   build_zero_cst (TREE_TYPE (arg0)));
 	  gimple_set_location (g, loc);
 	  gsi_replace (gsi, g, false);
 	  return true;
@@ -18836,7 +18839,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	    return false;
 
 	  machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
-	  location_t loc = gimple_location (stmt);
+	  loc = gimple_location (stmt);
 	  tree itype = (imode == E_DFmode
 			? long_long_integer_type_node : integer_type_node);
 	  tree vtype = build_vector_type (itype, elems);
@@ -18867,9 +18870,9 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 
 	  tree perm_mask = elts.build ();
 	  arg1 = gimple_call_arg (stmt, 1);
-	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
-					   VEC_PERM_EXPR,
-					   arg0, arg1, perm_mask);
+	  g = gimple_build_assign (gimple_call_lhs (stmt),
+				   VEC_PERM_EXPR,
+				   arg0, arg1, perm_mask);
 	  gimple_set_location (g, loc);
 	  gsi_replace (gsi, g, false);
 	  return true;
@@ -18877,6 +18880,41 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       // Do not error yet, the constant could be propagated later?
       break;
 
+    case IX86_BUILTIN_PABSB:
+    case IX86_BUILTIN_PABSB128:
+    case IX86_BUILTIN_PABSB256:
+    case IX86_BUILTIN_PABSB512:
+    case IX86_BUILTIN_PABSW:
+    case IX86_BUILTIN_PABSW128:
+    case IX86_BUILTIN_PABSW256:
+    case IX86_BUILTIN_PABSW512:
+    case IX86_BUILTIN_PABSD:
+    case IX86_BUILTIN_PABSD128:
+    case IX86_BUILTIN_PABSD256:
+    case IX86_BUILTIN_PABSD512:
+    case IX86_BUILTIN_PABSQ128:
+    case IX86_BUILTIN_PABSQ256:
+    case IX86_BUILTIN_PABSQ512:
+    case IX86_BUILTIN_PABSB128_MASK:
+    case IX86_BUILTIN_PABSB256_MASK:
+    case IX86_BUILTIN_PABSW128_MASK:
+    case IX86_BUILTIN_PABSW256_MASK:
+    case IX86_BUILTIN_PABSD128_MASK:
+    case IX86_BUILTIN_PABSD256_MASK:
+      gcc_assert (n_args >= 1);
+      if (!gimple_call_lhs (stmt))
+	break;
+      arg0 = gimple_call_arg (stmt, 0);
+      elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
+      /* For masked ABS, only optimize if the mask is all ones.  */
+      if (n_args > 1
+	  && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
+	break;
+      loc = gimple_location (stmt);
+      g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0);
+      gsi_replace (gsi, g, false);
+      return true;
+
     default:
       break;
     }
diff --git a/gcc/testsuite/gcc.target/i386/pr109900.c b/gcc/testsuite/gcc.target/i386/pr109900.c
new file mode 100644
index 00000000000..f87e8cc11de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109900.c
@@ -0,0 +1,95 @@
+#include <immintrin.h>
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2 -mavx512vl -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "builtin_ia32_pabs" "optimized" } } */
+
+
+__m64
+absb_64 ()
+{
+  return _mm_abs_pi8(_mm_set1_pi8 (-1));
+}
+
+__m128i
+absb_128 ()
+{
+  return _mm_abs_epi8(_mm_set1_epi8 (-1));
+}
+
+__m256i
+absb_256 ()
+{
+  return _mm256_abs_epi8(_mm256_set1_epi8 (-1));
+}
+
+__m512i
+absb_512 ()
+{
+  return _mm512_abs_epi8(_mm512_set1_epi8 (-1));
+}
+
+__m64
+absw_64 ()
+{
+  return _mm_abs_pi16(_mm_set1_pi16 (-1));
+}
+
+__m128i
+absw_128 ()
+{
+  return _mm_abs_epi16(_mm_set1_epi16 (-1));
+}
+
+__m256i
+absw_256 ()
+{
+  return _mm256_abs_epi16(_mm256_set1_epi16 (-1));
+}
+
+__m512i
+absw_512 ()
+{
+  return _mm512_abs_epi16(_mm512_set1_epi16 (-1));
+}
+
+__m64
+absd_64 ()
+{
+  return _mm_abs_pi32(_mm_set1_pi32 (-1));
+}
+
+__m128i
+absd_128 ()
+{
+  return _mm_abs_epi32(_mm_set1_epi32 (-1));
+}
+
+__m256i
+absd_256 ()
+{
+  return _mm256_abs_epi32(_mm256_set1_epi32 (-1));
+}
+
+__m512i
+absd_512 ()
+{
+  return _mm512_abs_epi32(_mm512_set1_epi32 (-1));
+}
+
+__m128i
+absq_128 ()
+{
+  return _mm_abs_epi64(_mm_set1_epi64x (-1));
+}
+
+__m256i
+absq_256 ()
+{
+  return _mm256_abs_epi64(_mm256_set1_epi64x (-1));
+}
+
+__m512i
+absq_512 ()
+{
+  return _mm512_abs_epi64(_mm512_set1_epi64 (-1));
+}
-- 
2.39.1.388.g2fc9e9ca3c


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Fold _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple ABS_EXPR.
  2023-05-22  7:35 [PATCH] Fold _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple ABS_EXPR liuhongt
@ 2023-05-23  2:29 ` Hongtao Liu
  0 siblings, 0 replies; 2+ messages in thread
From: Hongtao Liu @ 2023-05-23  2:29 UTC (permalink / raw)
  To: liuhongt; +Cc: gcc-patches, hjl.tools

On Mon, May 22, 2023 at 3:35 PM liuhongt <hongtao.liu@intel.com> wrote:
>
> Also for 64-bit vector abs intrinsics _mm_abs_{pi8,pi16,pi32}.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
Ready to push to main trunk.
>
> gcc/ChangeLog:
>
>         PR target/109900
>         * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold
>         _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} and
>         _mm_abs_{pi8,pi16,pi32} into gimple ABS_EXPR.
>         (ix86_masked_all_ones): Handle 64-bit mask.
>         * config/i386/i386-builtin.def: Replace icode of related
>         non-mask simd abs builtins with CODE_FOR_nothing.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr109900.c: New test.
> ---
>  gcc/config/i386/i386-builtin.def         | 18 ++---
>  gcc/config/i386/i386.cc                  | 86 +++++++++++++++------
>  gcc/testsuite/gcc.target/i386/pr109900.c | 95 ++++++++++++++++++++++++
>  3 files changed, 166 insertions(+), 33 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr109900.c
>
> diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> index f7b10a6ab1e..c91e3809c75 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -899,12 +899,12 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps"
>  BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
>
>  /* SSSE3 */
> -BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> -BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> -BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
> -BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
> +BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
> +BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
> +BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
> +BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
>
>  BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
>  BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
> @@ -1178,9 +1178,9 @@ BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_
>
>  /* AVX2 */
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT)
> -BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI)
> -BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI)
> -BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI)
> +BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI)
> +BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI)
> +BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI)
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256",  IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI)
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256",  IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI)
>  BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256",  IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI)
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index 6a4b3326219..7d1d1bb0760 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -17928,6 +17928,8 @@ ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
>      return false;
>
>    unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
> +  if (elems == HOST_BITS_PER_WIDE_INT)
> +    return  mask == HOST_WIDE_INT_M1U;
>    if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
>      return false;
>
> @@ -18407,7 +18409,7 @@ ix86_fold_builtin (tree fndecl, int n_args,
>  bool
>  ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>  {
> -  gimple *stmt = gsi_stmt (*gsi);
> +  gimple *stmt = gsi_stmt (*gsi), *g;
>    tree fndecl = gimple_call_fndecl (stmt);
>    gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
>    int n_args = gimple_call_num_args (stmt);
> @@ -18420,6 +18422,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>    unsigned HOST_WIDE_INT count;
>    bool is_vshift;
>    unsigned HOST_WIDE_INT elems;
> +  location_t loc;
>
>    /* Don't fold when there's isa mismatch.  */
>    if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
> @@ -18455,8 +18458,8 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>           if (!expr_not_equal_to (arg0, wi::zero (prec)))
>             return false;
>
> -         location_t loc = gimple_location (stmt);
> -         gimple *g = gimple_build_call (decl, 1, arg0);
> +         loc = gimple_location (stmt);
> +         g = gimple_build_call (decl, 1, arg0);
>           gimple_set_location (g, loc);
>           tree lhs = make_ssa_name (integer_type_node);
>           gimple_call_set_lhs (g, lhs);
> @@ -18478,8 +18481,8 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>           arg0 = gimple_call_arg (stmt, 0);
>           if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
>             break;
> -         location_t loc = gimple_location (stmt);
> -         gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
> +         loc = gimple_location (stmt);
> +         g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
>           gimple_set_location (g, loc);
>           gsi_replace (gsi, g, false);
>           return true;
> @@ -18494,9 +18497,9 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>        arg1 = gimple_call_arg (stmt, 1);
>        if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
>         {
> -         location_t loc = gimple_location (stmt);
> +         loc = gimple_location (stmt);
>           arg0 = gimple_call_arg (stmt, 0);
> -         gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
> +         g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
>           gimple_set_location (g, loc);
>           gsi_replace (gsi, g, false);
>           return true;
> @@ -18527,7 +18530,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>        arg2 = gimple_call_arg (stmt, 2);
>        if (gimple_call_lhs (stmt))
>         {
> -         location_t loc = gimple_location (stmt);
> +         loc = gimple_location (stmt);
>           tree type = TREE_TYPE (arg2);
>           gimple_seq stmts = NULL;
>           if (VECTOR_FLOAT_TYPE_P (type))
> @@ -18541,9 +18544,9 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>           tree cmp_type = truth_type_for (type);
>           tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
>           gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> -         gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
> -                                          VEC_COND_EXPR, cmp,
> -                                          arg1, arg0);
> +         g = gimple_build_assign (gimple_call_lhs (stmt),
> +                                  VEC_COND_EXPR, cmp,
> +                                  arg1, arg0);
>           gimple_set_location (g, loc);
>           gsi_replace (gsi, g, false);
>         }
> @@ -18579,7 +18582,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>        arg1 = gimple_call_arg (stmt, 1);
>        if (gimple_call_lhs (stmt))
>         {
> -         location_t loc = gimple_location (stmt);
> +         loc = gimple_location (stmt);
>           tree type = TREE_TYPE (arg0);
>           tree zero_vec = build_zero_cst (type);
>           tree minus_one_vec = build_minus_one_cst (type);
> @@ -18587,9 +18590,9 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>           gimple_seq stmts = NULL;
>           tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
>           gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> -         gimple* g = gimple_build_assign (gimple_call_lhs (stmt),
> -                                          VEC_COND_EXPR, cmp,
> -                                          minus_one_vec, zero_vec);
> +         g = gimple_build_assign (gimple_call_lhs (stmt),
> +                                  VEC_COND_EXPR, cmp,
> +                                  minus_one_vec, zero_vec);
>           gimple_set_location (g, loc);
>           gsi_replace (gsi, g, false);
>         }
> @@ -18794,8 +18797,8 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>        if (count == 0)
>         {
>           /* Just return the first argument for shift by 0.  */
> -         location_t loc = gimple_location (stmt);
> -         gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
> +         loc = gimple_location (stmt);
> +         g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
>           gimple_set_location (g, loc);
>           gsi_replace (gsi, g, false);
>           return true;
> @@ -18805,9 +18808,9 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>         {
>           /* For shift counts equal or greater than precision, except for
>              arithmetic right shift the result is zero.  */
> -         location_t loc = gimple_location (stmt);
> -         gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
> -                                          build_zero_cst (TREE_TYPE (arg0)));
> +         loc = gimple_location (stmt);
> +         g = gimple_build_assign (gimple_call_lhs (stmt),
> +                                  build_zero_cst (TREE_TYPE (arg0)));
>           gimple_set_location (g, loc);
>           gsi_replace (gsi, g, false);
>           return true;
> @@ -18836,7 +18839,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>             return false;
>
>           machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
> -         location_t loc = gimple_location (stmt);
> +         loc = gimple_location (stmt);
>           tree itype = (imode == E_DFmode
>                         ? long_long_integer_type_node : integer_type_node);
>           tree vtype = build_vector_type (itype, elems);
> @@ -18867,9 +18870,9 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>
>           tree perm_mask = elts.build ();
>           arg1 = gimple_call_arg (stmt, 1);
> -         gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
> -                                          VEC_PERM_EXPR,
> -                                          arg0, arg1, perm_mask);
> +         g = gimple_build_assign (gimple_call_lhs (stmt),
> +                                  VEC_PERM_EXPR,
> +                                  arg0, arg1, perm_mask);
>           gimple_set_location (g, loc);
>           gsi_replace (gsi, g, false);
>           return true;
> @@ -18877,6 +18880,41 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>        // Do not error yet, the constant could be propagated later?
>        break;
>
> +    case IX86_BUILTIN_PABSB:
> +    case IX86_BUILTIN_PABSB128:
> +    case IX86_BUILTIN_PABSB256:
> +    case IX86_BUILTIN_PABSB512:
> +    case IX86_BUILTIN_PABSW:
> +    case IX86_BUILTIN_PABSW128:
> +    case IX86_BUILTIN_PABSW256:
> +    case IX86_BUILTIN_PABSW512:
> +    case IX86_BUILTIN_PABSD:
> +    case IX86_BUILTIN_PABSD128:
> +    case IX86_BUILTIN_PABSD256:
> +    case IX86_BUILTIN_PABSD512:
> +    case IX86_BUILTIN_PABSQ128:
> +    case IX86_BUILTIN_PABSQ256:
> +    case IX86_BUILTIN_PABSQ512:
> +    case IX86_BUILTIN_PABSB128_MASK:
> +    case IX86_BUILTIN_PABSB256_MASK:
> +    case IX86_BUILTIN_PABSW128_MASK:
> +    case IX86_BUILTIN_PABSW256_MASK:
> +    case IX86_BUILTIN_PABSD128_MASK:
> +    case IX86_BUILTIN_PABSD256_MASK:
> +      gcc_assert (n_args >= 1);
> +      if (!gimple_call_lhs (stmt))
> +       break;
> +      arg0 = gimple_call_arg (stmt, 0);
> +      elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> +      /* For masked ABS, only optimize if the mask is all ones.  */
> +      if (n_args > 1
> +         && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
> +       break;
> +      loc = gimple_location (stmt);
> +      g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0);
> +      gsi_replace (gsi, g, false);
> +      return true;
> +
>      default:
>        break;
>      }
> diff --git a/gcc/testsuite/gcc.target/i386/pr109900.c b/gcc/testsuite/gcc.target/i386/pr109900.c
> new file mode 100644
> index 00000000000..f87e8cc11de
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr109900.c
> @@ -0,0 +1,95 @@
> +#include <immintrin.h>
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512bw -O2 -mavx512vl -fdump-tree-optimized" } */
> +/* { dg-final { scan-tree-dump-not "builtin_ia32_pabs" "optimized" } } */
> +
> +
> +__m64
> +absb_64 ()
> +{
> +  return _mm_abs_pi8(_mm_set1_pi8 (-1));
> +}
> +
> +__m128i
> +absb_128 ()
> +{
> +  return _mm_abs_epi8(_mm_set1_epi8 (-1));
> +}
> +
> +__m256i
> +absb_256 ()
> +{
> +  return _mm256_abs_epi8(_mm256_set1_epi8 (-1));
> +}
> +
> +__m512i
> +absb_512 ()
> +{
> +  return _mm512_abs_epi8(_mm512_set1_epi8 (-1));
> +}
> +
> +__m64
> +absw_64 ()
> +{
> +  return _mm_abs_pi16(_mm_set1_pi16 (-1));
> +}
> +
> +__m128i
> +absw_128 ()
> +{
> +  return _mm_abs_epi16(_mm_set1_epi16 (-1));
> +}
> +
> +__m256i
> +absw_256 ()
> +{
> +  return _mm256_abs_epi16(_mm256_set1_epi16 (-1));
> +}
> +
> +__m512i
> +absw_512 ()
> +{
> +  return _mm512_abs_epi16(_mm512_set1_epi16 (-1));
> +}
> +
> +__m64
> +absd_64 ()
> +{
> +  return _mm_abs_pi32(_mm_set1_pi32 (-1));
> +}
> +
> +__m128i
> +absd_128 ()
> +{
> +  return _mm_abs_epi32(_mm_set1_epi32 (-1));
> +}
> +
> +__m256i
> +absd_256 ()
> +{
> +  return _mm256_abs_epi32(_mm256_set1_epi32 (-1));
> +}
> +
> +__m512i
> +absd_512 ()
> +{
> +  return _mm512_abs_epi32(_mm512_set1_epi32 (-1));
> +}
> +
> +__m128i
> +absq_128 ()
> +{
> +  return _mm_abs_epi64(_mm_set1_epi64x (-1));
> +}
> +
> +__m256i
> +absq_256 ()
> +{
> +  return _mm256_abs_epi64(_mm256_set1_epi64x (-1));
> +}
> +
> +__m512i
> +absq_512 ()
> +{
> +  return _mm512_abs_epi64(_mm512_set1_epi64 (-1));
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>


-- 
BR,
Hongtao

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-05-23  2:29 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-22  7:35 [PATCH] Fold _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple ABS_EXPR liuhongt
2023-05-23  2:29 ` Hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).