public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-1640] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE.
@ 2023-06-09  1:42 hongtao Liu
  0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2023-06-09  1:42 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:07b86ab138bf8be8cb331015cd2b9775c6856ac6

commit r14-1640-g07b86ab138bf8be8cb331015cd2b9775c6856ac6
Author: liuhongt <hongtao.liu@intel.com>
Date:   Mon Jun 5 11:59:33 2023 +0800

    Fold _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple ABSU_EXPR + VCE.
    
    r14-1145 folded the intrinsics into gimple ABS_EXPR, which has UB for
    TYPE_MIN, but PABSB stores the unsigned result into dst. The patch
    uses ABSU_EXPR + VCE instead of ABS_EXPR.
    
    Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
    vector absm2 is guarded with TARGET_MMX_WITH_SSE.
    
    gcc/ChangeLog:
    
            PR target/110108
            * config/i386/i386.cc (ix86_gimple_fold_builtin): Fold
            _mm{,256,512}_abs_{epi8,epi16,epi32,epi64} into gimple
            ABSU_EXPR + VCE, don't fold _mm_abs_{pi8,pi16,pi32} w/o
            TARGET_64BIT.
            * config/i386/i386-builtin.def: Replace CODE_FOR_nothing with
            real codename for __builtin_ia32_pabs{b,w,d}.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/pr110108.c: New test.
            * gcc.target/i386/pr110108-3.c: New test.
            * gcc.target/i386/pr109900.c: Adjust testcase.

Diff:
---
 gcc/config/i386/i386-builtin.def           |  6 +++---
 gcc/config/i386/i386.cc                    | 27 ++++++++++++++++++++-------
 gcc/testsuite/gcc.target/i386/pr109900.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr110108-3.c | 22 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr110108.c   | 16 ++++++++++++++++
 5 files changed, 62 insertions(+), 11 deletions(-)

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 383b68a9bb8..7ba5b6a9d11 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd"
 
 /* SSSE3 */
 BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
 BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
 BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
 
 BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
 BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index c4591d63063..660fe15cce1 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -18444,6 +18444,7 @@ bool
 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 {
   gimple *stmt = gsi_stmt (*gsi), *g;
+  gimple_seq stmts = NULL;
   tree fndecl = gimple_call_fndecl (stmt);
   gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
   int n_args = gimple_call_num_args (stmt);
@@ -18566,7 +18567,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	{
 	  loc = gimple_location (stmt);
 	  tree type = TREE_TYPE (arg2);
-	  gimple_seq stmts = NULL;
 	  if (VECTOR_FLOAT_TYPE_P (type))
 	    {
 	      tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
@@ -18621,7 +18621,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	  tree zero_vec = build_zero_cst (type);
 	  tree minus_one_vec = build_minus_one_cst (type);
 	  tree cmp_type = truth_type_for (type);
-	  gimple_seq stmts = NULL;
 	  tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
 	  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
 	  g = gimple_build_assign (gimple_call_lhs (stmt),
@@ -18915,14 +18914,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       break;
 
     case IX86_BUILTIN_PABSB:
+    case IX86_BUILTIN_PABSW:
+    case IX86_BUILTIN_PABSD:
+      /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE.  */
+      if (!TARGET_MMX_WITH_SSE)
+	break;
+      /* FALLTHRU.  */
     case IX86_BUILTIN_PABSB128:
     case IX86_BUILTIN_PABSB256:
     case IX86_BUILTIN_PABSB512:
-    case IX86_BUILTIN_PABSW:
     case IX86_BUILTIN_PABSW128:
     case IX86_BUILTIN_PABSW256:
     case IX86_BUILTIN_PABSW512:
-    case IX86_BUILTIN_PABSD:
     case IX86_BUILTIN_PABSD128:
     case IX86_BUILTIN_PABSD256:
     case IX86_BUILTIN_PABSD512:
@@ -18944,9 +18947,19 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
       if (n_args > 1
 	  && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
 	break;
-      loc = gimple_location (stmt);
-      g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0);
-      gsi_replace (gsi, g, false);
+      {
+	tree utype, ures, vce;
+	utype = unsigned_type_for (TREE_TYPE (arg0));
+	/* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
+	   instead of ABS_EXPR to handle overflow case(TYPE_MIN).  */
+	ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
+	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	loc = gimple_location (stmt);
+	vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
+	g = gimple_build_assign (gimple_call_lhs (stmt),
+				 VIEW_CONVERT_EXPR, vce);
+	gsi_replace (gsi, g, false);
+      }
       return true;
 
     default:
diff --git a/gcc/testsuite/gcc.target/i386/pr109900.c b/gcc/testsuite/gcc.target/i386/pr109900.c
index f87e8cc11de..db67aac380c 100644
--- a/gcc/testsuite/gcc.target/i386/pr109900.c
+++ b/gcc/testsuite/gcc.target/i386/pr109900.c
@@ -1,7 +1,7 @@
 #include <immintrin.h>
 /* { dg-do compile } */
 /* { dg-options "-mavx512bw -O2 -mavx512vl -fdump-tree-optimized" } */
-/* { dg-final { scan-tree-dump-not "builtin_ia32_pabs" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "builtin_ia32_pabs" "optimized" { target { ! ia32 } } } } */
 
 
 __m64
diff --git a/gcc/testsuite/gcc.target/i386/pr110108-3.c b/gcc/testsuite/gcc.target/i386/pr110108-3.c
new file mode 100644
index 00000000000..566f744d80c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110108-3.c
@@ -0,0 +1,22 @@
+#include <immintrin.h>
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-times "pabs" 3 } } */
+
+__m64
+absb_64 (__m64 a)
+{
+  return _mm_abs_pi8(a);
+}
+
+__m64
+absw_64 (__m64 a)
+{
+  return _mm_abs_pi16(a);
+}
+
+__m64
+absd_64 (__m64 a)
+{
+  return _mm_abs_pi32(a);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr110108.c b/gcc/testsuite/gcc.target/i386/pr110108.c
new file mode 100644
index 00000000000..cd05763b9bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110108.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2" } */
+/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
+#include <immintrin.h>
+
+__m128i do_stuff_128(__m128i X0, __m128i X1) {
+  __m128i AbsX0 = _mm_abs_epi8(X0);
+  __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0);
+  return Result;
+}
+
+__m256i do_stuff_256(__m256i X0, __m256i X1) {
+  __m256i AbsX0 = _mm256_abs_epi8(X0);
+  __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0);
+  return Result;
+}

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-06-09  1:42 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-09  1:42 [gcc r14-1640] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).