public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-746] i386: Optimize vpblendvb on inverted mask register to vpblendvb on swapping the order of operand 1 and operand 2. [PR target/99908]
@ 2021-05-12 11:44 hongtao Liu
  0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2021-05-12 11:44 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8da3b309d8fb3ddec0b42218ca6762967b402dc3

commit r12-746-g8da3b309d8fb3ddec0b42218ca6762967b402dc3
Author: liuhongt <hongtao.liu@intel.com>
Date:   Wed Apr 7 09:58:54 2021 +0800

    i386: Optimize vpblendvb on inverted mask register to vpblendvb on swapping the order of operand 1 and operand 2. [PR target/99908]
    
    -       vpcmpeqd        %ymm3, %ymm3, %ymm3
    -       vpandn  %ymm3, %ymm2, %ymm2
    -       vpblendvb       %ymm2, %ymm1, %ymm0, %ymm0
    +       vpblendvb       %ymm2, %ymm0, %ymm1, %ymm0
    
    gcc/ChangeLog:
    
            PR target/99908
            * config/i386/sse.md (<sse4_1_avx2>_pblendvb): Add
            splitters for pblendvb of NOT mask register.
    
    gcc/testsuite/ChangeLog:
    
            PR target/99908
            * gcc.target/i386/avx2-pr99908.c: New test.
            * gcc.target/i386/sse4_1-pr99908.c: New test.

Diff:
---
 gcc/config/i386/sse.md                         | 29 ++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/avx2-pr99908.c   | 25 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/sse4_1-pr99908.c | 23 ++++++++++++++++++++
 3 files changed, 77 insertions(+)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 244fb13e97a..49dda74fe18 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17734,6 +17734,35 @@
    (set_attr "btver2_decode" "vector,vector,vector")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_split
+  [(set (match_operand:VI1_AVX2 0 "register_operand")
+	(unspec:VI1_AVX2
+	  [(match_operand:VI1_AVX2 1 "vector_operand")
+	   (match_operand:VI1_AVX2 2 "register_operand")
+	   (not:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand"))]
+	  UNSPEC_BLENDV))]
+  "TARGET_SSE4_1"
+  [(set (match_dup 0)
+	(unspec:VI1_AVX2
+	  [(match_dup 2) (match_dup 1) (match_dup 3)]
+	  UNSPEC_BLENDV))])
+
+(define_split
+  [(set (match_operand:VI1_AVX2 0 "register_operand")
+	(unspec:VI1_AVX2
+	  [(match_operand:VI1_AVX2 1 "vector_operand")
+	   (match_operand:VI1_AVX2 2 "register_operand")
+	   (subreg:VI1_AVX2 (not (match_operand 3 "register_operand")) 0)]
+	  UNSPEC_BLENDV))]
+  "TARGET_SSE4_1
+   && GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT
+   && GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE>"
+  [(set (match_dup 0)
+	(unspec:VI1_AVX2
+	  [(match_dup 2) (match_dup 1) (match_dup 4)]
+	  UNSPEC_BLENDV))]
+  "operands[4] = gen_lowpart (<MODE>mode, operands[3]);")
+
 (define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt"
   [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
 	(unspec:VI1_AVX2
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr99908.c b/gcc/testsuite/gcc.target/i386/avx2-pr99908.c
new file mode 100644
index 00000000000..2775f3b50f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-pr99908.c
@@ -0,0 +1,25 @@
+/* PR target/99908 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -masm=att" } */
+/* { dg-final { scan-assembler-times "\tvpblendvb\t" 2 } } */
+/* { dg-final { scan-assembler-not "\tvpcmpeq" } } */
+/* { dg-final { scan-assembler-not "\tvpandn" } } */
+
+#include <x86intrin.h>
+
+__m256i
+f1 (__m256i a, __m256i b, __m256i mask)
+{
+  return _mm256_blendv_epi8(a, b, 
+    _mm256_andnot_si256(mask, _mm256_set1_epi8(255)));
+}
+
+__m256i
+f2 (__v32qi x, __v32qi a, __v32qi b)
+{
+  x ^= (__v32qi) { -1, -1, -1, -1, -1, -1, -1, -1,
+		   -1, -1, -1, -1, -1, -1, -1, -1,
+		   -1, -1, -1, -1, -1, -1, -1, -1,
+		   -1, -1, -1, -1, -1, -1, -1, -1 };
+  return _mm256_blendv_epi8 ((__m256i) a, (__m256i) b, (__m256i) x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-pr99908.c b/gcc/testsuite/gcc.target/i386/sse4_1-pr99908.c
new file mode 100644
index 00000000000..c13e730b220
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-pr99908.c
@@ -0,0 +1,23 @@
+/* PR target/99908 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1 -mno-avx -masm=att" } */
+/* { dg-final { scan-assembler-times "\tpblendvb\t" 2 } } */
+/* { dg-final { scan-assembler-not "\tpcmpeq" } } */
+/* { dg-final { scan-assembler-not "\tpandn" } } */
+
+#include <x86intrin.h>
+
+__m128i
+f1 (__m128i a, __m128i b, __m128i mask)
+{
+  return _mm_blendv_epi8(a, b, 
+    _mm_andnot_si128(mask, _mm_set1_epi8(255)));
+}
+
+__m128i
+f2 (__v16qi x, __v16qi a, __v16qi b)
+{
+  x ^= (__v16qi) { -1, -1, -1, -1, -1, -1, -1, -1,
+		   -1, -1, -1, -1, -1, -1, -1, -1 };
+  return _mm_blendv_epi8 ((__m128i) a, (__m128i) b, (__m128i) x);
+}


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-05-12 11:44 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-12 11:44 [gcc r12-746] i386: Optimize vpblendvb on inverted mask register to vpblendvb on swapping the order of operand 1 and operand 2. [PR target/99908] hongtao Liu

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for the URLs of the read-only IMAP folder(s) and NNTP newsgroup(s).