public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] x86: Use Yw constraint on *ssse3_pshufbv8qi3
@ 2022-03-27 18:14 H.J. Lu
  2022-03-27 18:35 ` Uros Bizjak
  0 siblings, 1 reply; 3+ messages in thread
From: H.J. Lu @ 2022-03-27 18:14 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak, liuhongt

Since AVX512VL and AVX512BW are required for AVX512 VPSHUFB, replace the
"Yv" register constraint with the "Yw" register constraint.

gcc/

	PR target/105068
	* config/i386/sse.md (*ssse3_pshufbv8qi3): Replace "Yv" with
	"Yw".

gcc/testsuite/

	PR target/105068
	* gcc.target/i386/pr105068.c: New test.
---
 gcc/config/i386/sse.md                   |  6 +--
 gcc/testsuite/gcc.target/i386/pr105068.c | 47 ++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr105068.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 33bd2c4768a..58d2bd972ed 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -20758,9 +20758,9 @@ (define_expand "ssse3_pshufbv8qi3"
 })
 
 (define_insn_and_split "*ssse3_pshufbv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
-	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
-		      (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
+	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yw")
+		      (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")
 		      (match_operand:V4SI 4 "reg_or_const_vector_operand"
 					  "i,3,3")]
 		     UNSPEC_PSHUFB))
diff --git a/gcc/testsuite/gcc.target/i386/pr105068.c b/gcc/testsuite/gcc.target/i386/pr105068.c
new file mode 100644
index 00000000000..e5fb0338e3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr105068.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-Og -march=x86-64 -mavx512vl -fsanitize=thread -fstack-protector-all" } */
+
+typedef char __attribute__((__vector_size__(8))) C;
+typedef int __attribute__((__vector_size__(8))) U;
+typedef int __attribute__((__vector_size__(16))) V;
+typedef int __attribute__((__vector_size__(32))) W;
+typedef long long __attribute__((__vector_size__(64))) L;
+typedef _Float64 __attribute__((__vector_size__(16))) F;
+typedef _Float64 __attribute__((__vector_size__(64))) G;
+C c;
+int i;
+
+U foo0( W v256u32_0,
+           W v256s32_0,
+           V v128u64_0,
+           V v128s64_0,
+           W v256u64_0,
+           W v256s64_0,
+           L v512s64_0,
+           W v256u128_0,
+           W v256s128_0,
+           V v128f32_0,
+           W v256f32_0,
+           F F_0,
+           W v256f64_0,
+           G G_0) {
+  C U_1 = __builtin_ia32_pshufb(c, c);
+  G_0 += __builtin_convertvector(v512s64_0, G);
+  F F_1 = __builtin_shufflevector(F_0, G_0, 2, 2);
+  W W_r = v256u32_0 + v256s32_0 + v256u64_0 + v256s64_0 + v256u128_0 +
+                    v256s128_0 + v256f32_0 + v256f64_0;
+  V V_r = ((union {
+                      W a;
+                      V b;
+                    })W_r)
+                        .b +
+                    i + v128u64_0 + v128s64_0 + v128f32_0 +
+                    (V)F_1;
+  U U_r = ((union {
+                    V a;
+                    U b;
+                  })V_r)
+                      .b +
+                  (U)U_1;
+  return U_r;
+}
-- 
2.35.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] x86: Use Yw constraint on *ssse3_pshufbv8qi3
  2022-03-27 18:14 [PATCH] x86: Use Yw constraint on *ssse3_pshufbv8qi3 H.J. Lu
@ 2022-03-27 18:35 ` Uros Bizjak
  2022-03-28 13:38   ` H.J. Lu
  0 siblings, 1 reply; 3+ messages in thread
From: Uros Bizjak @ 2022-03-27 18:35 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches, liuhongt

On Sun, Mar 27, 2022 at 8:14 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Since AVX512VL and AVX512BW are required for AVX512 VPSHUFB, replace the
> "Yv" register constraint with the "Yw" register constraint.

This is an obvious fix, as said in https://gcc.gnu.org/gitwrite.html :

Obvious fixes can be committed without prior approval. Just check in
the fix and copy it to gcc-patches. A good test to determine whether a
fix is obvious: will the person who objects to my work the most be
able to find a fault with my fix? If the fix is later found to be
faulty, it can always be rolled back. We don't want to get overly
restrictive about checkin policies.

Thanks,
Uros.

>
> gcc/
>
>         PR target/105068
>         * config/i386/sse.md (*ssse3_pshufbv8qi3): Replace "Yv" with
>         "Yw".
>
> gcc/testsuite/
>
>         PR target/105068
>         * gcc.target/i386/pr105068.c: New test.
> ---
>  gcc/config/i386/sse.md                   |  6 +--
>  gcc/testsuite/gcc.target/i386/pr105068.c | 47 ++++++++++++++++++++++++
>  2 files changed, 50 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr105068.c
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 33bd2c4768a..58d2bd972ed 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -20758,9 +20758,9 @@ (define_expand "ssse3_pshufbv8qi3"
>  })
>
>  (define_insn_and_split "*ssse3_pshufbv8qi3"
> -  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
> -       (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
> -                     (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
> +  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
> +       (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yw")
> +                     (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")
>                       (match_operand:V4SI 4 "reg_or_const_vector_operand"
>                                           "i,3,3")]
>                      UNSPEC_PSHUFB))
> diff --git a/gcc/testsuite/gcc.target/i386/pr105068.c b/gcc/testsuite/gcc.target/i386/pr105068.c
> new file mode 100644
> index 00000000000..e5fb0338e3b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr105068.c
> @@ -0,0 +1,47 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Og -march=x86-64 -mavx512vl -fsanitize=thread -fstack-protector-all" } */
> +
> +typedef char __attribute__((__vector_size__(8))) C;
> +typedef int __attribute__((__vector_size__(8))) U;
> +typedef int __attribute__((__vector_size__(16))) V;
> +typedef int __attribute__((__vector_size__(32))) W;
> +typedef long long __attribute__((__vector_size__(64))) L;
> +typedef _Float64 __attribute__((__vector_size__(16))) F;
> +typedef _Float64 __attribute__((__vector_size__(64))) G;
> +C c;
> +int i;
> +
> +U foo0( W v256u32_0,
> +           W v256s32_0,
> +           V v128u64_0,
> +           V v128s64_0,
> +           W v256u64_0,
> +           W v256s64_0,
> +           L v512s64_0,
> +           W v256u128_0,
> +           W v256s128_0,
> +           V v128f32_0,
> +           W v256f32_0,
> +           F F_0,
> +           W v256f64_0,
> +           G G_0) {
> +  C U_1 = __builtin_ia32_pshufb(c, c);
> +  G_0 += __builtin_convertvector(v512s64_0, G);
> +  F F_1 = __builtin_shufflevector(F_0, G_0, 2, 2);
> +  W W_r = v256u32_0 + v256s32_0 + v256u64_0 + v256s64_0 + v256u128_0 +
> +                    v256s128_0 + v256f32_0 + v256f64_0;
> +  V V_r = ((union {
> +                      W a;
> +                      V b;
> +                    })W_r)
> +                        .b +
> +                    i + v128u64_0 + v128s64_0 + v128f32_0 +
> +                    (V)F_1;
> +  U U_r = ((union {
> +                    V a;
> +                    U b;
> +                  })V_r)
> +                      .b +
> +                  (U)U_1;
> +  return U_r;
> +}
> --
> 2.35.1
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] x86: Use Yw constraint on *ssse3_pshufbv8qi3
  2022-03-27 18:35 ` Uros Bizjak
@ 2022-03-28 13:38   ` H.J. Lu
  0 siblings, 0 replies; 3+ messages in thread
From: H.J. Lu @ 2022-03-28 13:38 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches, liuhongt

On Sun, Mar 27, 2022 at 11:35 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Sun, Mar 27, 2022 at 8:14 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > Since AVX512VL and AVX512BW are required for AVX512 VPSHUFB, replace the
> > "Yv" register constraint with the "Yw" register constraint.
>
> This is an obvious fix, as said in https://gcc.gnu.org/gitwrite.html :
>
> Obvious fixes can be committed without prior approval. Just check in
> the fix and copy it to gcc-patches. A good test to determine whether a
> fix is obvious: will the person who objects to my work the most be
> able to find a fault with my fix? If the fix is later found to be
> faulty, it can always be rolled back. We don't want to get overly
> restrictive about checkin policies.

I checked this into the master branch and am backporting it to the
release branches.  I will drop the testcase on the release branches,
since it uses __builtin_shufflevector, which is new in GCC 12.

> Thanks,
> Uros.
>
> >
> > gcc/
> >
> >         PR target/105068
> >         * config/i386/sse.md (*ssse3_pshufbv8qi3): Replace "Yv" with
> >         "Yw".
> >
> > gcc/testsuite/
> >
> >         PR target/105068
> >         * gcc.target/i386/pr105068.c: New test.
> > ---
> >  gcc/config/i386/sse.md                   |  6 +--
> >  gcc/testsuite/gcc.target/i386/pr105068.c | 47 ++++++++++++++++++++++++
> >  2 files changed, 50 insertions(+), 3 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr105068.c
> >
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index 33bd2c4768a..58d2bd972ed 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -20758,9 +20758,9 @@ (define_expand "ssse3_pshufbv8qi3"
> >  })
> >
> >  (define_insn_and_split "*ssse3_pshufbv8qi3"
> > -  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
> > -       (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
> > -                     (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
> > +  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
> > +       (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yw")
> > +                     (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")
> >                       (match_operand:V4SI 4 "reg_or_const_vector_operand"
> >                                           "i,3,3")]
> >                      UNSPEC_PSHUFB))
> > diff --git a/gcc/testsuite/gcc.target/i386/pr105068.c b/gcc/testsuite/gcc.target/i386/pr105068.c
> > new file mode 100644
> > index 00000000000..e5fb0338e3b
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr105068.c
> > @@ -0,0 +1,47 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-Og -march=x86-64 -mavx512vl -fsanitize=thread -fstack-protector-all" } */
> > +
> > +typedef char __attribute__((__vector_size__(8))) C;
> > +typedef int __attribute__((__vector_size__(8))) U;
> > +typedef int __attribute__((__vector_size__(16))) V;
> > +typedef int __attribute__((__vector_size__(32))) W;
> > +typedef long long __attribute__((__vector_size__(64))) L;
> > +typedef _Float64 __attribute__((__vector_size__(16))) F;
> > +typedef _Float64 __attribute__((__vector_size__(64))) G;
> > +C c;
> > +int i;
> > +
> > +U foo0( W v256u32_0,
> > +           W v256s32_0,
> > +           V v128u64_0,
> > +           V v128s64_0,
> > +           W v256u64_0,
> > +           W v256s64_0,
> > +           L v512s64_0,
> > +           W v256u128_0,
> > +           W v256s128_0,
> > +           V v128f32_0,
> > +           W v256f32_0,
> > +           F F_0,
> > +           W v256f64_0,
> > +           G G_0) {
> > +  C U_1 = __builtin_ia32_pshufb(c, c);
> > +  G_0 += __builtin_convertvector(v512s64_0, G);
> > +  F F_1 = __builtin_shufflevector(F_0, G_0, 2, 2);
> > +  W W_r = v256u32_0 + v256s32_0 + v256u64_0 + v256s64_0 + v256u128_0 +
> > +                    v256s128_0 + v256f32_0 + v256f64_0;
> > +  V V_r = ((union {
> > +                      W a;
> > +                      V b;
> > +                    })W_r)
> > +                        .b +
> > +                    i + v128u64_0 + v128s64_0 + v128f32_0 +
> > +                    (V)F_1;
> > +  U U_r = ((union {
> > +                    V a;
> > +                    U b;
> > +                  })V_r)
> > +                      .b +
> > +                  (U)U_1;
> > +  return U_r;
> > +}
> > --
> > 2.35.1
> >



-- 
H.J.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-03-28 13:39 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-27 18:14 [PATCH] x86: Use Yw constraint on *ssse3_pshufbv8qi3 H.J. Lu
2022-03-27 18:35 ` Uros Bizjak
2022-03-28 13:38   ` H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).