* [PATCH] x86: Mark scratch operand in ssse3_pshufbv8qi3 as earlyclobber
@ 2020-04-03 16:51 H.J. Lu
2020-04-03 16:57 ` Uros Bizjak
0 siblings, 1 reply; 2+ messages in thread
From: H.J. Lu @ 2020-04-03 16:51 UTC (permalink / raw)
To: gcc-patches; +Cc: Uros Bizjak, Jakub Jelinek
commit 16ed2601ad0a4aa82f11e9df86ea92183f94f979
Author: H.J. Lu <hongjiu.lu@intel.com>
Date: Wed May 15 15:26:19 2019 +0000
i386: Emulate MMX pshufb with SSE version
has
+(define_insn_and_split "ssse3_pshufbv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
+ UNSPEC_PSHUFB))
+ (clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
^^^ These are earlyclobbers, but they are not marked as such.
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+ "@
+ pshufb\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(set (match_dup 3) (match_dup 5))
+ (set (match_dup 3)
+ (and:V4SI (match_dup 3) (match_dup 2)))
+ (set (match_dup 0)
+ (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
If input register operand 2 is dead after this insn, RA may choose it
as the scratch operand. Since the scratch operand isn't marked as
earlyclobber, operand 2 becomes unused after the split and then gets
optimized out. Marking the scratch operand as earlyclobber fixes the issue.
OK for master if there are no regressions?
H.J.
--
gcc/
PR target/94467
* config/i386/sse.md (ssse3_pshufbv8qi3): Mark scratch operand
as earlyclobber.
gcc/
PR target/94467
* testsuite/gcc.target/i386/pr94467-1.c: New test.
* testsuite/gcc.target/i386/pr94467-2.c: Likewise.
---
gcc/config/i386/sse.md | 2 +-
gcc/testsuite/gcc.target/i386/pr94467-1.c | 40 +++++++++++++++++++
gcc/testsuite/gcc.target/i386/pr94467-2.c | 48 +++++++++++++++++++++++
3 files changed, 89 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr94467-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr94467-2.c
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index fba91b7369a..1de03a515d9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -16695,7 +16695,7 @@ (define_insn_and_split "ssse3_pshufbv8qi3"
(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
(match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
UNSPEC_PSHUFB))
- (clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
+ (clobber (match_scratch:V4SI 3 "=X,&x,&Yv"))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
"@
pshufb\t{%2, %0|%0, %2}
diff --git a/gcc/testsuite/gcc.target/i386/pr94467-1.c b/gcc/testsuite/gcc.target/i386/pr94467-1.c
new file mode 100644
index 00000000000..a51c3a8f5fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr94467-1.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O -mavx" } */
+
+#include "avx-check.h"
+
+typedef char __attribute__ ((__vector_size__ (8))) v8qi;
+typedef short __attribute__ ((__vector_size__ (8))) v4hi;
+typedef int __attribute__ ((__vector_size__ (8))) v2si;
+typedef long long __attribute__ ((__vector_size__ (8))) v1di;
+typedef unsigned long long u64;
+u64 k, c;
+
+v8qi g, h, p, q;
+v4hi d, e, f, l, n, o;
+v2si j;
+
+u64
+foo (v4hi r)
+{
+ v8qi s;
+ f = (v4hi) j;
+ e = __builtin_ia32_psrlwi ((v4hi) k, c);
+ s = __builtin_ia32_pavgb (h, h);
+ n = __builtin_ia32_pabsw (f);
+ o = __builtin_ia32_psubusw (n, l);
+ p = __builtin_ia32_packsswb (r, o);
+ q = __builtin_ia32_pshufb (p, s);
+ g = __builtin_ia32_punpcklbw (q, (v8qi) r);
+ d = r;
+ return (u64) g + (u64) h + (u64) j;
+}
+
+static void
+avx_test (void)
+{
+ u64 x = foo ((v4hi) { 5 });
+ if (x != 0x0005000500050505)
+ __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr94467-2.c b/gcc/testsuite/gcc.target/i386/pr94467-2.c
new file mode 100644
index 00000000000..8128be325e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr94467-2.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-require-effective-target ssse3 } */
+/* { dg-options "-O -mssse3" } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+
+typedef char __attribute__ ((__vector_size__ (8))) v8qi;
+typedef short __attribute__ ((__vector_size__ (8))) v4hi;
+typedef int __attribute__ ((__vector_size__ (8))) v2si;
+typedef long long __attribute__ ((__vector_size__ (8))) v1di;
+typedef unsigned long long u64;
+u64 k, c;
+
+v8qi g, h, p, q;
+v4hi d, e, f, l, n, o;
+v2si j;
+
+u64
+foo (v4hi r)
+{
+ v8qi s;
+ f = (v4hi) j;
+ e = __builtin_ia32_psrlwi ((v4hi) k, c);
+ s = __builtin_ia32_pavgb (h, h);
+ n = __builtin_ia32_pabsw (f);
+ o = __builtin_ia32_psubusw (n, l);
+ p = __builtin_ia32_packsswb (r, o);
+ q = __builtin_ia32_pshufb (p, s);
+ g = __builtin_ia32_punpcklbw (q, (v8qi) r);
+ d = r;
+ return (u64) g + (u64) h + (u64) j;
+}
+
+static void
+ssse3_test (void)
+{
+ u64 x = foo ((v4hi) { 5 });
+ if (x != 0x0005000500050505)
+ __builtin_abort ();
+}
--
2.25.1
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] x86: Mark scratch operand in ssse3_pshufbv8qi3 as earlyclobber
2020-04-03 16:51 [PATCH] x86: Mark scratch operand in ssse3_pshufbv8qi3 as earlyclobber H.J. Lu
@ 2020-04-03 16:57 ` Uros Bizjak
0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2020-04-03 16:57 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches, Jakub Jelinek
On Fri, Apr 3, 2020 at 6:51 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> commit 16ed2601ad0a4aa82f11e9df86ea92183f94f979
> Author: H.J. Lu <hongjiu.lu@intel.com>
> Date: Wed May 15 15:26:19 2019 +0000
>
> i386: Emulate MMX pshufb with SSE version
>
> has
>
> +(define_insn_and_split "ssse3_pshufbv8qi3"
> + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
> + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
> + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
> + UNSPEC_PSHUFB))
> + (clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
> ^^^ There are earlyclobber.
> + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
> + "@
> + pshufb\t{%2, %0|%0, %2}
> + #
> + #"
> + "TARGET_MMX_WITH_SSE && reload_completed"
> + [(set (match_dup 3) (match_dup 5))
> + (set (match_dup 3)
> + (and:V4SI (match_dup 3) (match_dup 2)))
> + (set (match_dup 0)
> + (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
>
> If input register operand 2 is dead after this insn, RA may choose it
> as scratch operand. Since it isn't marked as earlyclobber, operand 2
> becomes unused after split and then it gets optimized out. Mark scratch
> operand as earlyclobber fixes the issue.
>
> OK for master if there are no regressions?
>
> H.J.
> --
> gcc/
>
> PR target/94467
> * config/i386/sse.md (ssse3_pshufbv8qi3): Mark scratch operand
> as earlyclobber.
>
> gcc/
>
> PR target/94467
> * testsuite/gcc.target/i386/pr94467-1.c: New test.
> * testsuite/gcc.target/i386/pr94467-2.c: Likewise.
OK.
Thanks,
Uros.
> ---
> gcc/config/i386/sse.md | 2 +-
> gcc/testsuite/gcc.target/i386/pr94467-1.c | 40 +++++++++++++++++++
> gcc/testsuite/gcc.target/i386/pr94467-2.c | 48 +++++++++++++++++++++++
> 3 files changed, 89 insertions(+), 1 deletion(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr94467-1.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr94467-2.c
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index fba91b7369a..1de03a515d9 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -16695,7 +16695,7 @@ (define_insn_and_split "ssse3_pshufbv8qi3"
> (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
> (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
> UNSPEC_PSHUFB))
> - (clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
> + (clobber (match_scratch:V4SI 3 "=X,&x,&Yv"))]
> "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
> "@
> pshufb\t{%2, %0|%0, %2}
> diff --git a/gcc/testsuite/gcc.target/i386/pr94467-1.c b/gcc/testsuite/gcc.target/i386/pr94467-1.c
> new file mode 100644
> index 00000000000..a51c3a8f5fe
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr94467-1.c
> @@ -0,0 +1,40 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target avx } */
> +/* { dg-options "-O -mavx" } */
> +
> +#include "avx-check.h"
> +
> +typedef char __attribute__ ((__vector_size__ (8))) v8qi;
> +typedef short __attribute__ ((__vector_size__ (8))) v4hi;
> +typedef int __attribute__ ((__vector_size__ (8))) v2si;
> +typedef long long __attribute__ ((__vector_size__ (8))) v1di;
> +typedef unsigned long long u64;
> +u64 k, c;
> +
> +v8qi g, h, p, q;
> +v4hi d, e, f, l, n, o;
> +v2si j;
> +
> +u64
> +foo (v4hi r)
> +{
> + v8qi s;
> + f = (v4hi) j;
> + e = __builtin_ia32_psrlwi ((v4hi) k, c);
> + s = __builtin_ia32_pavgb (h, h);
> + n = __builtin_ia32_pabsw (f);
> + o = __builtin_ia32_psubusw (n, l);
> + p = __builtin_ia32_packsswb (r, o);
> + q = __builtin_ia32_pshufb (p, s);
> + g = __builtin_ia32_punpcklbw (q, (v8qi) r);
> + d = r;
> + return (u64) g + (u64) h + (u64) j;
> +}
> +
> +static void
> +avx_test (void)
> +{
> + u64 x = foo ((v4hi) { 5 });
> + if (x != 0x0005000500050505)
> + __builtin_abort ();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr94467-2.c b/gcc/testsuite/gcc.target/i386/pr94467-2.c
> new file mode 100644
> index 00000000000..8128be325e4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr94467-2.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target ssse3 } */
> +/* { dg-options "-O -mssse3" } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +typedef char __attribute__ ((__vector_size__ (8))) v8qi;
> +typedef short __attribute__ ((__vector_size__ (8))) v4hi;
> +typedef int __attribute__ ((__vector_size__ (8))) v2si;
> +typedef long long __attribute__ ((__vector_size__ (8))) v1di;
> +typedef unsigned long long u64;
> +u64 k, c;
> +
> +v8qi g, h, p, q;
> +v4hi d, e, f, l, n, o;
> +v2si j;
> +
> +u64
> +foo (v4hi r)
> +{
> + v8qi s;
> + f = (v4hi) j;
> + e = __builtin_ia32_psrlwi ((v4hi) k, c);
> + s = __builtin_ia32_pavgb (h, h);
> + n = __builtin_ia32_pabsw (f);
> + o = __builtin_ia32_psubusw (n, l);
> + p = __builtin_ia32_packsswb (r, o);
> + q = __builtin_ia32_pshufb (p, s);
> + g = __builtin_ia32_punpcklbw (q, (v8qi) r);
> + d = r;
> + return (u64) g + (u64) h + (u64) j;
> +}
> +
> +static void
> +ssse3_test (void)
> +{
> + u64 x = foo ((v4hi) { 5 });
> + if (x != 0x0005000500050505)
> + __builtin_abort ();
> +}
> --
> 2.25.1
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2020-04-03 16:57 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-03 16:51 [PATCH] x86: Mark scratch operand in ssse3_pshufbv8qi3 as earlyclobber H.J. Lu
2020-04-03 16:57 ` Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).