From: "Roger Sayle" <roger@nextmovesoftware.com>
To: <gcc-patches@gcc.gnu.org>
Cc: "'Hongtao Liu'" <hongtao.liu@intel.com>,
"'Uros Bizjak'" <ubizjak@gmail.com>
Subject: [x86 PATCH] Improve V[48]QI shifts on AVX512
Date: Thu, 9 May 2024 23:26:31 +0100 [thread overview]
Message-ID: <009601daa25f$f2a73c50$d7f5b4f0$@nextmovesoftware.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 1866 bytes --]
The following one-line patch improves the code generated for V8QI and V4QI
shifts when AVX512BW and AVX512VL functionality is available.
For the testcase (from gcc.target/i386/vect-shiftv8qi.c):
typedef signed char v8qi __attribute__ ((__vector_size__ (8)));
v8qi foo (v8qi x) { return x >> 5; }
GCC with -O2 -march=cascadelake currently generates:
foo: movl $67372036, %eax
vpsraw $5, %xmm0, %xmm2
vpbroadcastd %eax, %xmm1
movl $117901063, %eax
vpbroadcastd %eax, %xmm3
vmovdqa %xmm1, %xmm0
vmovdqa %xmm3, -24(%rsp)
vpternlogd $120, -24(%rsp), %xmm2, %xmm0
vpsubb %xmm1, %xmm0, %xmm0
ret
With this patch we now generate the much improved:
foo: vpmovsxbw %xmm0, %xmm0
vpsraw $5, %xmm0, %xmm0
vpmovwb %xmm0, %xmm0
ret
This patch also fixes the FAILs of gcc.target/i386/vect-shiftv[48]qi.c
when run with the additional -march=cascadelake flag, by splitting these
tests into two; one form testing code generation with -msse2 (and
-mno-avx512vl) as originally intended, and the other testing AVX512
code generation with an explicit -march=cascadelake.
This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures. Ok for mainline?
2024-05-09 Roger Sayle <roger@nextmovesoftware.com>
gcc/ChangeLog
* config/i386/i386-expand.cc (ix86_expand_vecop_qihi_partial):
Don't attempt ix86_expand_vec_shift_qihi_constant on AVX512.
gcc/testsuite/ChangeLog
* gcc.target/i386/vect-shiftv4qi.c: Specify -mno-avx2 and -mno-avx512vl.
* gcc.target/i386/vect-shiftv8qi.c: Likewise.
* gcc.target/i386/vect-shiftv4qi-2.c: New test case.
* gcc.target/i386/vect-shiftv8qi-2.c: Likewise.
Thanks in advance,
Roger
--
[-- Attachment #2: patchsv3.txt --]
[-- Type: text/plain, Size: 3490 bytes --]
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a613291..8eb31b2 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -24212,6 +24212,8 @@ ix86_expand_vecop_qihi_partial (enum rtx_code code, rtx dest, rtx op1, rtx op2)
if (CONST_INT_P (op2)
&& (code == ASHIFT || code == LSHIFTRT || code == ASHIFTRT)
+ /* With AVX512 it's cheaper to do vpmovsxbw/op/vpmovwb. */
+ && !(TARGET_AVX512BW && TARGET_AVX512VL && TARGET_SSE4_1)
&& ix86_expand_vec_shift_qihi_constant (code, qdest, qop1, qop2))
{
emit_move_insn (dest, gen_lowpart (qimode, qdest));
diff --git a/gcc/testsuite/gcc.target/i386/vect-shiftv4qi-2.c b/gcc/testsuite/gcc.target/i386/vect-shiftv4qi-2.c
new file mode 100644
index 0000000..abc1a27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-shiftv4qi-2.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=cascadelake" } */
+
+#define N 4
+
+typedef unsigned char __vu __attribute__ ((__vector_size__ (N)));
+typedef signed char __vi __attribute__ ((__vector_size__ (N)));
+
+__vu sll (__vu a, int n)
+{
+ return a << n;
+}
+
+__vu sll_c (__vu a)
+{
+ return a << 5;
+}
+
+/* { dg-final { scan-assembler-times "vpsllw" 2 } } */
+
+__vu srl (__vu a, int n)
+{
+ return a >> n;
+}
+
+__vu srl_c (__vu a)
+{
+ return a >> 5;
+}
+
+/* { dg-final { scan-assembler-times "vpsrlw" 2 } } */
+
+__vi sra (__vi a, int n)
+{
+ return a >> n;
+}
+
+__vi sra_c (__vi a)
+{
+ return a >> 5;
+}
+
+/* { dg-final { scan-assembler-times "vpsraw" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c b/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c
index b7e45c2..9b52582 100644
--- a/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c
+++ b/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -msse2" } */
+/* { dg-options "-O2 -msse2 -mno-avx2 -mno-avx512vl" } */
#define N 4
diff --git a/gcc/testsuite/gcc.target/i386/vect-shiftv8qi-2.c b/gcc/testsuite/gcc.target/i386/vect-shiftv8qi-2.c
new file mode 100644
index 0000000..52760f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-shiftv8qi-2.c
@@ -0,0 +1,43 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=cascadelake" } */
+
+#define N 8
+
+typedef unsigned char __vu __attribute__ ((__vector_size__ (N)));
+typedef signed char __vi __attribute__ ((__vector_size__ (N)));
+
+__vu sll (__vu a, int n)
+{
+ return a << n;
+}
+
+__vu sll_c (__vu a)
+{
+ return a << 5;
+}
+
+/* { dg-final { scan-assembler-times "vpsllw" 2 } } */
+
+__vu srl (__vu a, int n)
+{
+ return a >> n;
+}
+
+__vu srl_c (__vu a)
+{
+ return a >> 5;
+}
+
+/* { dg-final { scan-assembler-times "vpsrlw" 2 } } */
+
+__vi sra (__vi a, int n)
+{
+ return a >> n;
+}
+
+__vi sra_c (__vi a)
+{
+ return a >> 5;
+}
+
+/* { dg-final { scan-assembler-times "vpsraw" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c b/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c
index 2471e6e..3dfcfd2 100644
--- a/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c
+++ b/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O2 -msse2" } */
+/* { dg-options "-O2 -msse2 -mno-avx2 -mno-avx512vl" } */
#define N 8
next reply other threads:[~2024-05-09 22:26 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-09 22:26 Roger Sayle [this message]
2024-05-10 2:39 ` Hongtao Liu
2024-05-10 7:41 ` Roger Sayle
2024-05-10 7:56 ` Hongtao Liu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='009601daa25f$f2a73c50$d7f5b4f0$@nextmovesoftware.com' \
--to=roger@nextmovesoftware.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=hongtao.liu@intel.com \
--cc=ubizjak@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).