* [COMMITTED] i386: Add V8QI and V4QImode partial vector shift operations
@ 2023-05-23 16:02 Uros Bizjak
0 siblings, 0 replies; only message in thread
From: Uros Bizjak @ 2023-05-23 16:02 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 1152 bytes --]
Add V8QImode and V4QImode vector shift patterns that call into
ix86_expand_vecop_qihi_partial. Generate special sequences
for constant count operands.
The patch regresses g++.dg/pr91838.C - as explained in PR91838, the
test returns different results, depending on whether a V8QImode shift
pattern is present in the target *.md files. With the named expander,
the tree optimizers produce:
V f (V x)
{
V _2;
<bb 2> [local count: 1073741824]:
_2 = x_1(D) >> 8;
return _2;
}
and without the named expander:
V f (V x)
{
<bb 2> [local count: 1073741824]:
return { 0, 0, 0, 0, 0, 0, 0, 0 };
}
The RTL part just expands from there.
gcc/ChangeLog:
* config/i386/i386-expand.cc (ix86_expand_vecop_qihi_partial):
Call ix86_expand_vec_shift_qihi_constant for shifts
with constant count operand.
* config/i386/i386.cc (ix86_shift_rotate_cost):
Handle V4QImode and V8QImode.
* config/i386/mmx.md (<insn>v8qi3): New expander.
(<insn>v4qi3): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/vect-shiftv4qi.c: New test.
* gcc.target/i386/vect-shiftv8qi.c: New test.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 5198 bytes --]
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 50d9d34ebcb..ff3d382f1b4 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -23294,6 +23294,16 @@ ix86_expand_vecop_qihi_partial (enum rtx_code code, rtx dest, rtx op1, rtx op2)
else
qop2 = op2;
+ qdest = gen_reg_rtx (V16QImode);
+
+ if (CONST_INT_P (op2)
+ && (code == ASHIFT || code == LSHIFTRT || code == ASHIFTRT)
+ && ix86_expand_vec_shift_qihi_constant (code, qdest, qop1, qop2))
+ {
+ emit_move_insn (dest, gen_lowpart (qimode, qdest));
+ return;
+ }
+
switch (code)
{
case MULT:
@@ -23358,8 +23368,6 @@ ix86_expand_vecop_qihi_partial (enum rtx_code code, rtx dest, rtx op1, rtx op2)
bool ok;
int i;
- qdest = gen_reg_rtx (V16QImode);
-
/* Merge the data back into the right place. */
d.target = qdest;
d.op0 = qres;
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 38125ce284a..2710c6dfc56 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20580,6 +20580,37 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
switch (mode)
{
+ case V4QImode:
+ case V8QImode:
+ if (TARGET_AVX2)
+ /* Use vpbroadcast. */
+ extra = cost->sse_op;
+ else
+ extra = cost->sse_load[2];
+
+ if (constant_op1)
+ {
+ if (code == ASHIFTRT)
+ {
+ count = 4;
+ extra *= 2;
+ }
+ else
+ count = 2;
+ }
+ else if (TARGET_AVX512BW && TARGET_AVX512VL)
+ {
+ count = 3;
+ return ix86_vec_cost (mode, cost->sse_op * count);
+ }
+ else if (TARGET_SSE4_1)
+ count = 4;
+ else if (code == ASHIFTRT)
+ count = 5;
+ else
+ count = 4;
+ return ix86_vec_cost (mode, cost->sse_op * count) + extra;
+
case V16QImode:
if (TARGET_XOP)
{
@@ -20600,7 +20631,12 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
}
/* FALLTHRU */
case V32QImode:
- extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3];
+ if (TARGET_AVX2)
+ /* Use vpbroadcast. */
+ extra = cost->sse_op;
+ else
+ extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3];
+
if (constant_op1)
{
if (code == ASHIFTRT)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 45773673049..a37bbbb811f 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2680,6 +2680,28 @@
(const_string "0")))
(set_attr "mode" "TI")])
+(define_expand "<insn>v8qi3"
+ [(set (match_operand:V8QI 0 "register_operand")
+ (any_shift:V8QI (match_operand:V8QI 1 "register_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+ "TARGET_MMX_WITH_SSE"
+{
+ ix86_expand_vecop_qihi_partial (<CODE>, operands[0],
+ operands[1], operands[2]);
+ DONE;
+})
+
+(define_expand "<insn>v4qi3"
+ [(set (match_operand:V4QI 0 "register_operand")
+ (any_shift:V4QI (match_operand:V4QI 1 "register_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+ "TARGET_SSE2"
+{
+ ix86_expand_vecop_qihi_partial (<CODE>, operands[0],
+ operands[1], operands[2]);
+ DONE;
+})
+
(define_insn_and_split "<insn>v2qi3"
[(set (match_operand:V2QI 0 "register_operand" "=Q")
(any_shift:V2QI
diff --git a/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c b/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c
new file mode 100644
index 00000000000..c06dfb87bd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+#define N 4
+
+typedef unsigned char __vu __attribute__ ((__vector_size__ (N)));
+typedef signed char __vi __attribute__ ((__vector_size__ (N)));
+
+__vu sll (__vu a, int n)
+{
+ return a << n;
+}
+
+__vu sll_c (__vu a)
+{
+ return a << 5;
+}
+
+/* { dg-final { scan-assembler-times "psllw" 2 } } */
+
+__vu srl (__vu a, int n)
+{
+ return a >> n;
+}
+
+__vu srl_c (__vu a)
+{
+ return a >> 5;
+}
+
+/* { dg-final { scan-assembler-times "psrlw" 2 } } */
+
+__vi sra (__vi a, int n)
+{
+ return a >> n;
+}
+
+__vi sra_c (__vi a)
+{
+ return a >> 5;
+}
+
+/* { dg-final { scan-assembler-times "psraw" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c b/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c
new file mode 100644
index 00000000000..f5e8925aa25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c
@@ -0,0 +1,43 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+#define N 8
+
+typedef unsigned char __vu __attribute__ ((__vector_size__ (N)));
+typedef signed char __vi __attribute__ ((__vector_size__ (N)));
+
+__vu sll (__vu a, int n)
+{
+ return a << n;
+}
+
+__vu sll_c (__vu a)
+{
+ return a << 5;
+}
+
+/* { dg-final { scan-assembler-times "psllw" 2 } } */
+
+__vu srl (__vu a, int n)
+{
+ return a >> n;
+}
+
+__vu srl_c (__vu a)
+{
+ return a >> 5;
+}
+
+/* { dg-final { scan-assembler-times "psrlw" 2 } } */
+
+__vi sra (__vi a, int n)
+{
+ return a >> n;
+}
+
+__vi sra_c (__vi a)
+{
+ return a >> 5;
+}
+
+/* { dg-final { scan-assembler-times "psraw" 2 } } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-05-23 16:02 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-23 16:02 [COMMITTED] i386: Add V8QI and V4QImode partial vector shift operations Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).