* [committed] i386: psrlq is not used for PERM<a,{0},1,2,3,4> [PR113871]
@ 2024-02-14 19:46 Uros Bizjak
0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2024-02-14 19:46 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 1811 bytes --]
Introduce vec_shl_<mode> and vec_shr_<mode> expanders to improve
'*a = __builtin_shufflevector(*a, (vect64){0}, 1, 2, 3, 4);'
and
'*a = __builtin_shufflevector((vect64){0}, *a, 3, 4, 5, 6);'
shuffles. The generated code improves from:
movzwl 6(%rdi), %eax
movzwl 4(%rdi), %edx
salq $16, %rax
orq %rdx, %rax
movzwl 2(%rdi), %edx
salq $16, %rax
orq %rdx, %rax
movq %rax, (%rdi)
to:
movq (%rdi), %xmm0
psrlq $16, %xmm0
movq %xmm0, (%rdi)
and to:
movq (%rdi), %xmm0
psllq $16, %xmm0
movq %xmm0, (%rdi)
in the second case.
The patch handles 32-bit vectors as well and improves generated code from:
movd (%rdi), %xmm0
pxor %xmm1, %xmm1
punpcklwd %xmm1, %xmm0
pshuflw $230, %xmm0, %xmm0
movd %xmm0, (%rdi)
to:
movd (%rdi), %xmm0
psrld $16, %xmm0
movd %xmm0, (%rdi)
and to:
movd (%rdi), %xmm0
pslld $16, %xmm0
movd %xmm0, (%rdi)
PR target/113871
gcc/ChangeLog:
* config/i386/mmx.md (V248FI): New mode iterator.
(V24FI_32): Ditto.
(vec_shl_<V248FI:mode>): New expander.
(vec_shl_<V24FI_32:mode>): Ditto.
(vec_shr_<V248FI:mode>): Ditto.
(vec_shr_<V24FI_32:mode>): Ditto.
* config/i386/sse.md (vec_shl_<V_128:mode>): Simplify expander.
(vec_shr_<V_128:mode>): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr113871-1a.c: New test.
* gcc.target/i386/pr113871-1b.c: New test.
* gcc.target/i386/pr113871-2a.c: New test.
* gcc.target/i386/pr113871-2b.c: New test.
* gcc.target/i386/pr113871-3a.c: New test.
* gcc.target/i386/pr113871-3b.c: New test.
* gcc.target/i386/pr113871-4a.c: New test.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 9237 bytes --]
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 6215b12f05f..075309cca9f 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -84,6 +84,11 @@ (define_mode_iterator V_16_32_64
(define_mode_iterator V2FI [V2SF V2SI])
(define_mode_iterator V24FI [V2SF V2SI V4HF V4HI])
+
+(define_mode_iterator V248FI [V2SF V2SI V4HF V4HI V8QI])
+
+(define_mode_iterator V24FI_32 [V2HF V2HI V4QI])
+
;; Mapping from integer vector mode to mnemonic suffix
(define_mode_attr mmxvecsize
[(V8QI "b") (V4QI "b") (V2QI "b")
@@ -3729,6 +3734,70 @@ (define_expand "v<insn>v4qi3"
DONE;
})
+(define_expand "vec_shl_<mode>"
+ [(set (match_operand:V248FI 0 "register_operand")
+ (ashift:V1DI
+ (match_operand:V248FI 1 "nonimmediate_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+ "TARGET_MMX_WITH_SSE"
+{
+ rtx op0 = gen_reg_rtx (V1DImode);
+ rtx op1 = force_reg (<MODE>mode, operands[1]);
+
+ emit_insn (gen_mmx_ashlv1di3
+ (op0, gen_lowpart (V1DImode, op1), operands[2]));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0));
+ DONE;
+})
+
+(define_expand "vec_shl_<mode>"
+ [(set (match_operand:V24FI_32 0 "register_operand")
+ (ashift:V1SI
+ (match_operand:V24FI_32 1 "nonimmediate_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+ "TARGET_SSE2"
+{
+ rtx op0 = gen_reg_rtx (V1SImode);
+ rtx op1 = force_reg (<MODE>mode, operands[1]);
+
+ emit_insn (gen_mmx_ashlv1si3
+ (op0, gen_lowpart (V1SImode, op1), operands[2]));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0));
+ DONE;
+})
+
+(define_expand "vec_shr_<mode>"
+ [(set (match_operand:V248FI 0 "register_operand")
+ (lshiftrt:V1DI
+ (match_operand:V248FI 1 "nonimmediate_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+ "TARGET_MMX_WITH_SSE"
+{
+ rtx op0 = gen_reg_rtx (V1DImode);
+ rtx op1 = force_reg (<MODE>mode, operands[1]);
+
+ emit_insn (gen_mmx_lshrv1di3
+ (op0, gen_lowpart (V1DImode, op1), operands[2]));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0));
+ DONE;
+})
+
+(define_expand "vec_shr_<mode>"
+ [(set (match_operand:V24FI_32 0 "register_operand")
+ (lshiftrt:V1SI
+ (match_operand:V24FI_32 1 "nonimmediate_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+ "TARGET_SSE2"
+{
+ rtx op0 = gen_reg_rtx (V1SImode);
+ rtx op1 = force_reg (<MODE>mode, operands[1]);
+
+ emit_insn (gen_mmx_lshrv1si3
+ (op0, gen_lowpart (V1SImode, op1), operands[2]));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0));
+ DONE;
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral comparisons
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index acd10908d76..1bc614ab702 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -16498,29 +16498,35 @@ (define_split
"operands[3] = XVECEXP (operands[2], 0, 0);")
(define_expand "vec_shl_<mode>"
- [(set (match_dup 3)
+ [(set (match_operand:V_128 0 "register_operand")
(ashift:V1TI
- (match_operand:V_128 1 "register_operand")
- (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
- (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
+ (match_operand:V_128 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
"TARGET_SSE2"
{
- operands[1] = gen_lowpart (V1TImode, operands[1]);
- operands[3] = gen_reg_rtx (V1TImode);
- operands[4] = gen_lowpart (<MODE>mode, operands[3]);
+ rtx op0 = gen_reg_rtx (V1TImode);
+ rtx op1 = force_reg (<MODE>mode, operands[1]);
+
+ emit_insn (gen_sse2_ashlv1ti3
+ (op0, gen_lowpart (V1TImode, op1), operands[2]));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0));
+ DONE;
})
(define_expand "vec_shr_<mode>"
- [(set (match_dup 3)
+ [(set (match_operand:V_128 0 "register_operand")
(lshiftrt:V1TI
- (match_operand:V_128 1 "register_operand")
- (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
- (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
+ (match_operand:V_128 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
"TARGET_SSE2"
{
- operands[1] = gen_lowpart (V1TImode, operands[1]);
- operands[3] = gen_reg_rtx (V1TImode);
- operands[4] = gen_lowpart (<MODE>mode, operands[3]);
+ rtx op0 = gen_reg_rtx (V1TImode);
+ rtx op1 = force_reg (<MODE>mode, operands[1]);
+
+ emit_insn (gen_sse2_lshrv1ti3
+ (op0, gen_lowpart (V1TImode, op1), operands[2]));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0));
+ DONE;
})
(define_expand "ashlv1ti3"
diff --git a/gcc/testsuite/gcc.target/i386/pr113871-1a.c b/gcc/testsuite/gcc.target/i386/pr113871-1a.c
new file mode 100644
index 00000000000..f720927b90d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113871-1a.c
@@ -0,0 +1,19 @@
+/* PR target/113871 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+typedef char vect64 __attribute__((vector_size(8)));
+
+void f (vect64 *a)
+{
+ *a = __builtin_shufflevector(*a, (vect64){0}, 1, 2, 3, 4, 5, 6, 7, 8);
+}
+
+/* { dg-final { scan-assembler "psrlq" } } */
+
+void g(vect64 *a)
+{
+ *a = __builtin_shufflevector((vect64){0}, *a, 7, 8, 9, 10, 11, 12, 13, 14);
+}
+
+/* { dg-final { scan-assembler "psllq" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr113871-1b.c b/gcc/testsuite/gcc.target/i386/pr113871-1b.c
new file mode 100644
index 00000000000..705cf5cfe56
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113871-1b.c
@@ -0,0 +1,19 @@
+/* PR target/113871 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef char vect32 __attribute__((vector_size(4)));
+
+void f (vect32 *a)
+{
+ *a = __builtin_shufflevector(*a, (vect32){0}, 1, 2, 3, 4);
+}
+
+/* { dg-final { scan-assembler "psrld" } } */
+
+void g(vect32 *a)
+{
+ *a = __builtin_shufflevector((vect32){0}, *a, 3, 4, 5, 6);
+}
+
+/* { dg-final { scan-assembler "pslld" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr113871-2a.c b/gcc/testsuite/gcc.target/i386/pr113871-2a.c
new file mode 100644
index 00000000000..5430f69908d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113871-2a.c
@@ -0,0 +1,19 @@
+/* PR target/113871 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+typedef short vect64 __attribute__((vector_size(8)));
+
+void f (vect64 *a)
+{
+ *a = __builtin_shufflevector(*a, (vect64){0}, 1, 2, 3, 4);
+}
+
+/* { dg-final { scan-assembler "psrlq" } } */
+
+void g(vect64 *a)
+{
+ *a = __builtin_shufflevector((vect64){0}, *a, 3, 4, 5, 6);
+}
+
+/* { dg-final { scan-assembler "psllq" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr113871-2b.c b/gcc/testsuite/gcc.target/i386/pr113871-2b.c
new file mode 100644
index 00000000000..06e2a444262
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113871-2b.c
@@ -0,0 +1,19 @@
+/* PR target/113871 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef short vect32 __attribute__((vector_size(4)));
+
+void f (vect32 *a)
+{
+ *a = __builtin_shufflevector(*a, (vect32){0}, 1, 2);
+}
+
+/* { dg-final { scan-assembler "psrld" } } */
+
+void g(vect32 *a)
+{
+ *a = __builtin_shufflevector((vect32){0}, *a, 1, 2);
+}
+
+/* { dg-final { scan-assembler "pslld" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr113871-3a.c b/gcc/testsuite/gcc.target/i386/pr113871-3a.c
new file mode 100644
index 00000000000..825d48e32b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113871-3a.c
@@ -0,0 +1,19 @@
+/* PR target/113871 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+typedef _Float16 vect64 __attribute__((vector_size(8)));
+
+void f (vect64 *a)
+{
+ *a = __builtin_shufflevector(*a, (vect64){0}, 1, 2, 3, 4);
+}
+
+/* { dg-final { scan-assembler "psrlq" } } */
+
+void g(vect64 *a)
+{
+ *a = __builtin_shufflevector((vect64){0}, *a, 3, 4, 5, 6);
+}
+
+/* { dg-final { scan-assembler "psllq" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr113871-3b.c b/gcc/testsuite/gcc.target/i386/pr113871-3b.c
new file mode 100644
index 00000000000..f8e02997eb5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113871-3b.c
@@ -0,0 +1,19 @@
+/* PR target/113871 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef _Float16 vect32 __attribute__((vector_size(4)));
+
+void f (vect32 *a)
+{
+ *a = __builtin_shufflevector(*a, (vect32){0}, 1, 2);
+}
+
+/* { dg-final { scan-assembler "psrld" } } */
+
+void g(vect32 *a)
+{
+ *a = __builtin_shufflevector((vect32){0}, *a, 1, 2);
+}
+
+/* { dg-final { scan-assembler "pslld" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr113871-4a.c b/gcc/testsuite/gcc.target/i386/pr113871-4a.c
new file mode 100644
index 00000000000..3887b1f33e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113871-4a.c
@@ -0,0 +1,19 @@
+/* PR target/113871 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+typedef int vect64 __attribute__((vector_size(8)));
+
+void f (vect64 *a)
+{
+ *a = __builtin_shufflevector(*a, (vect64){0}, 1, 2);
+}
+
+/* { dg-final { scan-assembler "psrlq" } } */
+
+void g(vect64 *a)
+{
+ *a = __builtin_shufflevector((vect64){0}, *a, 1, 2);
+}
+
+/* { dg-final { scan-assembler "psllq" } } */
^ permalink raw reply [flat|nested] 2+ messages in thread
* [committed] i386: psrlq is not used for PERM<a,{0},1,2,3,4> [PR113871]
@ 2024-02-27 17:44 Uros Bizjak
0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2024-02-27 17:44 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 328 bytes --]
Also handle V4BF and V2BF modes.
PR target/113871
gcc/ChangeLog:
* config/i386/mmx.md (V248FI): Add V4BF mode.
(V24FI_32): Add V2BF mode.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr113871-5a.c: New test.
* gcc.target/i386/pr113871-5b.c: New test.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Uros.
[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 1900 bytes --]
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 075309cca9f..2856ae6ffef 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -85,9 +85,9 @@ (define_mode_iterator V2FI [V2SF V2SI])
(define_mode_iterator V24FI [V2SF V2SI V4HF V4HI])
-(define_mode_iterator V248FI [V2SF V2SI V4HF V4HI V8QI])
+(define_mode_iterator V248FI [V2SF V2SI V4HF V4BF V4HI V8QI])
-(define_mode_iterator V24FI_32 [V2HF V2HI V4QI])
+(define_mode_iterator V24FI_32 [V2HF V2BF V2HI V4QI])
;; Mapping from integer vector mode to mnemonic suffix
(define_mode_attr mmxvecsize
diff --git a/gcc/testsuite/gcc.target/i386/pr113871-5a.c b/gcc/testsuite/gcc.target/i386/pr113871-5a.c
new file mode 100644
index 00000000000..25ab82a6eab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113871-5a.c
@@ -0,0 +1,19 @@
+/* PR target/113871 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+typedef __bf16 vect64 __attribute__((vector_size(8)));
+
+void f (vect64 *a)
+{
+ *a = __builtin_shufflevector(*a, (vect64){0}, 1, 2, 3, 4);
+}
+
+/* { dg-final { scan-assembler "psrlq" } } */
+
+void g(vect64 *a)
+{
+ *a = __builtin_shufflevector((vect64){0}, *a, 3, 4, 5, 6);
+}
+
+/* { dg-final { scan-assembler "psllq" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr113871-5b.c b/gcc/testsuite/gcc.target/i386/pr113871-5b.c
new file mode 100644
index 00000000000..363a0f516cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113871-5b.c
@@ -0,0 +1,19 @@
+/* PR target/113871 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef __bf16 vect32 __attribute__((vector_size(4)));
+
+void f (vect32 *a)
+{
+ *a = __builtin_shufflevector(*a, (vect32){0}, 1, 2);
+}
+
+/* { dg-final { scan-assembler "psrld" } } */
+
+void g(vect32 *a)
+{
+ *a = __builtin_shufflevector((vect32){0}, *a, 1, 2);
+}
+
+/* { dg-final { scan-assembler "pslld" } } */
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2024-02-27 17:45 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-14 19:46 [committed] i386: psrlq is not used for PERM<a,{0},1,2,3,4> [PR113871] Uros Bizjak
2024-02-27 17:44 Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).