public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Uros Bizjak <uros@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-8989] i386: psrlq is not used for PERM<a, {0}, 1, 2, 3, 4> [PR113871] Date: Wed, 14 Feb 2024 19:44:49 +0000 (GMT) [thread overview] Message-ID: <20240214194449.D94403860770@sourceware.org> (raw) https://gcc.gnu.org/g:2c2f57e4158924467afbf4c2fd3938e507287dab commit r14-8989-g2c2f57e4158924467afbf4c2fd3938e507287dab Author: Uros Bizjak <ubizjak@gmail.com> Date: Wed Feb 14 20:41:42 2024 +0100 i386: psrlq is not used for PERM<a,{0},1,2,3,4> [PR113871] Introduce vec_shl_<mode> and vec_shr_<mode> expanders to improve '*a = __builtin_shufflevector(*a, (vect64){0}, 1, 2, 3, 4);' and '*a = __builtin_shufflevector((vect64){0}, *a, 3, 4, 5, 6);' shuffles. The generated code improves from: movzwl 6(%rdi), %eax movzwl 4(%rdi), %edx salq $16, %rax orq %rdx, %rax movzwl 2(%rdi), %edx salq $16, %rax orq %rdx, %rax movq %rax, (%rdi) to: movq (%rdi), %xmm0 psrlq $16, %xmm0 movq %xmm0, (%rdi) and to: movq (%rdi), %xmm0 psllq $16, %xmm0 movq %xmm0, (%rdi) in the second case. The patch handles 32-bit vectors as well and improves generated code from: movd (%rdi), %xmm0 pxor %xmm1, %xmm1 punpcklwd %xmm1, %xmm0 pshuflw $230, %xmm0, %xmm0 movd %xmm0, (%rdi) to: movd (%rdi), %xmm0 psrld $16, %xmm0 movd %xmm0, (%rdi) and to: movd (%rdi), %xmm0 pslld $16, %xmm0 movd %xmm0, (%rdi) PR target/113871 gcc/ChangeLog: * config/i386/mmx.md (V248FI): New mode iterator. (V24FI_32): Ditto. (vec_shl_<V248FI:mode>): New expander. (vec_shl_<V24FI_32:mode>): Ditto. (vec_shr_<V248FI:mode>): Ditto. (vec_shr_<V24FI_32:mode>): Ditto. * config/i386/sse.md (vec_shl_<V_128:mode>): Simplify expander. (vec_shr_<V_128:mode>): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr113871-1a.c: New test. * gcc.target/i386/pr113871-1b.c: New test. * gcc.target/i386/pr113871-2a.c: New test. * gcc.target/i386/pr113871-2b.c: New test. * gcc.target/i386/pr113871-3a.c: New test. * gcc.target/i386/pr113871-3b.c: New test. 
* gcc.target/i386/pr113871-4a.c: New test. Diff: --- gcc/config/i386/mmx.md | 69 +++++++++++++++++++++++++++++ gcc/config/i386/sse.md | 34 ++++++++------ gcc/testsuite/gcc.target/i386/pr113871-1a.c | 19 ++++++++ gcc/testsuite/gcc.target/i386/pr113871-1b.c | 19 ++++++++ gcc/testsuite/gcc.target/i386/pr113871-2a.c | 19 ++++++++ gcc/testsuite/gcc.target/i386/pr113871-2b.c | 19 ++++++++ gcc/testsuite/gcc.target/i386/pr113871-3a.c | 19 ++++++++ gcc/testsuite/gcc.target/i386/pr113871-3b.c | 19 ++++++++ gcc/testsuite/gcc.target/i386/pr113871-4a.c | 19 ++++++++ 9 files changed, 222 insertions(+), 14 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 6215b12f05f5..075309cca9f6 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -84,6 +84,11 @@ (define_mode_iterator V2FI [V2SF V2SI]) (define_mode_iterator V24FI [V2SF V2SI V4HF V4HI]) + +(define_mode_iterator V248FI [V2SF V2SI V4HF V4HI V8QI]) + +(define_mode_iterator V24FI_32 [V2HF V2HI V4QI]) + ;; Mapping from integer vector mode to mnemonic suffix (define_mode_attr mmxvecsize [(V8QI "b") (V4QI "b") (V2QI "b") @@ -3729,6 +3734,70 @@ DONE; }) +(define_expand "vec_shl_<mode>" + [(set (match_operand:V248FI 0 "register_operand") + (ashift:V1DI + (match_operand:V248FI 1 "nonimmediate_operand") + (match_operand:DI 2 "nonmemory_operand")))] + "TARGET_MMX_WITH_SSE" +{ + rtx op0 = gen_reg_rtx (V1DImode); + rtx op1 = force_reg (<MODE>mode, operands[1]); + + emit_insn (gen_mmx_ashlv1di3 + (op0, gen_lowpart (V1DImode, op1), operands[2])); + emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0)); + DONE; +}) + +(define_expand "vec_shl_<mode>" + [(set (match_operand:V24FI_32 0 "register_operand") + (ashift:V1SI + (match_operand:V24FI_32 1 "nonimmediate_operand") + (match_operand:DI 2 "nonmemory_operand")))] + "TARGET_SSE2" +{ + rtx op0 = gen_reg_rtx (V1SImode); + rtx op1 = force_reg (<MODE>mode, operands[1]); + + emit_insn (gen_mmx_ashlv1si3 + (op0, gen_lowpart (V1SImode, op1), 
operands[2])); + emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0)); + DONE; +}) + +(define_expand "vec_shr_<mode>" + [(set (match_operand:V248FI 0 "register_operand") + (lshiftrt:V1DI + (match_operand:V248FI 1 "nonimmediate_operand") + (match_operand:DI 2 "nonmemory_operand")))] + "TARGET_MMX_WITH_SSE" +{ + rtx op0 = gen_reg_rtx (V1DImode); + rtx op1 = force_reg (<MODE>mode, operands[1]); + + emit_insn (gen_mmx_lshrv1di3 + (op0, gen_lowpart (V1DImode, op1), operands[2])); + emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0)); + DONE; +}) + +(define_expand "vec_shr_<mode>" + [(set (match_operand:V24FI_32 0 "register_operand") + (lshiftrt:V1SI + (match_operand:V24FI_32 1 "nonimmediate_operand") + (match_operand:DI 2 "nonmemory_operand")))] + "TARGET_SSE2" +{ + rtx op0 = gen_reg_rtx (V1SImode); + rtx op1 = force_reg (<MODE>mode, operands[1]); + + emit_insn (gen_mmx_lshrv1si3 + (op0, gen_lowpart (V1SImode, op1), operands[2])); + emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0)); + DONE; +}) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel integral comparisons diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index acd10908d762..1bc614ab7027 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -16498,29 +16498,35 @@ "operands[3] = XVECEXP (operands[2], 0, 0);") (define_expand "vec_shl_<mode>" - [(set (match_dup 3) + [(set (match_operand:V_128 0 "register_operand") (ashift:V1TI - (match_operand:V_128 1 "register_operand") - (match_operand:SI 2 "const_0_to_255_mul_8_operand"))) - (set (match_operand:V_128 0 "register_operand") (match_dup 4))] + (match_operand:V_128 1 "nonimmediate_operand") + (match_operand:SI 2 "const_0_to_255_mul_8_operand")))] "TARGET_SSE2" { - operands[1] = gen_lowpart (V1TImode, operands[1]); - operands[3] = gen_reg_rtx (V1TImode); - operands[4] = gen_lowpart (<MODE>mode, operands[3]); + rtx op0 = gen_reg_rtx (V1TImode); + rtx op1 = force_reg (<MODE>mode, 
operands[1]); + + emit_insn (gen_sse2_ashlv1ti3 + (op0, gen_lowpart (V1TImode, op1), operands[2])); + emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0)); + DONE; }) (define_expand "vec_shr_<mode>" - [(set (match_dup 3) + [(set (match_operand:V_128 0 "register_operand") (lshiftrt:V1TI - (match_operand:V_128 1 "register_operand") - (match_operand:SI 2 "const_0_to_255_mul_8_operand"))) - (set (match_operand:V_128 0 "register_operand") (match_dup 4))] + (match_operand:V_128 1 "nonimmediate_operand") + (match_operand:SI 2 "const_0_to_255_mul_8_operand")))] "TARGET_SSE2" { - operands[1] = gen_lowpart (V1TImode, operands[1]); - operands[3] = gen_reg_rtx (V1TImode); - operands[4] = gen_lowpart (<MODE>mode, operands[3]); + rtx op0 = gen_reg_rtx (V1TImode); + rtx op1 = force_reg (<MODE>mode, operands[1]); + + emit_insn (gen_sse2_lshrv1ti3 + (op0, gen_lowpart (V1TImode, op1), operands[2])); + emit_move_insn (operands[0], gen_lowpart (<MODE>mode, op0)); + DONE; }) (define_expand "ashlv1ti3" diff --git a/gcc/testsuite/gcc.target/i386/pr113871-1a.c b/gcc/testsuite/gcc.target/i386/pr113871-1a.c new file mode 100644 index 000000000000..f720927b90dc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr113871-1a.c @@ -0,0 +1,19 @@ +/* PR target/113871 */ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2" } */ + +typedef char vect64 __attribute__((vector_size(8))); + +void f (vect64 *a) +{ + *a = __builtin_shufflevector(*a, (vect64){0}, 1, 2, 3, 4, 5, 6, 7, 8); +} + +/* { dg-final { scan-assembler "psrlq" } } */ + +void g(vect64 *a) +{ + *a = __builtin_shufflevector((vect64){0}, *a, 7, 8, 9, 10, 11, 12, 13, 14); +} + +/* { dg-final { scan-assembler "psllq" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr113871-1b.c b/gcc/testsuite/gcc.target/i386/pr113871-1b.c new file mode 100644 index 000000000000..705cf5cfe56f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr113871-1b.c @@ -0,0 +1,19 @@ +/* PR target/113871 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ + +typedef char vect32 __attribute__((vector_size(4))); + +void f (vect32 *a) +{ + *a = __builtin_shufflevector(*a, (vect32){0}, 1, 2, 3, 4); +} + +/* { dg-final { scan-assembler "psrld" } } */ + +void g(vect32 *a) +{ + *a = __builtin_shufflevector((vect32){0}, *a, 3, 4, 5, 6); +} + +/* { dg-final { scan-assembler "pslld" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr113871-2a.c b/gcc/testsuite/gcc.target/i386/pr113871-2a.c new file mode 100644 index 000000000000..5430f69908d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr113871-2a.c @@ -0,0 +1,19 @@ +/* PR target/113871 */ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2" } */ + +typedef short vect64 __attribute__((vector_size(8))); + +void f (vect64 *a) +{ + *a = __builtin_shufflevector(*a, (vect64){0}, 1, 2, 3, 4); +} + +/* { dg-final { scan-assembler "psrlq" } } */ + +void g(vect64 *a) +{ + *a = __builtin_shufflevector((vect64){0}, *a, 3, 4, 5, 6); +} + +/* { dg-final { scan-assembler "psllq" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr113871-2b.c b/gcc/testsuite/gcc.target/i386/pr113871-2b.c new file mode 100644 index 000000000000..06e2a4442623 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr113871-2b.c @@ -0,0 +1,19 @@ +/* PR target/113871 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ + +typedef short vect32 __attribute__((vector_size(4))); + +void f (vect32 *a) +{ + *a = __builtin_shufflevector(*a, (vect32){0}, 1, 2); +} + +/* { dg-final { scan-assembler "psrld" } } */ + +void g(vect32 *a) +{ + *a = __builtin_shufflevector((vect32){0}, *a, 1, 2); +} + +/* { dg-final { scan-assembler "pslld" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr113871-3a.c b/gcc/testsuite/gcc.target/i386/pr113871-3a.c new file mode 100644 index 000000000000..825d48e32b03 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr113871-3a.c @@ -0,0 +1,19 @@ +/* PR target/113871 */ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2" } */ + +typedef _Float16 vect64 __attribute__((vector_size(8))); + +void f (vect64 *a) +{ + *a = __builtin_shufflevector(*a, (vect64){0}, 1, 2, 3, 4); +} + +/* { dg-final { scan-assembler "psrlq" } } */ + +void g(vect64 *a) +{ + *a = __builtin_shufflevector((vect64){0}, *a, 3, 4, 5, 6); +} + +/* { dg-final { scan-assembler "psllq" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr113871-3b.c b/gcc/testsuite/gcc.target/i386/pr113871-3b.c new file mode 100644 index 000000000000..f8e02997eb58 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr113871-3b.c @@ -0,0 +1,19 @@ +/* PR target/113871 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ + +typedef _Float16 vect32 __attribute__((vector_size(4))); + +void f (vect32 *a) +{ + *a = __builtin_shufflevector(*a, (vect32){0}, 1, 2); +} + +/* { dg-final { scan-assembler "psrld" } } */ + +void g(vect32 *a) +{ + *a = __builtin_shufflevector((vect32){0}, *a, 1, 2); +} + +/* { dg-final { scan-assembler "pslld" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr113871-4a.c b/gcc/testsuite/gcc.target/i386/pr113871-4a.c new file mode 100644 index 000000000000..3887b1f33e88 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr113871-4a.c @@ -0,0 +1,19 @@ +/* PR target/113871 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2" } */ + +typedef int vect64 __attribute__((vector_size(8))); + +void f (vect64 *a) +{ + *a = __builtin_shufflevector(*a, (vect64){0}, 1, 2); +} + +/* { dg-final { scan-assembler "psrlq" } } */ + +void g(vect64 *a) +{ + *a = __builtin_shufflevector((vect64){0}, *a, 1, 2); +} + +/* { dg-final { scan-assembler "psllq" } } */
reply other threads:[~2024-02-14 19:44 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20240214194449.D94403860770@sourceware.org \ --to=uros@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).