public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: hongtao Liu <liuhongt@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc r13-518] Optimize vpermtiw/b to vpunpcklqdq for certain cases. Date: Tue, 17 May 2022 01:31:03 +0000 (GMT) [thread overview] Message-ID: <20220517013103.2DD703858C51@sourceware.org> (raw) https://gcc.gnu.org/g:105c56a8cfde6015b989ab22c20c915c1b4e69ec commit r13-518-g105c56a8cfde6015b989ab22c20c915c1b4e69ec Author: liuhongt <hongtao.liu@intel.com> Date: Fri May 13 09:59:13 2022 +0800 Optimize vpermtiw/b to vpunpcklqdq for certain cases. Assembly Optimization like: - vmovq %xmm0, %xmm2 - vmovdqa .LC0(%rip), %xmm0 vmovq %xmm1, %xmm1 - vpermi2w %xmm1, %xmm2, %xmm0 + vmovq %xmm0, %xmm0 + vpunpcklqdq %xmm1, %xmm0, %xmm0 ... -.LC0: - .value 0 - .value 1 - .value 2 - .value 3 - .value 8 - .value 9 - .value 10 - .value 11 gcc/ChangeLog: PR target/105033 * config/i386/sse.md (*vec_concatv4si): Extend to .. (*vec_concat<mode>): .. V16QI and V8HImode. (*vec_concatv16qi_permt2): New pre_reload define_insn_and_split. (*vec_concatv8hi_permt2): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr105033.c: New test. 
Diff: --- gcc/config/i386/sse.md | 64 +++++++++++++++++++++++++++++--- gcc/testsuite/gcc.target/i386/pr105033.c | 27 ++++++++++++++ 2 files changed, 86 insertions(+), 5 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 175ce013e5d..873d048acfe 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -19644,11 +19644,11 @@ (set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov") (set_attr "mode" "TI,TI,V4SF,SF,DI,DI")]) -(define_insn "*vec_concatv4si" - [(set (match_operand:V4SI 0 "register_operand" "=x,v,x,x,v") - (vec_concat:V4SI - (match_operand:V2SI 1 "register_operand" " 0,v,0,0,v") - (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))] +(define_insn "*vec_concat<mode>" + [(set (match_operand:VI124_128 0 "register_operand" "=x,v,x,x,v") + (vec_concat:VI124_128 + (match_operand:<ssehalfvecmode> 1 "register_operand" " 0,v,0,0,v") + (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" " x,v,x,m,m")))] "TARGET_SSE" "@ punpcklqdq\t{%2, %0|%0, %2} @@ -19661,6 +19661,60 @@ (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex") (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")]) +(define_insn_and_split "*vec_concatv16qi_permt2" + [(set (match_operand:V16QI 0 "register_operand") + (unspec:V16QI + [(const_vector:V16QI [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 16) (const_int 17) + (const_int 18) (const_int 19) + (const_int 20) (const_int 21) + (const_int 22) (const_int 23)]) + (match_operand:V16QI 1 "register_operand") + (match_operand:V16QI 2 "nonimmediate_operand")] + UNSPEC_VPERMT2))] + "TARGET_AVX512VL && TARGET_AVX512VBMI + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (vec_concat:V16QI (match_dup 1) (match_dup 2)))] +{ + operands[1] = lowpart_subreg (V8QImode, + force_reg (V16QImode, operands[1]), + V16QImode); + if (!MEM_P (operands[2])) + operands[2] = force_reg (V16QImode, operands[2]); + operands[2] 
= lowpart_subreg (V8QImode, operands[2], V16QImode); +}) + +(define_insn_and_split "*vec_concatv8hi_permt2" + [(set (match_operand:V8HI 0 "register_operand") + (unspec:V8HI + [(const_vector:V8HI [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 8) (const_int 9) + (const_int 10) (const_int 11)]) + (match_operand:V8HI 1 "register_operand") + (match_operand:V8HI 2 "nonimmediate_operand")] + UNSPEC_VPERMT2))] + "TARGET_AVX512VL && TARGET_AVX512BW + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (vec_concat:V8HI (match_dup 1) (match_dup 2)))] +{ + operands[1] = lowpart_subreg (V4HImode, + force_reg (V8HImode, operands[1]), + V8HImode); + if (!MEM_P (operands[2])) + operands[2] = force_reg (V8HImode, operands[2]); + operands[2] = lowpart_subreg (V4HImode, operands[2], V8HImode); +}) + (define_insn "*vec_concat<mode>_0" [(set (match_operand:VI124_128 0 "register_operand" "=v,x") (vec_concat:VI124_128 diff --git a/gcc/testsuite/gcc.target/i386/pr105033.c b/gcc/testsuite/gcc.target/i386/pr105033.c new file mode 100644 index 00000000000..ab05e3b3bc8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr105033.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-march=sapphirerapids -O2" } */ +/* { dg-final { scan-assembler-times {vpunpcklqdq[ \t]+} 3 } } */ +/* { dg-final { scan-assembler-not {vpermi2[wb][ \t]+} } } */ + +typedef _Float16 v8hf __attribute__((vector_size (16))); +typedef _Float16 v4hf __attribute__((vector_size (8))); +typedef short v8hi __attribute__((vector_size (16))); +typedef short v4hi __attribute__((vector_size (8))); +typedef char v16qi __attribute__((vector_size (16))); +typedef char v8qi __attribute__((vector_size (8))); + +v8hf foo (v4hf a, v4hf b) +{ + return __builtin_shufflevector (a, b, 0, 1, 2, 3, 4, 5, 6, 7); +} + +v8hi foo2 (v4hi a, v4hi b) +{ + return __builtin_shufflevector (a, b, 0, 1, 2, 3, 4, 5, 6, 7); +} + +v16qi foo3 (v8qi a, v8qi b) +{ + return __builtin_shufflevector (a, b, 0, 1, 2, 
3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15); +}
reply other threads:[~2022-05-17 1:31 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20220517013103.2DD703858C51@sourceware.org \ --to=liuhongt@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).