From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2078) id 2DD703858C51; Tue, 17 May 2022 01:31:03 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 2DD703858C51 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: hongtao Liu To: gcc-cvs@gcc.gnu.org Subject: [gcc r13-518] Optimize vpermtiw/b to vpunpcklqdq for certain cases. X-Act-Checkin: gcc X-Git-Author: liuhongt X-Git-Refname: refs/heads/master X-Git-Oldrev: 1fba0608d12a209a5d76d65bcb1dec1c07bc33e9 X-Git-Newrev: 105c56a8cfde6015b989ab22c20c915c1b4e69ec Message-Id: <20220517013103.2DD703858C51@sourceware.org> Date: Tue, 17 May 2022 01:31:03 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 17 May 2022 01:31:03 -0000 https://gcc.gnu.org/g:105c56a8cfde6015b989ab22c20c915c1b4e69ec commit r13-518-g105c56a8cfde6015b989ab22c20c915c1b4e69ec Author: liuhongt Date: Fri May 13 09:59:13 2022 +0800 Optimize vpermtiw/b to vpunpcklqdq for certain cases. Assembly Optimization like: - vmovq %xmm0, %xmm2 - vmovdqa .LC0(%rip), %xmm0 vmovq %xmm1, %xmm1 - vpermi2w %xmm1, %xmm2, %xmm0 + vmovq %xmm0, %xmm0 + vpunpcklqdq %xmm1, %xmm0, %xmm0 ... -.LC0: - .value 0 - .value 1 - .value 2 - .value 3 - .value 8 - .value 9 - .value 10 - .value 11 gcc/ChangeLog: PR target/105033 * config/i386/sse.md (*vec_concatv4si): Extend to .. (*vec_concat<mode>): .. V16QI and V8HImode. (*vec_concatv16qi_permt2): New pre_reload define_insn_and_split. (*vec_concatv8hi_permt2): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr105033.c: New test. 
Diff: --- gcc/config/i386/sse.md | 64 +++++++++++++++++++++++++++++--- gcc/testsuite/gcc.target/i386/pr105033.c | 27 ++++++++++++++ 2 files changed, 86 insertions(+), 5 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 175ce013e5d..873d048acfe 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -19644,11 +19644,11 @@ (set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov") (set_attr "mode" "TI,TI,V4SF,SF,DI,DI")]) -(define_insn "*vec_concatv4si" - [(set (match_operand:V4SI 0 "register_operand" "=x,v,x,x,v") - (vec_concat:V4SI - (match_operand:V2SI 1 "register_operand" " 0,v,0,0,v") - (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))] +(define_insn "*vec_concat<mode>" + [(set (match_operand:VI124_128 0 "register_operand" "=x,v,x,x,v") + (vec_concat:VI124_128 + (match_operand:<ssehalfvecmode> 1 "register_operand" " 0,v,0,0,v") + (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" " x,v,x,m,m")))] "TARGET_SSE" "@ punpcklqdq\t{%2, %0|%0, %2} @@ -19661,6 +19661,60 @@ (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex") (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")]) +(define_insn_and_split "*vec_concatv16qi_permt2" + [(set (match_operand:V16QI 0 "register_operand") + (unspec:V16QI + [(const_vector:V16QI [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7) + (const_int 16) (const_int 17) + (const_int 18) (const_int 19) + (const_int 20) (const_int 21) + (const_int 22) (const_int 23)]) + (match_operand:V16QI 1 "register_operand") + (match_operand:V16QI 2 "nonimmediate_operand")] + UNSPEC_VPERMT2))] + "TARGET_AVX512VL && TARGET_AVX512VBMI + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (vec_concat:V16QI (match_dup 1) (match_dup 2)))] +{ + operands[1] = lowpart_subreg (V8QImode, + force_reg (V16QImode, operands[1]), + V16QImode); + if (!MEM_P (operands[2])) + operands[2] = force_reg (V16QImode, operands[2]); + operands[2] = lowpart_subreg (V8QImode, 
operands[2], V16QImode); +}) + +(define_insn_and_split "*vec_concatv8hi_permt2" + [(set (match_operand:V8HI 0 "register_operand") + (unspec:V8HI + [(const_vector:V8HI [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 8) (const_int 9) + (const_int 10) (const_int 11)]) + (match_operand:V8HI 1 "register_operand") + (match_operand:V8HI 2 "nonimmediate_operand")] + UNSPEC_VPERMT2))] + "TARGET_AVX512VL && TARGET_AVX512BW + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (vec_concat:V8HI (match_dup 1) (match_dup 2)))] +{ + operands[1] = lowpart_subreg (V4HImode, + force_reg (V8HImode, operands[1]), + V8HImode); + if (!MEM_P (operands[2])) + operands[2] = force_reg (V8HImode, operands[2]); + operands[2] = lowpart_subreg (V4HImode, operands[2], V8HImode); +}) + (define_insn "*vec_concat_0" [(set (match_operand:VI124_128 0 "register_operand" "=v,x") (vec_concat:VI124_128 diff --git a/gcc/testsuite/gcc.target/i386/pr105033.c b/gcc/testsuite/gcc.target/i386/pr105033.c new file mode 100644 index 00000000000..ab05e3b3bc8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr105033.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-march=sapphirerapids -O2" } */ +/* { dg-final { scan-assembler-times {vpunpcklqdq[ \t]+} 3 } } */ +/* { dg-final { scan-assembler-not {vpermi2[wb][ \t]+} } } */ + +typedef _Float16 v8hf __attribute__((vector_size (16))); +typedef _Float16 v4hf __attribute__((vector_size (8))); +typedef short v8hi __attribute__((vector_size (16))); +typedef short v4hi __attribute__((vector_size (8))); +typedef char v16qi __attribute__((vector_size (16))); +typedef char v8qi __attribute__((vector_size (8))); + +v8hf foo (v4hf a, v4hf b) +{ + return __builtin_shufflevector (a, b, 0, 1, 2, 3, 4, 5, 6, 7); +} + +v8hi foo2 (v4hi a, v4hi b) +{ + return __builtin_shufflevector (a, b, 0, 1, 2, 3, 4, 5, 6, 7); +} + +v16qi foo3 (v8qi a, v8qi b) +{ + return __builtin_shufflevector (a, b, 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 
12, 13, 14, 15); +}