From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1816) id 4CCE6385697D; Wed, 24 May 2023 13:53:23 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 4CCE6385697D DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1684936403; bh=IHBuLaWTNzicibiyi90z3q3rMGQIpFTCyR2CVCllbUg=; h=From:To:Subject:Date:From; b=PkDP4BMbdH+mbm3Qgk5fepmyAgdYRt8ZjJJRyIWD9UveMd6/p1MFdZwyQc5rNPxGu Ws1TVeLDAeX/h3Y8QYa/P9bY+f0UFYX4DxmPXN58oTugWuT67VfMtUHU6CLrcxgvWx gUgsg7nCX8IRBcOi47SUhtk5YaFzaSHg8eSSD768= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Kyrylo Tkachov To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-1167] aarch64: PR target/99195 Annotate vector shift patterns for vec-concat-zero X-Act-Checkin: gcc X-Git-Author: Kyrylo Tkachov X-Git-Refname: refs/heads/master X-Git-Oldrev: affee7dcfa1ee272d43ac7cb68cf423dbd956fd8 X-Git-Newrev: b30ab0dcf9db2ac6d81fb3743add1fbfa0d18f6e Message-Id: <20230524135323.4CCE6385697D@sourceware.org> Date: Wed, 24 May 2023 13:53:23 +0000 (GMT) List-Id: https://gcc.gnu.org/g:b30ab0dcf9db2ac6d81fb3743add1fbfa0d18f6e commit r14-1167-gb30ab0dcf9db2ac6d81fb3743add1fbfa0d18f6e Author: Kyrylo Tkachov Date: Wed May 24 14:52:34 2023 +0100 aarch64: PR target/99195 Annotate vector shift patterns for vec-concat-zero Continuing the series of straightforward annotations, this one handles the normal (not widening or narrowing) vector shifts. Tests included. Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf. gcc/ChangeLog: PR target/99195 * config/aarch64/aarch64-simd.md (aarch64_simd_lshr<mode>): Rename to... (aarch64_simd_lshr<mode><vczle><vczbe>): ... This. (aarch64_simd_ashr<mode>): Rename to... (aarch64_simd_ashr<mode><vczle><vczbe>): ... This. (aarch64_simd_imm_shl<mode>): Rename to... (aarch64_simd_imm_shl<mode><vczle><vczbe>): ... This. (aarch64_simd_reg_sshl<mode>): Rename to... (aarch64_simd_reg_sshl<mode><vczle><vczbe>): ... This. (aarch64_simd_reg_shl<mode>_unsigned): Rename to... 
(aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>): ... This. (aarch64_simd_reg_shl<mode>_signed): Rename to... (aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>): ... This. (vec_shr_<mode>): Rename to... (vec_shr_<mode><vczle><vczbe>): ... This. (aarch64_<sur>shl<mode>): Rename to... (aarch64_<sur>shl<mode><vczle><vczbe>): ... This. (aarch64_<sur>q<r>shl<mode>): Rename to... (aarch64_<sur>q<r>shl<mode><vczle><vczbe>): ... This. gcc/testsuite/ChangeLog: PR target/99195 * gcc.target/aarch64/simd/pr99195_1.c: Add testing for shifts. * gcc.target/aarch64/simd/pr99195_6.c: Likewise. * gcc.target/aarch64/simd/pr99195_8.c: New test. Diff: --- gcc/config/aarch64/aarch64-simd.md | 18 +++++------ gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c | 6 ++-- gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c | 10 ++++++ gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c | 39 +++++++++++++++++++++++ 4 files changed, 61 insertions(+), 12 deletions(-) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index af95bbb29a7..0df97310fd9 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1280,7 +1280,7 @@ DONE; }) -(define_insn "aarch64_simd_lshr<mode>" +(define_insn "aarch64_simd_lshr<mode><vczle><vczbe>" [(set (match_operand:VDQ_I 0 "register_operand" "=w") (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))] @@ -1289,7 +1289,7 @@ [(set_attr "type" "neon_shift_imm")] ) -(define_insn "aarch64_simd_ashr<mode>" +(define_insn "aarch64_simd_ashr<mode><vczle><vczbe>" [(set (match_operand:VDQ_I 0 "register_operand" "=w,w") (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,w") (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "D1,Dr")))] @@ -1312,7 +1312,7 @@ [(set_attr "type" "neon_shift_acc")] ) -(define_insn "aarch64_simd_imm_shl<mode>" +(define_insn "aarch64_simd_imm_shl<mode><vczle><vczbe>" [(set (match_operand:VDQ_I 0 "register_operand" "=w") (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))] @@ -1321,7 +1321,7 @@ [(set_attr "type" "neon_shift_imm")] ) -(define_insn "aarch64_simd_reg_sshl<mode>" 
+(define_insn "aarch64_simd_reg_sshl<mode><vczle><vczbe>" [(set (match_operand:VDQ_I 0 "register_operand" "=w") (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") (match_operand:VDQ_I 2 "register_operand" "w")))] @@ -1330,7 +1330,7 @@ [(set_attr "type" "neon_shift_reg")] ) -(define_insn "aarch64_simd_reg_shl<mode>_unsigned" +(define_insn "aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>" [(set (match_operand:VDQ_I 0 "register_operand" "=w") (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w") (match_operand:VDQ_I 2 "register_operand" "w")] @@ -1340,7 +1340,7 @@ [(set_attr "type" "neon_shift_reg")] ) -(define_insn "aarch64_simd_reg_shl<mode>_signed" +(define_insn "aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>" [(set (match_operand:VDQ_I 0 "register_operand" "=w") (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w") (match_operand:VDQ_I 2 "register_operand" "w")] @@ -1522,7 +1522,7 @@ ) ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero. -(define_insn "vec_shr_<mode>" +(define_insn "vec_shr_<mode><vczle><vczbe>" [(set (match_operand:VD 0 "register_operand" "=w") (unspec:VD [(match_operand:VD 1 "register_operand" "w") (match_operand:SI 2 "immediate_operand" "i")] @@ -6340,7 +6340,7 @@ ;; vshl -(define_insn "aarch64_<sur>shl<mode>" +(define_insn "aarch64_<sur>shl<mode><vczle><vczbe>" [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w") (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w") @@ -6354,7 +6354,7 @@ ;; vqshl -(define_insn "aarch64_<sur>q<r>shl<mode>" +(define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>" [(set (match_operand:VSDQ_I 0 "register_operand" "=w") (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w") diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c index fde501d28e3..8b6548a154f 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c @@ -65,9 +65,9 @@ OPNINETEEN (int8, 8, 16, s8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn OPNINETEEN (int16, 4, 8, s16, padd, add, qadd, qsub, sub, mul, and, orr, eor, 
orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) OPNINETEEN (int32, 2, 4, s32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) -OPFOUR (int8, 8, 16, s8, zip1, zip2, uzp1, uzp2) -OPFOUR (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2) -OPFOUR (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2) +OPSIX (int8, 8, 16, s8, zip1, zip2, uzp1, uzp2, shl, qshl) +OPSIX (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2, shl, qshl) +OPSIX (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2, shl, qshl) OPNINETEEN (uint8, 8, 16, u8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) OPNINETEEN (uint16, 4, 8, u16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin) diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c index 52ad2709400..c86506e96d1 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_6.c @@ -25,6 +25,16 @@ MYOP (uint16x8_t, uint16x4_t, int16x4_t, sqadd, u16) MYOP (uint32x4_t, uint32x2_t, int32x2_t, sqadd, u32) MYOP (uint64x2_t, uint64x1_t, int64x1_t, sqadd, u64) +MYOP (uint8x16_t, uint8x8_t, int8x8_t, shl, u8) +MYOP (uint16x8_t, uint16x4_t, int16x4_t, shl, u16) +MYOP (uint32x4_t, uint32x2_t, int32x2_t, shl, u32) +MYOP (uint64x2_t, uint64x1_t, int64x1_t, shl, u64) + +MYOP (uint8x16_t, uint8x8_t, int8x8_t, qshl, u8) +MYOP (uint16x8_t, uint16x4_t, int16x4_t, qshl, u16) +MYOP (uint32x4_t, uint32x2_t, int32x2_t, qshl, u32) +MYOP (uint64x2_t, uint64x1_t, int64x1_t, qshl, u64) + /* { dg-final { scan-assembler-not {\tfmov\t} } } */ /* { dg-final { scan-assembler-not {\tmov\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c new file mode 100644 index 00000000000..29499e71df6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_8.c @@ 
-0,0 +1,39 @@
+/* PR target/99195. */
+/* Check that we take advantage of 64-bit Advanced SIMD operations clearing
+   the top half of the vector register and no explicit zeroing instructions
+   are emitted. */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+#define MYOP(OT,IT1,OP,S,OS) \
+OT \
+foo_##OP##_##S##OS (IT1 a) \
+{ \
+  IT1 zeros = vcreate_##S##OS (0); \
+  return vcombine_##S##OS (v##OP##_##S##OS (a, 3), zeros); \
+} \
+OT \
+foo_##OP##_##S##OS##_s (IT1 a) \
+{ \
+  IT1 zeros = vcreate_##S##OS (0); \
+  return vcombine_##S##OS (v##OP##_##S##OS (a, OS - 1), zeros); \
+}
+
+MYOP (int8x16_t, int8x8_t, shr_n, s, 8)
+MYOP (int16x8_t, int16x4_t, shr_n, s, 16)
+MYOP (int32x4_t, int32x2_t, shr_n, s, 32)
+MYOP (uint8x16_t, uint8x8_t, shr_n, u, 8)
+MYOP (uint16x8_t, uint16x4_t, shr_n, u, 16)
+MYOP (uint32x4_t, uint32x2_t, shr_n, u, 32)
+MYOP (int8x16_t, int8x8_t, shl_n, s, 8)
+MYOP (int16x8_t, int16x4_t, shl_n, s, 16)
+MYOP (int32x4_t, int32x2_t, shl_n, s, 32)
+MYOP (uint8x16_t, uint8x8_t, shl_n, u, 8)
+MYOP (uint16x8_t, uint16x4_t, shl_n, u, 16)
+MYOP (uint32x4_t, uint32x2_t, shl_n, u, 32)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\t} } } */
+