From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 7835) id 15E623850428; Wed, 16 Jun 2021 13:23:36 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 15E623850428 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Jonathan Wright To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-1534] aarch64: Model zero-high-half semantics of ADDHN/SUBHN instructions X-Act-Checkin: gcc X-Git-Author: Jonathan Wright X-Git-Refname: refs/heads/master X-Git-Oldrev: d0889b5d37ff40149b44e3c7d82f693d430cd891 X-Git-Newrev: dbfc149b639342a9555c60aa9ee787fb3d009316 Message-Id: <20210616132336.15E623850428@sourceware.org> Date: Wed, 16 Jun 2021 13:23:36 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 16 Jun 2021 13:23:36 -0000 https://gcc.gnu.org/g:dbfc149b639342a9555c60aa9ee787fb3d009316 commit r12-1534-gdbfc149b639342a9555c60aa9ee787fb3d009316 Author: Jonathan Wright Date: Mon Jun 14 16:18:44 2021 +0100 aarch64: Model zero-high-half semantics of ADDHN/SUBHN instructions Model the zero-high-half semantics of the narrowing arithmetic Neon instructions in the aarch64_<sur><addsub>hn<mode> RTL pattern. Modeling these semantics allows for better RTL combinations while also removing some register allocation issues as the compiler now knows that the operation is totally destructive. Add new tests to narrow_zero_high_half.c to verify the benefit of this change. gcc/ChangeLog: 2021-06-14 Jonathan Wright * config/aarch64/aarch64-simd.md (aarch64_<sur><addsub>hn<mode>): Change to an expander that emits the correct instruction depending on endianness. (aarch64_<sur><addsub>hn<mode>_insn_le): Define. (aarch64_<sur><addsub>hn<mode>_insn_be): Define. gcc/testsuite/ChangeLog: * gcc.target/aarch64/narrow_zero_high_half.c: Add new tests. 
Diff: --- gcc/config/aarch64/aarch64-simd.md | 49 +++++++++++++++++++--- .../gcc.target/aarch64/narrow_zero_high_half.c | 40 ++++++++++++++++++ 2 files changed, 83 insertions(+), 6 deletions(-) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 2b75e57eb77..540244cf0a9 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -4661,16 +4661,53 @@ ;; <r><addsub>hn<q>. -(define_insn "aarch64_<sur><addsub>hn<mode>" - [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") - (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w") - (match_operand:VQN 2 "register_operand" "w")] - ADDSUBHN))] - "TARGET_SIMD" +(define_insn "aarch64_<sur><addsub>hn<mode>_insn_le" + [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") + (vec_concat:<VNARROWQ2> + (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w") + (match_operand:VQN 2 "register_operand" "w")] + ADDSUBHN) + (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))] + "TARGET_SIMD && !BYTES_BIG_ENDIAN" + "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "type" "neon_<addsub>_halve_narrow_q")] +) + +(define_insn "aarch64_<sur><addsub>hn<mode>_insn_be" + [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") + (vec_concat:<VNARROWQ2> + (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero") + (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w") + (match_operand:VQN 2 "register_operand" "w")] + ADDSUBHN)))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>" [(set_attr "type" "neon_<addsub>_halve_narrow_q")] ) +(define_expand "aarch64_<sur><addsub>hn<mode>" + [(set (match_operand:<VNARROWQ> 0 "register_operand") + (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand") + (match_operand:VQN 2 "register_operand")] + ADDSUBHN))] + "TARGET_SIMD" + { + rtx tmp = gen_reg_rtx (<VNARROWQ2>mode); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_be (tmp, operands[1], + operands[2], CONST0_RTX (<VNARROWQ>mode))); + else + emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_le (tmp, operands[1], + operands[2], CONST0_RTX (<VNARROWQ>mode))); + + /* The intrinsic expects a narrow result, so emit a subreg that will get + optimized away as appropriate. 
*/ + emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp, + <VNARROWQ2>mode)); + DONE; + } +) + (define_insn "aarch64_<sur><addsub>hn2<mode>_insn_le" [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") (vec_concat:<VNARROWQ2> diff --git a/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c b/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c index aa6c7ef389d..dd5ddf83b99 100644 --- a/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c +++ b/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c @@ -74,6 +74,42 @@ TEST_UNARY (vqmovn, uint8x16_t, uint16x8_t, u16, u8) TEST_UNARY (vqmovn, uint16x8_t, uint32x4_t, u32, u16) TEST_UNARY (vqmovn, uint32x4_t, uint64x2_t, u64, u32) +#define TEST_ARITH(name, rettype, intype, fs, rs) \ + rettype test_ ## name ## _ ## fs ## _zero_high \ + (intype a, intype b) \ + { \ + return vcombine_ ## rs (name ## _ ## fs (a, b), \ + vdup_n_ ## rs (0)); \ + } + +TEST_ARITH (vaddhn, int8x16_t, int16x8_t, s16, s8) +TEST_ARITH (vaddhn, int16x8_t, int32x4_t, s32, s16) +TEST_ARITH (vaddhn, int32x4_t, int64x2_t, s64, s32) +TEST_ARITH (vaddhn, uint8x16_t, uint16x8_t, u16, u8) +TEST_ARITH (vaddhn, uint16x8_t, uint32x4_t, u32, u16) +TEST_ARITH (vaddhn, uint32x4_t, uint64x2_t, u64, u32) + +TEST_ARITH (vraddhn, int8x16_t, int16x8_t, s16, s8) +TEST_ARITH (vraddhn, int16x8_t, int32x4_t, s32, s16) +TEST_ARITH (vraddhn, int32x4_t, int64x2_t, s64, s32) +TEST_ARITH (vraddhn, uint8x16_t, uint16x8_t, u16, u8) +TEST_ARITH (vraddhn, uint16x8_t, uint32x4_t, u32, u16) +TEST_ARITH (vraddhn, uint32x4_t, uint64x2_t, u64, u32) + +TEST_ARITH (vsubhn, int8x16_t, int16x8_t, s16, s8) +TEST_ARITH (vsubhn, int16x8_t, int32x4_t, s32, s16) +TEST_ARITH (vsubhn, int32x4_t, int64x2_t, s64, s32) +TEST_ARITH (vsubhn, uint8x16_t, uint16x8_t, u16, u8) +TEST_ARITH (vsubhn, uint16x8_t, uint32x4_t, u32, u16) +TEST_ARITH (vsubhn, uint32x4_t, uint64x2_t, u64, u32) + +TEST_ARITH (vrsubhn, int8x16_t, int16x8_t, s16, s8) +TEST_ARITH (vrsubhn, int16x8_t, int32x4_t, s32, s16) +TEST_ARITH (vrsubhn, int32x4_t, 
int64x2_t, s64, s32) +TEST_ARITH (vrsubhn, uint8x16_t, uint16x8_t, u16, u8) +TEST_ARITH (vrsubhn, uint16x8_t, uint32x4_t, u32, u16) +TEST_ARITH (vrsubhn, uint32x4_t, uint64x2_t, u64, u32) + /* { dg-final { scan-assembler-not "dup\\t" } } */ /* { dg-final { scan-assembler-times "\\tshrn\\tv" 6} } */ @@ -88,3 +124,7 @@ TEST_UNARY (vqmovn, uint32x4_t, uint64x2_t, u64, u32) /* { dg-final { scan-assembler-times "\\tsqxtun\\tv" 3} } */ /* { dg-final { scan-assembler-times "\\tuqxtn\\tv" 3} } */ /* { dg-final { scan-assembler-times "\\tsqxtn\\tv" 3} } */ +/* { dg-final { scan-assembler-times "\\taddhn\\tv" 6} } */ +/* { dg-final { scan-assembler-times "\\tsubhn\\tv" 6} } */ +/* { dg-final { scan-assembler-times "\\trsubhn\\tv" 6} } */ +/* { dg-final { scan-assembler-times "\\traddhn\\tv" 6} } */