public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-1534] aarch64: Model zero-high-half semantics of ADDHN/SUBHN instructions
@ 2021-06-16 13:23 Jonathan Wright
0 siblings, 0 replies; only message in thread
From: Jonathan Wright @ 2021-06-16 13:23 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:dbfc149b639342a9555c60aa9ee787fb3d009316
commit r12-1534-gdbfc149b639342a9555c60aa9ee787fb3d009316
Author: Jonathan Wright <jonathan.wright@arm.com>
Date: Mon Jun 14 16:18:44 2021 +0100
aarch64: Model zero-high-half semantics of ADDHN/SUBHN instructions
Model the zero-high-half semantics of the narrowing arithmetic Neon
instructions in the aarch64_<sur><addsub>hn<mode> RTL pattern.
Modeling these semantics allows for better RTL combinations while
also removing some register allocation issues, as the compiler now
knows that the operation is totally destructive (it overwrites the
whole destination register, zeroing the high half).
Add new tests to narrow_zero_high_half.c to verify the benefit of
this change.
gcc/ChangeLog:
2021-06-14 Jonathan Wright <jonathan.wright@arm.com>
* config/aarch64/aarch64-simd.md (aarch64_<sur><addsub>hn<mode>):
Change to an expander that emits the correct instruction
depending on endianness.
(aarch64_<sur><addsub>hn<mode>_insn_le): Define.
(aarch64_<sur><addsub>hn<mode>_insn_be): Define.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/narrow_zero_high_half.c: Add new tests.
Diff:
---
gcc/config/aarch64/aarch64-simd.md | 49 +++++++++++++++++++---
.../gcc.target/aarch64/narrow_zero_high_half.c | 40 ++++++++++++++++++
2 files changed, 83 insertions(+), 6 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 2b75e57eb77..540244cf0a9 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4661,16 +4661,53 @@
;; <r><addsub>hn<q>.
-(define_insn "aarch64_<sur><addsub>hn<mode>"
- [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
- (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
- (match_operand:VQN 2 "register_operand" "w")]
- ADDSUBHN))]
- "TARGET_SIMD"
+(define_insn "aarch64_<sur><addsub>hn<mode>_insn_le"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+ (vec_concat:<VNARROWQ2>
+ (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
+ (match_operand:VQN 2 "register_operand" "w")]
+ ADDSUBHN)
+ (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
+ "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+ "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
+ [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
+)
+
+(define_insn "aarch64_<sur><addsub>hn<mode>_insn_be"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+ (vec_concat:<VNARROWQ2>
+ (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
+ (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
+ (match_operand:VQN 2 "register_operand" "w")]
+ ADDSUBHN)))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN"
"<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)
+(define_expand "aarch64_<sur><addsub>hn<mode>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand")
+ (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
+ (match_operand:VQN 2 "register_operand")]
+ ADDSUBHN))]
+ "TARGET_SIMD"
+ {
+ rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_be (tmp, operands[1],
+ operands[2], CONST0_RTX (<VNARROWQ>mode)));
+ else
+ emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_le (tmp, operands[1],
+ operands[2], CONST0_RTX (<VNARROWQ>mode)));
+
+ /* The intrinsic expects a narrow result, so emit a subreg that will get
+ optimized away as appropriate. */
+ emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
+ <VNARROWQ2>mode));
+ DONE;
+ }
+)
+
(define_insn "aarch64_<sur><addsub>hn2<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
diff --git a/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c b/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c
index aa6c7ef389d..dd5ddf83b99 100644
--- a/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c
+++ b/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c
@@ -74,6 +74,42 @@ TEST_UNARY (vqmovn, uint8x16_t, uint16x8_t, u16, u8)
TEST_UNARY (vqmovn, uint16x8_t, uint32x4_t, u32, u16)
TEST_UNARY (vqmovn, uint32x4_t, uint64x2_t, u64, u32)
+#define TEST_ARITH(name, rettype, intype, fs, rs) \
+ rettype test_ ## name ## _ ## fs ## _zero_high \
+ (intype a, intype b) \
+ { \
+ return vcombine_ ## rs (name ## _ ## fs (a, b), \
+ vdup_n_ ## rs (0)); \
+ }
+
+TEST_ARITH (vaddhn, int8x16_t, int16x8_t, s16, s8)
+TEST_ARITH (vaddhn, int16x8_t, int32x4_t, s32, s16)
+TEST_ARITH (vaddhn, int32x4_t, int64x2_t, s64, s32)
+TEST_ARITH (vaddhn, uint8x16_t, uint16x8_t, u16, u8)
+TEST_ARITH (vaddhn, uint16x8_t, uint32x4_t, u32, u16)
+TEST_ARITH (vaddhn, uint32x4_t, uint64x2_t, u64, u32)
+
+TEST_ARITH (vraddhn, int8x16_t, int16x8_t, s16, s8)
+TEST_ARITH (vraddhn, int16x8_t, int32x4_t, s32, s16)
+TEST_ARITH (vraddhn, int32x4_t, int64x2_t, s64, s32)
+TEST_ARITH (vraddhn, uint8x16_t, uint16x8_t, u16, u8)
+TEST_ARITH (vraddhn, uint16x8_t, uint32x4_t, u32, u16)
+TEST_ARITH (vraddhn, uint32x4_t, uint64x2_t, u64, u32)
+
+TEST_ARITH (vsubhn, int8x16_t, int16x8_t, s16, s8)
+TEST_ARITH (vsubhn, int16x8_t, int32x4_t, s32, s16)
+TEST_ARITH (vsubhn, int32x4_t, int64x2_t, s64, s32)
+TEST_ARITH (vsubhn, uint8x16_t, uint16x8_t, u16, u8)
+TEST_ARITH (vsubhn, uint16x8_t, uint32x4_t, u32, u16)
+TEST_ARITH (vsubhn, uint32x4_t, uint64x2_t, u64, u32)
+
+TEST_ARITH (vrsubhn, int8x16_t, int16x8_t, s16, s8)
+TEST_ARITH (vrsubhn, int16x8_t, int32x4_t, s32, s16)
+TEST_ARITH (vrsubhn, int32x4_t, int64x2_t, s64, s32)
+TEST_ARITH (vrsubhn, uint8x16_t, uint16x8_t, u16, u8)
+TEST_ARITH (vrsubhn, uint16x8_t, uint32x4_t, u32, u16)
+TEST_ARITH (vrsubhn, uint32x4_t, uint64x2_t, u64, u32)
+
/* { dg-final { scan-assembler-not "dup\\t" } } */
/* { dg-final { scan-assembler-times "\\tshrn\\tv" 6} } */
@@ -88,3 +124,7 @@ TEST_UNARY (vqmovn, uint32x4_t, uint64x2_t, u64, u32)
/* { dg-final { scan-assembler-times "\\tsqxtun\\tv" 3} } */
/* { dg-final { scan-assembler-times "\\tuqxtn\\tv" 3} } */
/* { dg-final { scan-assembler-times "\\tsqxtn\\tv" 3} } */
+/* { dg-final { scan-assembler-times "\\taddhn\\tv" 6} } */
+/* { dg-final { scan-assembler-times "\\tsubhn\\tv" 6} } */
+/* { dg-final { scan-assembler-times "\\trsubhn\\tv" 6} } */
+/* { dg-final { scan-assembler-times "\\traddhn\\tv" 6} } */
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-06-16 13:23 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-16 13:23 [gcc r12-1534] aarch64: Model zero-high-half semantics of ADDHN/SUBHN instructions Jonathan Wright
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).