[gcc r12-1533] aarch64: Model zero-high-half semantics of [SU]QXTN instructions

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc r12-1533] aarch64: Model zero-high-half semantics of [SU]QXTN instructions
@ 2021-06-16 13:23 Jonathan Wright
  0 siblings, 0 replies; only message in thread
From: Jonathan Wright @ 2021-06-16 13:23 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:d0889b5d37ff40149b44e3c7d82f693d430cd891

commit r12-1533-gd0889b5d37ff40149b44e3c7d82f693d430cd891
Author: Jonathan Wright <jonathan.wright@arm.com>
Date:   Mon Jun 14 15:09:18 2021 +0100

    aarch64: Model zero-high-half semantics of [SU]QXTN instructions
    
    Split the aarch64_<su>qmovn<mode> pattern into separate scalar and
    vector variants. Further split the vector RTL pattern into big/
    little endian variants that model the zero-high-half semantics of the
    underlying instruction. Modeling these semantics allows for better
    RTL combinations while also removing some register allocation issues
    as the compiler now knows that the operation is totally destructive.
    
    Add new tests to narrow_zero_high_half.c to verify the benefit of
    this change.
    
    gcc/ChangeLog:
    
    2021-06-14  Jonathan Wright  <jonathan.wright@arm.com>
    
            * config/aarch64/aarch64-simd-builtins.def: Split generator
            for aarch64_<su>qmovn builtins into scalar and vector
            variants.
            * config/aarch64/aarch64-simd.md (aarch64_<su>qmovn<mode>_insn_le):
            Define.
            (aarch64_<su>qmovn<mode>_insn_be): Define.
            (aarch64_<su>qmovn<mode>): Split into scalar and vector
            variants. Change vector variant to an expander that emits the
            correct instruction depending on endianness.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/narrow_zero_high_half.c: Add new tests.

Diff:
---
 gcc/config/aarch64/aarch64-simd-builtins.def       |  6 ++-
 gcc/config/aarch64/aarch64-simd.md                 | 48 +++++++++++++++++++++-
 .../gcc.target/aarch64/narrow_zero_high_half.c     |  9 ++++
 3 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 2adb4b12752..ac5d4fc7ff1 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -271,8 +271,10 @@
   BUILTIN_VQN (BINOP_UUS, sqxtun2, 0, NONE)
 
   /* Implemented by aarch64_<su>qmovn<mode>.  */
-  BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0, NONE)
-  BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0, NONE)
+  BUILTIN_VQN (UNOP, sqmovn, 0, NONE)
+  BUILTIN_SD_HSDI (UNOP, sqmovn, 0, NONE)
+  BUILTIN_VQN (UNOP, uqmovn, 0, NONE)
+  BUILTIN_SD_HSDI (UNOP, uqmovn, 0, NONE)
 
   /* Implemented by aarch64_<su>qxtn2<mode>.  */
   BUILTIN_VQN (BINOP, sqxtn2, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 59779b851fb..2b75e57eb77 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4875,10 +4875,54 @@
 (define_insn "aarch64_<su>qmovn<mode>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
 	(SAT_TRUNC:<VNARROWQ>
-    (match_operand:VSQN_HSDI 1 "register_operand" "w")))]
+	  (match_operand:SD_HSDI 1 "register_operand" "w")))]
   "TARGET_SIMD"
   "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
-   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
+  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
+)
+
+(define_insn "aarch64_<su>qmovn<mode>_insn_le"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+	(vec_concat:<VNARROWQ2>
+	  (SAT_TRUNC:<VNARROWQ>
+	    (match_operand:VQN 1 "register_operand" "w"))
+	  (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
+  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
+)
+
+(define_insn "aarch64_<su>qmovn<mode>_insn_be"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+	(vec_concat:<VNARROWQ2>
+	  (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
+	  (SAT_TRUNC:<VNARROWQ>
+	    (match_operand:VQN 1 "register_operand" "w"))))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN"
+  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
+  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_<su>qmovn<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+	(SAT_TRUNC:<VNARROWQ>
+	  (match_operand:VQN 1 "register_operand")))]
+  "TARGET_SIMD"
+  {
+    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_aarch64_<su>qmovn<mode>_insn_be (tmp, operands[1],
+				CONST0_RTX (<VNARROWQ>mode)));
+    else
+      emit_insn (gen_aarch64_<su>qmovn<mode>_insn_le (tmp, operands[1],
+				CONST0_RTX (<VNARROWQ>mode)));
+
+    /* The intrinsic expects a narrow result, so emit a subreg that will get
+       optimized away as appropriate.  */
+    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
+						 <VNARROWQ2>mode));
+    DONE;
+  }
 )
 
 (define_insn "aarch64_<su>qxtn2<mode>_le"
diff --git a/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c b/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c
index 53e03d3594d..aa6c7ef389d 100644
--- a/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c
+++ b/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c
@@ -67,6 +67,13 @@ TEST_UNARY (vqmovun, uint8x16_t, int16x8_t, s16, u8)
 TEST_UNARY (vqmovun, uint16x8_t, int32x4_t, s32, u16)
 TEST_UNARY (vqmovun, uint32x4_t, int64x2_t, s64, u32)
 
+TEST_UNARY (vqmovn, int8x16_t, int16x8_t, s16, s8)
+TEST_UNARY (vqmovn, int16x8_t, int32x4_t, s32, s16)
+TEST_UNARY (vqmovn, int32x4_t, int64x2_t, s64, s32)
+TEST_UNARY (vqmovn, uint8x16_t, uint16x8_t, u16, u8)
+TEST_UNARY (vqmovn, uint16x8_t, uint32x4_t, u32, u16)
+TEST_UNARY (vqmovn, uint32x4_t, uint64x2_t, u64, u32)
+
 /* { dg-final { scan-assembler-not "dup\\t" } } */
 
 /* { dg-final { scan-assembler-times "\\tshrn\\tv" 6} }  */
@@ -79,3 +86,5 @@ TEST_UNARY (vqmovun, uint32x4_t, int64x2_t, s64, u32)
 /* { dg-final { scan-assembler-times "\\tsqrshrun\\tv" 3} }  */
 /* { dg-final { scan-assembler-times "\\txtn\\tv" 6} }  */
 /* { dg-final { scan-assembler-times "\\tsqxtun\\tv" 3} }  */
+/* { dg-final { scan-assembler-times "\\tuqxtn\\tv" 3} }  */
+/* { dg-final { scan-assembler-times "\\tsqxtn\\tv" 3} }  */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-06-16 13:23 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-16 13:23 [gcc r12-1533] aarch64: Model zero-high-half semantics of [SU]QXTN instructions Jonathan Wright

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).