[gcc r14-3981] aarch64: Coerce addresses to be suitable for LD1RQ

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc r14-3981] aarch64: Coerce addresses to be suitable for LD1RQ
@ 2023-09-14 10:40 Richard Sandiford
  0 siblings, 0 replies; only message in thread
From: Richard Sandiford @ 2023-09-14 10:40 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:0f1f6cf872a03d82ab1973780b37bb8572e96f58

commit r14-3981-g0f1f6cf872a03d82ab1973780b37bb8572e96f58
Author: Richard Sandiford <richard.sandiford@arm.com>
Date:   Thu Sep 14 11:39:53 2023 +0100

    aarch64: Coerce addresses to be suitable for LD1RQ
    
    In the following test:
    
      svuint8_t ld(uint8_t *ptr) { return svld1rq(svptrue_b8(), ptr + 2); }
    
    ptr + 2 is a valid address for an Advanced SIMD load, but not for
    an SVE load.  We therefore ended up generating:
    
            ldr     q0, [x0, 2]
            dup     z0.q, z0.q[0]
    
    This patch makes us generate LD1RQ for that case too.  It takes the
    slightly old-school approach of making the predicate broader than
    the constraint.  That is: any valid memory address is accepted as
    an operand before register allocation (RA).  If the instruction
    remains during RA, LRA will coerce the address to match the
    constraint.  If the instruction gets split before RA, the splitter
    will load invalid addresses into a scratch register.
    
    gcc/
            * config/aarch64/aarch64-sve.md (@aarch64_vec_duplicate_vq<mode>_le):
            Accept all nonimmediate_operands, but keep the existing constraints.
            If the instruction is split before RA, load invalid addresses into
            a temporary register.
            * config/aarch64/predicates.md (aarch64_sve_dup_ld1rq_operand): Delete.
    
    gcc/testsuite/
            * gcc.target/aarch64/sve/acle/general/ld1rq_1.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-sve.md                  | 15 +++++++++-
 gcc/config/aarch64/predicates.md                   |  4 ---
 .../gcc.target/aarch64/sve/acle/general/ld1rq_1.c  | 33 ++++++++++++++++++++++
 3 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index da5534c3e32..b223e7d3c9d 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -2611,11 +2611,18 @@
 )
 
 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
+;;
+;; The addressing mode range of LD1RQ does not match the addressing mode
+;; range of LDR Qn.  If the predicate enforced the LD1RQ range, we would
+;; not be able to combine LDR Qns outside that range.  The predicate
+;; therefore accepts all memory operands, with only the constraints
+;; enforcing the actual restrictions.  If the instruction is split
+;; before RA, we need to load invalid addresses into a temporary.
 
 (define_insn_and_split "@aarch64_vec_duplicate_vq<mode>_le"
   [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w")
 	(vec_duplicate:SVE_FULL
-	  (match_operand:<V128> 1 "aarch64_sve_dup_ld1rq_operand" "w, UtQ")))
+	  (match_operand:<V128> 1 "nonimmediate_operand" "w, UtQ")))
    (clobber (match_scratch:VNx16BI 2 "=X, Upl"))]
   "TARGET_SVE && !BYTES_BIG_ENDIAN"
   {
@@ -2633,6 +2640,12 @@
   "&& MEM_P (operands[1])"
   [(const_int 0)]
   {
+    if (can_create_pseudo_p ()
+        && !aarch64_sve_ld1rq_operand (operands[1], <V128>mode))
+      {
+	rtx addr = force_reg (Pmode, XEXP (operands[1], 0));
+	operands[1] = replace_equiv_address (operands[1], addr);
+      }
     if (GET_CODE (operands[2]) == SCRATCH)
       operands[2] = gen_reg_rtx (VNx16BImode);
     emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 2d8d1fe25c1..01de4743974 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -732,10 +732,6 @@
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "aarch64_sve_ld1r_operand")))
 
-(define_predicate "aarch64_sve_dup_ld1rq_operand"
-  (ior (match_operand 0 "register_operand")
-       (match_operand 0 "aarch64_sve_ld1rq_operand")))
-
 (define_predicate "aarch64_sve_ptrue_svpattern_immediate"
   (and (match_code "const")
        (match_test "aarch64_sve_ptrue_svpattern_p (op, NULL)")))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ld1rq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ld1rq_1.c
new file mode 100644
index 00000000000..9242c639731
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ld1rq_1.c
@@ -0,0 +1,33 @@
+/* { dg-options "-O2" } */
+
+#include <arm_sve.h>
+
+#define TEST_OFFSET(TYPE, SUFFIX, OFFSET) \
+  sv##TYPE##_t \
+  test_##TYPE##_##SUFFIX (TYPE##_t *ptr) \
+  { \
+    return svld1rq(svptrue_b8(), ptr + OFFSET); \
+  }
+
+#define TEST(TYPE) \
+  TEST_OFFSET (TYPE, 0, 0) \
+  TEST_OFFSET (TYPE, 1, 1) \
+  TEST_OFFSET (TYPE, 2, 2) \
+  TEST_OFFSET (TYPE, 16, 16) \
+  TEST_OFFSET (TYPE, 0x10000, 0x10000) \
+  TEST_OFFSET (TYPE, 0x10001, 0x10001) \
+  TEST_OFFSET (TYPE, m1, -1) \
+  TEST_OFFSET (TYPE, m2, -2) \
+  TEST_OFFSET (TYPE, m16, -16) \
+  TEST_OFFSET (TYPE, m0x10000, -0x10000) \
+  TEST_OFFSET (TYPE, m0x10001, -0x10001)
+
+TEST (int8)
+TEST (int16)
+TEST (uint32)
+TEST (uint64)
+
+/* { dg-final { scan-assembler-times {\tld1rqb\t} 11 { target aarch64_little_endian } } } */
+/* { dg-final { scan-assembler-times {\tld1rqh\t} 11 { target aarch64_little_endian } } } */
+/* { dg-final { scan-assembler-times {\tld1rqw\t} 11 { target aarch64_little_endian } } } */
+/* { dg-final { scan-assembler-times {\tld1rqd\t} 11 { target aarch64_little_endian } } } */

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-09-14 10:40 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-14 10:40 [gcc r14-3981] aarch64: Coerce addresses to be suitable for LD1RQ Richard Sandiford

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).