public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/ARM/heads/morello)] aarch64: Fix LDP/STP handling for Morello
@ 2022-05-05 12:06 Matthew Malcomson
  0 siblings, 0 replies; only message in thread
From: Matthew Malcomson @ 2022-05-05 12:06 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:7edecc9ac1e0e6eee2cfaaba3e148def639cc8c8

commit 7edecc9ac1e0e6eee2cfaaba3e148def639cc8c8
Author: Richard Sandiford <richard.sandiford@arm.com>
Date:   Wed Mar 30 05:02:43 2022 +0100

    aarch64: Fix LDP/STP handling for Morello
    
    There are two main ways of generating a single LDP/STP:
    
    - combine a vec_concat of two elements followed by a store
      of the result (query type ADDR_QUERY_LDP_STP_N).
    
    - opportunistically match two individual loads or stores to
      nearby locations (query type ADDR_QUERY_LDP_STP).
    
    This wasn't working for purecap (giving a missed optimisation)
    because extract_base_offset_in_addr didn't handle POINTER_PLUS.
    But the optimisation isn't available for alternative bases,
    since there are no LDP/STP forms for those.
    
    As well as matching single LDPs and STPs, we also have code to match
    sequences of 4 loads and stores whose addresses aren't directly valid
    for LDP/STP.  It's then worth paying the cost of doing some extra
    address arithmetic in order to produce 2 LDP/STP pairs.
    
    The optimisation didn't handle purecap correctly because the
    peepholes reserve a register in DImode:
    
    (define_peephole2
      [(match_scratch:DI 8 "r")
       ...
       (match_dup 8)]
    
    etc.  We need to coerce the register to the right base mode
    before using it.
    
    A later patch will handle LDP/STP for capability registers.
    
    There's an unrelated issue with the vectoriser dropping __capability.
    alt-base-pair-2.c works around that by disabling vectorisation.
    
    There should really be 5 zero stores in normal-base-pair-2.c;
    a later patch fixes this.

Diff:
---
 gcc/config/aarch64/aarch64.c                       | 24 +++++++++++----
 .../gcc.target/aarch64/morello/alt-base-pair-1.c   | 21 +++++++++++++
 .../gcc.target/aarch64/morello/alt-base-pair-2.c   | 24 +++++++++++++++
 .../gcc.target/aarch64/morello/alt-base-pair-3.c   | 35 ++++++++++++++++++++++
 .../aarch64/morello/normal-base-pair-1.c           | 21 +++++++++++++
 .../aarch64/morello/normal-base-pair-2.c           | 29 ++++++++++++++++++
 .../aarch64/morello/normal-base-pair-3.c           | 35 ++++++++++++++++++++++
 7 files changed, 183 insertions(+), 6 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 72ba5f4507f..99a6e4169e1 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9811,6 +9811,9 @@ aarch64_classify_address (struct aarch64_address_info *info,
   else
     ldr_str_mode = mode;
 
+  if (alt_base_p && ldp_stp_mode != VOIDmode)
+    return false;
+
   bool allow_reg_index_p = (ldp_stp_mode == VOIDmode
 			    && (known_lt (GET_MODE_SIZE (mode), 16)
 				|| mode == CADImode
@@ -23304,7 +23307,7 @@ extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
       return true;
     }
 
-  if (GET_CODE (addr) == PLUS
+  if (any_plus_p (addr)
       && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
     {
       *base = XEXP (addr, 0);
@@ -23692,6 +23695,12 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
       if (MEM_VOLATILE_P (mem[i]))
 	return false;
 
+      /* The address must use a normal rather than an alternative
+	 base register.  */
+      if (TARGET_CAPABILITY_HYBRID
+	  && CAPABILITY_MODE_P (mem_address_mode (mem[i])))
+	return false;
+
       /* Check if the addresses are in the form of [base+offset].  */
       extract_base_offset_in_addr (mem[i], base + i, offset + i);
       if (base[i] == NULL_RTX || offset[i] == NULL_RTX)
@@ -23864,13 +23873,15 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
       || new_off_3 > stp_off_upper_limit || new_off_3 < stp_off_lower_limit)
     return false;
 
-  replace_equiv_address_nv (mem_1, plus_constant (Pmode, operands[8],
+  auto addr_mode = mem_address_mode (mem_1);
+  rtx new_base = gen_rtx_REG (addr_mode, REGNO (operands[8]));
+  replace_equiv_address_nv (mem_1, plus_constant (addr_mode, new_base,
 						  new_off_1), true);
-  replace_equiv_address_nv (mem_2, plus_constant (Pmode, operands[8],
+  replace_equiv_address_nv (mem_2, plus_constant (addr_mode, new_base,
 						  new_off_1 + msize), true);
-  replace_equiv_address_nv (mem_3, plus_constant (Pmode, operands[8],
+  replace_equiv_address_nv (mem_3, plus_constant (addr_mode, new_base,
 						  new_off_3), true);
-  replace_equiv_address_nv (mem_4, plus_constant (Pmode, operands[8],
+  replace_equiv_address_nv (mem_4, plus_constant (addr_mode, new_base,
 						  new_off_3 + msize), true);
 
   if (!aarch64_mem_pair_operand (mem_1, mode)
@@ -23916,7 +23927,8 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
     }
 
   /* Emit adjusting instruction.  */
-  emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, base_off)));
+  emit_insn (gen_rtx_SET (new_base, plus_constant (addr_mode,
+						   base, base_off)));
   /* Emit ldp/stp instructions.  */
   t1 = gen_rtx_SET (operands[0], operands[1]);
   t2 = gen_rtx_SET (operands[2], operands[3]);
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-1.c
new file mode 100644
index 00000000000..c137b9090f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-1.c
@@ -0,0 +1,21 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } }  */
+
+#include <arm_neon.h>
+
+void
+fpr (uint32x4_t *__capability res, uint32x2_t v0, uint32x2_t v1)
+{
+  *res = vcombine_u32 (v0, v1);
+}
+
+void
+gpr (uint32x4_t *__capability res)
+{
+  uint32x2_t v0, v1;
+  asm ("" : "=r" (v0), "=r" (v1));
+  *res = vcombine_u32 (v0, v1);
+}
+
+/* { dg-final { scan-assembler-not {\tstp\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c
new file mode 100644
index 00000000000..959f0b6fdeb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c
@@ -0,0 +1,24 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-fpeephole2 -fno-tree-vectorize -mstrict-align -save-temps" } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } }  */
+
+#include <arm_neon.h>
+
+#define TEST_TYPE(TYPE)					\
+  void							\
+  test_##TYPE (TYPE *__capability ptr, TYPE a, TYPE b)	\
+  {							\
+    ptr[0] = a;						\
+    ptr[1] = b;						\
+    ptr[2] = (TYPE) { 0 };				\
+    ptr[3] = (TYPE) { 0 };				\
+  }
+
+TEST_TYPE (uint32_t)
+TEST_TYPE (uint64_t)
+TEST_TYPE (float)
+TEST_TYPE (double)
+TEST_TYPE (uint32x2_t)
+TEST_TYPE (uint32x4_t)
+
+/* { dg-final { scan-assembler-not {\tstp\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-3.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-3.c
new file mode 100644
index 00000000000..a900a66b969
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-3.c
@@ -0,0 +1,35 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-fpeephole2 -fno-tree-vectorize -save-temps" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-mabi=purecap" "-mfake-capability" } { "" } }  */
+
+#include <arm_neon.h>
+
+#define TEST_TYPE(TYPE)						\
+  void								\
+  test2_##TYPE (char *__capability vptr, TYPE a, TYPE b)	\
+  {								\
+    TYPE *__capability ptr = (TYPE *__capability) (vptr + 1);	\
+    ptr[0] = a;							\
+    ptr[1] = b;							\
+    ptr[2] = a;							\
+    ptr[3] = b;							\
+  }								\
+								\
+  void								\
+  test3_##TYPE (char *__capability vptr, TYPE a, TYPE b)	\
+  {								\
+    TYPE *__capability ptr = (TYPE *__capability) (vptr + 1);	\
+    ptr[0] = a;							\
+    ptr[1] = b;							\
+    ptr[2] = a;							\
+    ptr[3] = b;							\
+    ptr[4] = a;							\
+    ptr[5] = b;							\
+  }
+
+TEST_TYPE (uint32_t)
+TEST_TYPE (uint64_t)
+TEST_TYPE (float)
+TEST_TYPE (double)
+
+/* { dg-final { scan-assembler-not {\tstp\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-1.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-1.c
new file mode 100644
index 00000000000..bcd693fa62d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-1.c
@@ -0,0 +1,21 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } { "" } }  */
+
+#include <arm_neon.h>
+
+void
+fpr (uint32x4_t *res, uint32x2_t v0, uint32x2_t v1)
+{
+  *res = vcombine_u32 (v0, v1);
+}
+
+void
+gpr (uint32x4_t *res)
+{
+  uint32x2_t v0, v1;
+  asm ("" : "=r" (v0), "=r" (v1));
+  *res = vcombine_u32 (v0, v1);
+}
+
+/* { dg-final { scan-assembler-times {\tstp\t} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-2.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-2.c
new file mode 100644
index 00000000000..1bf9e852bc9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-2.c
@@ -0,0 +1,29 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-fpeephole2 -fno-tree-vectorize -mstrict-align -save-temps" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } }  */
+
+#include <arm_neon.h>
+
+#define TEST_TYPE(TYPE)				\
+  void						\
+  test_##TYPE (TYPE *ptr, TYPE a, TYPE b)	\
+  {						\
+    ptr[0] = a;					\
+    ptr[1] = b;					\
+    ptr[2] = (TYPE) { 0 };			\
+    ptr[3] = (TYPE) { 0 };			\
+  }
+
+TEST_TYPE (uint32_t)
+TEST_TYPE (uint64_t)
+TEST_TYPE (float)
+TEST_TYPE (double)
+TEST_TYPE (uint32x2_t)
+TEST_TYPE (uint32x4_t)
+
+/* { dg-final { scan-assembler-times {\tstp\tw[0-9]+,} 1 } } */
+/* { dg-final { scan-assembler-times {\tstp\tx[0-9]+,} 1 } } */
+/* { dg-final { scan-assembler-times {\tstp\ts[0-9]+,} 1 } } */
+/* { dg-final { scan-assembler-times {\tstp\td[0-9]+,} 2 } } */
+/* { dg-final { scan-assembler-times {\tstp\tq[0-9]+,} 2 } } */
+/* { dg-final { scan-assembler-times {\tstp\t[wx]zr,} 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-3.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-3.c
new file mode 100644
index 00000000000..a9bb13dd85d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-3.c
@@ -0,0 +1,35 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-fpeephole2 -fno-tree-vectorize -save-temps" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } }  */
+
+#include <arm_neon.h>
+
+#define TEST_TYPE(TYPE)				\
+  void						\
+  test2_##TYPE (char *vptr, TYPE a, TYPE b)	\
+  {						\
+    TYPE *ptr = (TYPE *) (vptr + 1);		\
+    ptr[0] = a;					\
+    ptr[1] = b;					\
+    ptr[2] = a;					\
+    ptr[3] = b;					\
+  }						\
+						\
+  void						\
+  test3_##TYPE (char *vptr, TYPE a, TYPE b)	\
+  {						\
+    TYPE *ptr = (TYPE *) (vptr + 1);		\
+    ptr[0] = a;					\
+    ptr[1] = b;					\
+    ptr[2] = a;					\
+    ptr[3] = b;					\
+    ptr[4] = a;					\
+    ptr[5] = b;					\
+  }
+
+TEST_TYPE (uint32_t)
+TEST_TYPE (uint64_t)
+TEST_TYPE (float)
+TEST_TYPE (double)
+
+/* { dg-final { scan-assembler-times {\tstp\t} 16 } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-05-05 12:06 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-05 12:06 [gcc(refs/vendors/ARM/heads/morello)] aarch64: Fix LDP/STP handling for Morello Matthew Malcomson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).