[gcc(refs/vendors/ARM/heads/morello)] aarch64: Optimise pairs of CADI accesses

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc(refs/vendors/ARM/heads/morello)] aarch64: Optimise pairs of CADI accesses
@ 2022-05-05 12:06 Matthew Malcomson
  0 siblings, 0 replies; only message in thread
From: Matthew Malcomson @ 2022-05-05 12:06 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:98a1d8bef1c8ffe320d2eda5b4cdd8de15786b28

commit 98a1d8bef1c8ffe320d2eda5b4cdd8de15786b28
Author: Richard Sandiford <richard.sandiford@arm.com>
Date:   Fri Apr 8 10:13:31 2022 +0100

    aarch64: Optimise pairs of CADI accesses
    
    After the previous two LDP/STP patches, this one adds support
    for optimising neighbouring LDR/STR Cns into LDP/STP Cns.
    Part of the point is to test that this doesn't/can't happen
    for alternative-base addresses.
    
    The tests require a couple of other fixes:
    
    - the fake movcadi pattern didn't allow direct stores of xzr
      (its memory-destination alternative accepted only "r", not "rZ")
    
    - CONST_NULL had a higher cost than a register, so we tried
      to CSE multiple CONST_NULLs using a separate MOV;
      aarch64_rtx_costs now gives CONST_NULL a cost of 0.

Diff:
---
 gcc/config/aarch64/aarch64-morello.md                       |  2 +-
 gcc/config/aarch64/aarch64.c                                | 13 +++++++++++--
 gcc/config/aarch64/predicates.md                            |  5 +++--
 gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c  |  3 +++
 .../gcc.target/aarch64/morello/normal-base-pair-2.c         |  7 +++++--
 5 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-morello.md b/gcc/config/aarch64/aarch64-morello.md
index db3beac3836..3ca049c14c8 100644
--- a/gcc/config/aarch64/aarch64-morello.md
+++ b/gcc/config/aarch64/aarch64-morello.md
@@ -143,7 +143,7 @@
 ; TODO: many more alternatives.
 (define_insn "*movcadi_aarch64"
   [(set (match_operand:CADI 0 "nonimmediate_operand" "=rk,r,r,m,r,r")
-	(match_operand:CADI 1 "aarch64_mov_operand" "rk,Z,m,r,Usa,Ush"))]
+	(match_operand:CADI 1 "aarch64_mov_operand" "rk,Z,m,rZ,Usa,Ush"))]
   "TARGET_CAPABILITY_FAKE"
   "@
    mov\\t%0, %1
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 91431957f6a..1bb5cb6ce86 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -12902,6 +12902,10 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
 	}
       return false;
 
+    case CONST_NULL:
+      *cost = 0;
+      return true;
+
     case CONST_INT:
       /* If an instruction can incorporate a constant within the
 	 instruction, the instruction's expression avoids calling
@@ -23493,7 +23497,9 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load)
   if (rclass_1 != rclass_2)
     return false;
 
-  if (msize == 16)
+  bool both_cap = (GET_MODE (mem_1) == CADImode
+		   && GET_MODE (mem_2) == CADImode);
+  if (msize == 16 && !both_cap)
     {
       /* Vector LDPs and STPs must use floating-point registers.  */
       if (rclass_1 != FP_REGS)
@@ -23700,6 +23706,7 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load)
   if (!MEM_P (mem[0]) || aarch64_mem_pair_operand (mem[0], mode))
     return false;
 
+  bool all_cap = true;
   for (int i = 0; i < num_insns; i++)
     {
       /* The mems cannot be volatile.  */
@@ -23720,6 +23727,8 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load)
       extract_base_offset_in_addr (mem[i], base + i, offset + i);
       if (base[i] == NULL_RTX || offset[i] == NULL_RTX)
 	return false;
+
+      all_cap &= (GET_MODE (mem[i]) == CADImode);
     }
 
   /* Check if the registers are of same class.  */
@@ -23738,7 +23747,7 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load)
 	  return false;
       }
 
-  if (msize == 16)
+  if (msize == 16 && !all_cap)
     {
       /* Vector LDPs and STPs must use floating-point registers.  */
       if (rclass != FP_REGS)
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 0f5570d59a4..d9a419c1c83 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -492,7 +492,7 @@
        (match_test "op == CONST1_RTX (GET_MODE (op))")))
 
 (define_predicate "aarch64_simd_or_scalar_imm_zero"
-  (and (match_code "const_int,const_double,const,const_vector")
+  (and (match_code "const_int,const_double,const,const_vector,const_null")
        (match_test "op == CONST0_RTX (GET_MODE (op))")))
 
 (define_predicate "aarch64_simd_imm_minus_one"
@@ -500,7 +500,8 @@
        (match_test "op == CONSTM1_RTX (GET_MODE (op))")))
 
 (define_predicate "aarch64_simd_reg_or_zero"
-  (and (match_code "reg,subreg,const_int,const_double,const,const_vector")
+  (and (match_code "reg,subreg,const_int,const_double,const,const_vector,
+		    const_null")
        (ior (match_operand 0 "register_operand")
 	    (match_test "op == const0_rtx")
 	    (match_operand 0 "aarch64_simd_or_scalar_imm_zero"))))
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c
index 959f0b6fdeb..6ad18dda97c 100644
--- a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c
@@ -4,6 +4,8 @@
 
 #include <arm_neon.h>
 
+typedef __uintcap_t uintcap_t;
+
 #define TEST_TYPE(TYPE)					\
   void							\
   test_##TYPE (TYPE *__capability ptr, TYPE a, TYPE b)	\
@@ -16,6 +18,7 @@
 
 TEST_TYPE (uint32_t)
 TEST_TYPE (uint64_t)
+TEST_TYPE (uintcap_t);
 TEST_TYPE (float)
 TEST_TYPE (double)
 TEST_TYPE (uint32x2_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-2.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-2.c
index 817fa5354f3..60e1e4dd8fd 100644
--- a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-2.c
+++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-2.c
@@ -4,6 +4,8 @@
 
 #include <arm_neon.h>
 
+typedef __uintcap_t uintcap_t;
+
 #define TEST_TYPE(TYPE)				\
   void						\
   test_##TYPE (TYPE *ptr, TYPE a, TYPE b)	\
@@ -16,14 +18,15 @@
 
 TEST_TYPE (uint32_t)
 TEST_TYPE (uint64_t)
+TEST_TYPE (uintcap_t);
 TEST_TYPE (float)
 TEST_TYPE (double)
 TEST_TYPE (uint32x2_t)
 TEST_TYPE (uint32x4_t)
 
 /* { dg-final { scan-assembler-times {\tstp\tw[0-9]+,} 1 } } */
-/* { dg-final { scan-assembler-times {\tstp\tx[0-9]+,} 1 } } */
+/* { dg-final { scan-assembler-times {\tstp\t[xc][0-9]+,} 2 } } */
 /* { dg-final { scan-assembler-times {\tstp\ts[0-9]+,} 1 } } */
 /* { dg-final { scan-assembler-times {\tstp\td[0-9]+,} 2 } } */
 /* { dg-final { scan-assembler-times {\tstp\tq[0-9]+,} 2 } } */
-/* { dg-final { scan-assembler-times {\tstp\t[wx]zr,} 5 } } */
+/* { dg-final { scan-assembler-times {\tstp\t[wxc]zr,} 6 } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-05-05 12:06 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-05 12:06 [gcc(refs/vendors/ARM/heads/morello)] aarch64: Optimise pairs of CADI accesses Matthew Malcomson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).