From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <matmal01@sourceware.org>
Received: by sourceware.org (Postfix, from userid 2049)
 id 5AD253856262; Thu,  5 May 2022 12:06:36 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 5AD253856262
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: Matthew Malcomson <matmal01@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc(refs/vendors/ARM/heads/morello)] aarch64: Optimise pairs of CADI
 accesses
X-Act-Checkin: gcc
X-Git-Author: Richard Sandiford <richard.sandiford@arm.com>
X-Git-Refname: refs/vendors/ARM/heads/morello
X-Git-Oldrev: e4c4aeb9d2987440cc6428cc910400789e83096f
X-Git-Newrev: 98a1d8bef1c8ffe320d2eda5b4cdd8de15786b28
Message-Id: <20220505120636.5AD253856262@sourceware.org>
Date: Thu,  5 May 2022 12:06:36 +0000 (GMT)
X-BeenThere: gcc-cvs@gcc.gnu.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Gcc-cvs mailing list <gcc-cvs.gcc.gnu.org>
List-Unsubscribe: <https://gcc.gnu.org/mailman/options/gcc-cvs>,
 <mailto:gcc-cvs-request@gcc.gnu.org?subject=unsubscribe>
List-Archive: <https://gcc.gnu.org/pipermail/gcc-cvs/>
List-Help: <mailto:gcc-cvs-request@gcc.gnu.org?subject=help>
List-Subscribe: <https://gcc.gnu.org/mailman/listinfo/gcc-cvs>,
 <mailto:gcc-cvs-request@gcc.gnu.org?subject=subscribe>
X-List-Received-Date: Thu, 05 May 2022 12:06:36 -0000

https://gcc.gnu.org/g:98a1d8bef1c8ffe320d2eda5b4cdd8de15786b28

commit 98a1d8bef1c8ffe320d2eda5b4cdd8de15786b28
Author: Richard Sandiford <richard.sandiford@arm.com>
Date:   Fri Apr 8 10:13:31 2022 +0100

    aarch64: Optimise pairs of CADI accesses
    
    After the previous two LDP/STP patches, this one adds support
    for combining neighbouring LDRs/STRs of capability registers (Cn)
    into a single LDP/STP of capability registers.  Part of the point
    is to test that this pairing does not (and cannot) happen for
    alternative-base addresses.
    
    The tests require a couple of other fixes:
    
    - the fake movcadi pattern didn't allow direct stores of xzr
      (the store alternative now accepts Z as well as r)
    
    - CONST_NULL had a higher cost than a register, so we tried
      to CSE multiple CONST_NULLs using a separate MOV.  It now
      has a cost of zero.

Diff:
---
 gcc/config/aarch64/aarch64-morello.md                       |  2 +-
 gcc/config/aarch64/aarch64.c                                | 13 +++++++++++--
 gcc/config/aarch64/predicates.md                            |  5 +++--
 gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c  |  3 +++
 .../gcc.target/aarch64/morello/normal-base-pair-2.c         |  7 +++++--
 5 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-morello.md b/gcc/config/aarch64/aarch64-morello.md
index db3beac3836..3ca049c14c8 100644
--- a/gcc/config/aarch64/aarch64-morello.md
+++ b/gcc/config/aarch64/aarch64-morello.md
@@ -143,7 +143,7 @@
 ; TODO: many more alternatives.
 (define_insn "*movcadi_aarch64"
   [(set (match_operand:CADI 0 "nonimmediate_operand" "=rk,r,r,m,r,r")
-	(match_operand:CADI 1 "aarch64_mov_operand" "rk,Z,m,r,Usa,Ush"))]
+	(match_operand:CADI 1 "aarch64_mov_operand" "rk,Z,m,rZ,Usa,Ush"))]
   "TARGET_CAPABILITY_FAKE"
   "@
    mov\\t%0, %1
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 91431957f6a..1bb5cb6ce86 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -12902,6 +12902,10 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
 	}
       return false;
 
+    case CONST_NULL:
+      *cost = 0;
+      return true;
+
     case CONST_INT:
       /* If an instruction can incorporate a constant within the
 	 instruction, the instruction's expression avoids calling
@@ -23493,7 +23497,9 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load)
   if (rclass_1 != rclass_2)
     return false;
 
-  if (msize == 16)
+  bool both_cap = (GET_MODE (mem_1) == CADImode
+		   && GET_MODE (mem_2) == CADImode);
+  if (msize == 16 && !both_cap)
     {
       /* Vector LDPs and STPs must use floating-point registers.  */
       if (rclass_1 != FP_REGS)
@@ -23700,6 +23706,7 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load)
   if (!MEM_P (mem[0]) || aarch64_mem_pair_operand (mem[0], mode))
     return false;
 
+  bool all_cap = true;
   for (int i = 0; i < num_insns; i++)
     {
       /* The mems cannot be volatile.  */
@@ -23720,6 +23727,8 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load)
       extract_base_offset_in_addr (mem[i], base + i, offset + i);
       if (base[i] == NULL_RTX || offset[i] == NULL_RTX)
 	return false;
+
+      all_cap &= (GET_MODE (mem[i]) == CADImode);
     }
 
   /* Check if the registers are of same class.  */
@@ -23738,7 +23747,7 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load)
 	  return false;
       }
 
-  if (msize == 16)
+  if (msize == 16 && !all_cap)
     {
       /* Vector LDPs and STPs must use floating-point registers.  */
       if (rclass != FP_REGS)
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 0f5570d59a4..d9a419c1c83 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -492,7 +492,7 @@
        (match_test "op == CONST1_RTX (GET_MODE (op))")))
 
 (define_predicate "aarch64_simd_or_scalar_imm_zero"
-  (and (match_code "const_int,const_double,const,const_vector")
+  (and (match_code "const_int,const_double,const,const_vector,const_null")
        (match_test "op == CONST0_RTX (GET_MODE (op))")))
 
 (define_predicate "aarch64_simd_imm_minus_one"
@@ -500,7 +500,8 @@
        (match_test "op == CONSTM1_RTX (GET_MODE (op))")))
 
 (define_predicate "aarch64_simd_reg_or_zero"
-  (and (match_code "reg,subreg,const_int,const_double,const,const_vector")
+  (and (match_code "reg,subreg,const_int,const_double,const,const_vector,
+		    const_null")
        (ior (match_operand 0 "register_operand")
 	    (match_test "op == const0_rtx")
 	    (match_operand 0 "aarch64_simd_or_scalar_imm_zero"))))
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c
index 959f0b6fdeb..6ad18dda97c 100644
--- a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-pair-2.c
@@ -4,6 +4,8 @@
 
 #include <arm_neon.h>
 
+typedef __uintcap_t uintcap_t;
+
 #define TEST_TYPE(TYPE)					\
   void							\
   test_##TYPE (TYPE *__capability ptr, TYPE a, TYPE b)	\
@@ -16,6 +18,7 @@
 
 TEST_TYPE (uint32_t)
 TEST_TYPE (uint64_t)
+TEST_TYPE (uintcap_t);
 TEST_TYPE (float)
 TEST_TYPE (double)
 TEST_TYPE (uint32x2_t)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-2.c b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-2.c
index 817fa5354f3..60e1e4dd8fd 100644
--- a/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-2.c
+++ b/gcc/testsuite/gcc.target/aarch64/morello/normal-base-pair-2.c
@@ -4,6 +4,8 @@
 
 #include <arm_neon.h>
 
+typedef __uintcap_t uintcap_t;
+
 #define TEST_TYPE(TYPE)				\
   void						\
   test_##TYPE (TYPE *ptr, TYPE a, TYPE b)	\
@@ -16,14 +18,15 @@
 
 TEST_TYPE (uint32_t)
 TEST_TYPE (uint64_t)
+TEST_TYPE (uintcap_t);
 TEST_TYPE (float)
 TEST_TYPE (double)
 TEST_TYPE (uint32x2_t)
 TEST_TYPE (uint32x4_t)
 
 /* { dg-final { scan-assembler-times {\tstp\tw[0-9]+,} 1 } } */
-/* { dg-final { scan-assembler-times {\tstp\tx[0-9]+,} 1 } } */
+/* { dg-final { scan-assembler-times {\tstp\t[xc][0-9]+,} 2 } } */
 /* { dg-final { scan-assembler-times {\tstp\ts[0-9]+,} 1 } } */
 /* { dg-final { scan-assembler-times {\tstp\td[0-9]+,} 2 } } */
 /* { dg-final { scan-assembler-times {\tstp\tq[0-9]+,} 2 } } */
-/* { dg-final { scan-assembler-times {\tstp\t[wx]zr,} 5 } } */
+/* { dg-final { scan-assembler-times {\tstp\t[wxc]zr,} 6 } } */