[gcc r12-7137] aarch64: Generalise adjacency check for load_pair

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc r12-7137] aarch64: Generalise adjacency check for load_pair_lanes
@ 2022-02-09 16:57 Richard Sandiford
  0 siblings, 0 replies; only message in thread
From: Richard Sandiford @ 2022-02-09 16:57 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:958448a9441ee54e012c67cfc3cf88083f3d0e4a

commit r12-7137-g958448a9441ee54e012c67cfc3cf88083f3d0e4a
Author: Richard Sandiford <richard.sandiford@arm.com>
Date:   Wed Feb 9 16:57:03 2022 +0000

    aarch64: Generalise adjacency check for load_pair_lanes
    
    This patch generalises the load_pair_lanes<mode> guard so that
    it uses aarch64_check_consecutive_mems to check for consecutive
    mems.  It also allows the pattern to be used for STRICT_ALIGNMENT
    targets if the alignment is high enough.
    
    The main aim is to avoid an inline test, for the sake of a later patch
    that needs to repeat it.  Reusing aarch64_check_consecutive_mems seemed
    simpler than writing an entirely new function.
    
    gcc/
            * config/aarch64/aarch64-protos.h (aarch64_mergeable_load_pair_p):
            Declare.
            * config/aarch64/aarch64-simd.md (load_pair_lanes<mode>): Use
            aarch64_mergeable_load_pair_p instead of inline check.
            * config/aarch64/aarch64.cc (aarch64_expand_vector_init): Likewise.
            (aarch64_check_consecutive_mems): Allow the reversed parameter
            to be null.
            (aarch64_mergeable_load_pair_p): New function.

Diff:
---
 gcc/config/aarch64/aarch64-protos.h           |  1 +
 gcc/config/aarch64/aarch64-simd.md            |  7 +---
 gcc/config/aarch64/aarch64.cc                 | 54 +++++++++++++++++----------
 gcc/testsuite/gcc.target/aarch64/vec-init-6.c | 12 ++++++
 gcc/testsuite/gcc.target/aarch64/vec-init-7.c | 12 ++++++
 5 files changed, 62 insertions(+), 24 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 26368538a55..b75ed35635b 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1000,6 +1000,7 @@ void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *);
 int aarch64_ccmp_mode_to_code (machine_mode mode);
 
 bool extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset);
+bool aarch64_mergeable_load_pair_p (machine_mode, rtx, rtx);
 bool aarch64_operands_ok_for_ldpstp (rtx *, bool, machine_mode);
 bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, machine_mode);
 void aarch64_swap_ldrstr_operands (rtx *, bool);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 872a3d78269..c5bc2ea658b 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4353,11 +4353,8 @@
 	(vec_concat:<VDBL>
 	   (match_operand:VDC 1 "memory_operand" "Utq")
 	   (match_operand:VDC 2 "memory_operand" "m")))]
-  "TARGET_SIMD && !STRICT_ALIGNMENT
-   && rtx_equal_p (XEXP (operands[2], 0),
-		   plus_constant (Pmode,
-				  XEXP (operands[1], 0),
-				  GET_MODE_SIZE (<MODE>mode)))"
+  "TARGET_SIMD
+   && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
   "ldr\\t%q0, %1"
   [(set_attr "type" "neon_load1_1reg_q")]
 )
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 296145e6008..c47543aebf3 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -21063,11 +21063,7 @@ aarch64_expand_vector_init (rtx target, rtx vals)
 		 for store_pair_lanes<mode>.  */
 	      if (memory_operand (x0, inner_mode)
 		  && memory_operand (x1, inner_mode)
-		  && !STRICT_ALIGNMENT
-		  && rtx_equal_p (XEXP (x1, 0),
-				  plus_constant (Pmode,
-						 XEXP (x0, 0),
-						 GET_MODE_SIZE (inner_mode))))
+		  && aarch64_mergeable_load_pair_p (mode, x0, x1))
 		{
 		  rtx t;
 		  if (inner_mode == DFmode)
@@ -24687,14 +24683,20 @@ aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
   return priority;
 }
 
-/* Check if *MEM1 and *MEM2 are consecutive memory references and,
+/* If REVERSED is null, return true if memory reference *MEM2 comes
+   immediately after memory reference *MEM1.  Do not change the references
+   in this case.
+
+   Otherwise, check if *MEM1 and *MEM2 are consecutive memory references and,
    if they are, try to make them use constant offsets from the same base
    register.  Return true on success.  When returning true, set *REVERSED
    to true if *MEM1 comes after *MEM2, false if *MEM1 comes before *MEM2.  */
 static bool
 aarch64_check_consecutive_mems (rtx *mem1, rtx *mem2, bool *reversed)
 {
-  *reversed = false;
+  if (reversed)
+    *reversed = false;
+
   if (GET_RTX_CLASS (GET_CODE (XEXP (*mem1, 0))) == RTX_AUTOINC
       || GET_RTX_CLASS (GET_CODE (XEXP (*mem2, 0))) == RTX_AUTOINC)
     return false;
@@ -24723,7 +24725,7 @@ aarch64_check_consecutive_mems (rtx *mem1, rtx *mem2, bool *reversed)
       if (known_eq (UINTVAL (offset1) + size1, UINTVAL (offset2)))
 	return true;
 
-      if (known_eq (UINTVAL (offset2) + size2, UINTVAL (offset1)))
+      if (known_eq (UINTVAL (offset2) + size2, UINTVAL (offset1)) && reversed)
 	{
 	  *reversed = true;
 	  return true;
@@ -24756,22 +24758,25 @@ aarch64_check_consecutive_mems (rtx *mem1, rtx *mem2, bool *reversed)
 
       if (known_eq (expr_offset1 + size1, expr_offset2))
 	;
-      else if (known_eq (expr_offset2 + size2, expr_offset1))
+      else if (known_eq (expr_offset2 + size2, expr_offset1) && reversed)
 	*reversed = true;
       else
 	return false;
 
-      if (base2)
+      if (reversed)
 	{
-	  rtx addr1 = plus_constant (Pmode, XEXP (*mem2, 0),
-				     expr_offset1 - expr_offset2);
-	  *mem1 = replace_equiv_address_nv (*mem1, addr1);
-	}
-      else
-	{
-	  rtx addr2 = plus_constant (Pmode, XEXP (*mem1, 0),
-				     expr_offset2 - expr_offset1);
-	  *mem2 = replace_equiv_address_nv (*mem2, addr2);
+	  if (base2)
+	    {
+	      rtx addr1 = plus_constant (Pmode, XEXP (*mem2, 0),
+					 expr_offset1 - expr_offset2);
+	      *mem1 = replace_equiv_address_nv (*mem1, addr1);
+	    }
+	  else
+	    {
+	      rtx addr2 = plus_constant (Pmode, XEXP (*mem1, 0),
+					 expr_offset2 - expr_offset1);
+	      *mem2 = replace_equiv_address_nv (*mem2, addr2);
+	    }
 	}
       return true;
     }
@@ -24779,6 +24784,17 @@ aarch64_check_consecutive_mems (rtx *mem1, rtx *mem2, bool *reversed)
   return false;
 }
 
+/* Return true if MEM1 and MEM2 can be combined into a single access
+   of mode MODE, with the combined access having the same address as MEM1.  */
+
+bool
+aarch64_mergeable_load_pair_p (machine_mode mode, rtx mem1, rtx mem2)
+{
+  if (STRICT_ALIGNMENT && MEM_ALIGN (mem1) < GET_MODE_ALIGNMENT (mode))
+    return false;
+  return aarch64_check_consecutive_mems (&mem1, &mem2, nullptr);
+}
+
 /* Given OPERANDS of consecutive load/store, check if we can merge
    them into ldp/stp.  LOAD is true if they are load instructions.
    MODE is the mode of memory operands.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-6.c b/gcc/testsuite/gcc.target/aarch64/vec-init-6.c
new file mode 100644
index 00000000000..96450157498
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-6.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+int64_t s64[2];
+float64_t f64[2];
+
+int64x2_t test_s64() { return (int64x2_t) { s64[0], s64[1] }; }
+float64x2_t test_f64() { return (float64x2_t) { f64[0], f64[1] }; }
+
+/* { dg-final { scan-assembler-not {\tins\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-7.c b/gcc/testsuite/gcc.target/aarch64/vec-init-7.c
new file mode 100644
index 00000000000..795895286db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-7.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mstrict-align" } */
+
+#include <arm_neon.h>
+
+int64_t s64[2] __attribute__((aligned(16)));
+float64_t f64[2] __attribute__((aligned(16)));
+
+int64x2_t test_s64() { return (int64x2_t) { s64[0], s64[1] }; }
+float64x2_t test_f64() { return (float64x2_t) { f64[0], f64[1] }; }
+
+/* { dg-final { scan-assembler-not {\tins\t} } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-02-09 16:57 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-09 16:57 [gcc r12-7137] aarch64: Generalise adjacency check for load_pair_lanes Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).