public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/ARM/heads/morello)] aarch64: Support alternative-base TI loads & stores
@ 2022-05-05 12:06 Matthew Malcomson
  0 siblings, 0 replies; only message in thread
From: Matthew Malcomson @ 2022-05-05 12:06 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:1bfedd782d0d3285d3c5fdd7bfd506a7f1438bad

commit 1bfedd782d0d3285d3c5fdd7bfd506a7f1438bad
Author: Richard Sandiford <richard.sandiford@arm.com>
Date:   Fri Apr 8 12:29:21 2022 +0100

    aarch64: Support alternative-base TI loads & stores
    
    Loading or storing a GPR TImode value at a normal-base address uses
    an LDP or STP of two DIs, but that possibility isn't available for
    alternative-base addresses.  We have to split the access into two
    individual DI loads or stores instead, just like we have to split a
    TI GPR->GPR register move into two DI moves.
    
    The changes needed are:
    
    - Add a third way of analysing an address: as the first in a
      sequence of split LDR/STR instructions.
    
    - Divide the movti GPR memory alternatives into two: one for
      normal-base addresses (UAn), which behaves as before, and one
      for alternative-base addresses (UAa), which needs to be split.
    
    - Generalise the existing TI splitters to handle loads and stores.
      The main changes here are:
    
      - Tighten the check for when a 128-bit move needs to be split,
        so that it can handle general operands.
    
      - Use the more general simplify_gen_subreg instead of
        gen_low/highpart.

Diff:
---
 gcc/config/aarch64/aarch64-protos.h                |   2 +
 gcc/config/aarch64/aarch64.c                       |  80 +++++++-
 gcc/config/aarch64/aarch64.md                      |  20 +-
 gcc/config/aarch64/constraints.md                  |  10 +
 gcc/config/aarch64/predicates.md                   |  10 +
 .../aarch64/morello/alt-base-load-ti-1.c           | 210 +++++++++++++++++++++
 .../aarch64/morello/alt-base-store-ti-1.c          | 210 +++++++++++++++++++++
 .../aarch64/morello/alt-base-store-ti-2.c          | 123 ++++++++++++
 8 files changed, 647 insertions(+), 18 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 2e6edb3d9a4..1a6588826fa 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -582,6 +582,8 @@ const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr_rtx (void);
 rtx aarch64_return_addr (int, rtx);
 rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT);
+bool aarch64_alt_base_address_p (machine_mode, rtx);
+bool aarch64_normal_base_address_p (machine_mode, rtx);
 bool aarch64_ldr_or_alt_ldur_address_p (machine_mode, rtx);
 bool aarch64_simd_mem_operand_p (rtx);
 bool aarch64_sve_ld1r_operand_p (rtx);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 03e16ca365d..fdc1015c0c9 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3689,10 +3689,13 @@ aarch64_split_128bit_move (rtx dst, rtx src)
 	}
     }
 
-  dst_lo = gen_lowpart (word_mode, dst);
-  dst_hi = gen_highpart (word_mode, dst);
-  src_lo = gen_lowpart (word_mode, src);
-  src_hi = gen_highpart_mode (word_mode, mode, src);
+  auto lo_offset = subreg_lowpart_offset (word_mode, mode);
+  auto hi_offset = subreg_highpart_offset (word_mode, mode);
+
+  dst_lo = simplify_gen_subreg (word_mode, dst, mode, lo_offset);
+  dst_hi = simplify_gen_subreg (word_mode, dst, mode, hi_offset);
+  src_lo = simplify_gen_subreg (word_mode, src, mode, lo_offset);
+  src_hi = simplify_gen_subreg (word_mode, src, mode, hi_offset);
 
   /* At most one pairing may overlap.  */
   if (reg_overlap_mentioned_p (dst_lo, src_hi))
@@ -3710,8 +3713,24 @@ aarch64_split_128bit_move (rtx dst, rtx src)
 bool
 aarch64_split_128bit_move_p (rtx dst, rtx src)
 {
-  return (! REG_P (src)
-	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
+  machine_mode mode = GET_MODE (dst);
+
+  if (REG_P (dst)
+      && GP_REGNUM_P (REGNO (dst))
+      && !aarch64_normal_base_mem_operand (src, mode))
+    return true;
+
+  if (REG_P (src)
+      && GP_REGNUM_P (REGNO (src))
+      && !aarch64_normal_base_mem_operand (dst, mode))
+    return true;
+
+  if (src == CONST0_RTX (mode)
+      && MEM_P (dst)
+      && !aarch64_normal_base_mem_operand (dst, mode))
+    return true;
+
+  return false;
 }
 
 /* Split a complex SIMD combine.  */
@@ -9739,10 +9758,11 @@ aarch64_classify_address (struct aarch64_address_info *info,
 
   bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
 
-  /* Classify the access as up to two of the following:
+  /* Classify the access as up to three of the following:
 
      - a sequence of LDPs or STPs
      - a single LDR or STR
+     - a sequence of LDRs or STRs, splitting the full access
 
      The LDR/STR can overlap the LDPs/STPs or come after them.
 
@@ -9750,9 +9770,13 @@ aarch64_classify_address (struct aarch64_address_info *info,
      pairs, with each loaded or stored register having mode LDP_STP_MODE.
 
      If LDR_STR_MODE is not VOIDmode, require a valid LDR/STR of that
-     mode at offset LDR_STR_OFFSET from the start of MODE.  */
+     mode at offset LDR_STR_OFFSET from the start of MODE.
+
+     If SPLIT_MODE is not VOIDmode, require a valid LDR/STR sequence
+     of that mode, with the sequence covering the whole of MODE.  */
   machine_mode ldp_stp_mode = VOIDmode;
   machine_mode ldr_str_mode = VOIDmode;
+  machine_mode split_mode = VOIDmode;
   unsigned int num_ldp_stp = 1;
   poly_int64 ldr_str_offset = 0;
   if (type == ADDR_QUERY_LDP_STP)
@@ -9783,7 +9807,10 @@ aarch64_classify_address (struct aarch64_address_info *info,
      We conservatively require an offset representable in either mode.  */
   else if (mode == TImode || mode == TFmode)
     {
-      ldp_stp_mode = DImode;
+      if (alt_base_p)
+	split_mode = DImode;
+      else
+	ldp_stp_mode = DImode;
       ldr_str_mode = mode;
     }
   /* On BE, we use load/store pair for multi-vector load/stores.  */
@@ -9805,6 +9832,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
     return false;
 
   bool allow_reg_index_p = (ldp_stp_mode == VOIDmode
+			    && split_mode == VOIDmode
 			    && (known_lt (GET_MODE_SIZE (mode), 16)
 				|| mode == CADImode
 				|| vec_flags == VEC_ADVSIMD
@@ -9899,6 +9927,20 @@ aarch64_classify_address (struct aarch64_address_info *info,
 		  return false;
 	      }
 
+	  if (split_mode != VOIDmode)
+	    {
+	      unsigned int num_split
+		= exact_div (GET_MODE_SIZE (mode),
+			     GET_MODE_SIZE (split_mode)).to_constant ();
+	      for (unsigned int i = 0; i < num_split; ++i)
+		{
+		  auto suboffset = offset + i * GET_MODE_SIZE (split_mode);
+		  if (!aarch64_valid_ldr_str_offset_p (split_mode, alt_base_p,
+						       suboffset, type))
+		    return false;
+		}
+	    }
+
 	  if (ldr_str_mode != VOIDmode
 	      && !aarch64_valid_ldr_str_offset_p (ldr_str_mode, alt_base_p,
 						  offset + ldr_str_offset,
@@ -19366,6 +19408,26 @@ aarch64_endian_lane_rtx (machine_mode mode, unsigned int n)
   return gen_int_mode (ENDIAN_LANE_N (GET_MODE_NUNITS (mode), n), SImode);
 }
 
+/* Return true if X is a valid address for mode MODE and if it has
+   an alternative base register.  */
+
+bool
+aarch64_alt_base_address_p (machine_mode mode, rtx x)
+{
+  struct aarch64_address_info addr;
+  return aarch64_classify_address (&addr, x, mode, false) && addr.alt_base_p;
+}
+
+/* Return true if X is a valid address for mode MODE and if it has
+   a normal (as opposed to alternative) base register.  */
+
+bool
+aarch64_normal_base_address_p (machine_mode mode, rtx x)
+{
+  struct aarch64_address_info addr;
+  return aarch64_classify_address (&addr, x, mode, false) && !addr.alt_base_p;
+}
+
 /* Return true if X is either:
 
    - a valid normal-base memory address for an LDR of mode MODE
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 93a68f848ba..31f39ec0f3a 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1392,9 +1392,9 @@
 
 (define_insn "*movti_aarch64"
   [(set (match_operand:TI 0
-	 "nonimmediate_operand" "=   r,w, r,w,r,m,m,w,m")
+	 "nonimmediate_operand" "=   r,w, r,w,  r,UAn,UAn,  r,UAa,w,m")
 	(match_operand:TI 1
-	 "aarch64_mov_operand"  " rUti,r, w,w,m,r,Z,m,w"))]
+	 "aarch64_mov_operand"  " rUti,r, w,w,UAn,  r,  Z,UAa, rZ,m,w"))]
   "(register_operand (operands[0], TImode)
     || aarch64_reg_or_zero (operands[1], TImode))"
   "@
@@ -1405,21 +1405,23 @@
    ldp\\t%0, %H0, %1
    stp\\t%1, %H1, %0
    stp\\txzr, xzr, %0
+   #
+   #
    ldr\\t%q0, %1
    str\\t%q1, %0"
   [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
 		             load_16,store_16,store_16,\
+		             load_16,store_16,\
                              load_16,store_16")
-   (set_attr "length" "8,8,8,4,4,4,4,4,4")
-   (set_attr "arch" "*,*,*,simd,*,*,*,fp,fp")]
+   (set_attr "length" "8,8,8,4,4,4,4,8,8,4,4")
+   (set_attr "arch" "*,*,*,simd,*,*,*,*,*,fp,fp")]
 )
 
-;; Split a TImode register-register or register-immediate move into
-;; its component DImode pieces, taking care to handle overlapping
-;; source and dest registers.
+;; Split a TImode GPR move into its component DImode pieces, taking
+;; care to handle overlapping source and dest registers.
 (define_split
-   [(set (match_operand:TI 0 "register_operand" "")
-	 (match_operand:TI 1 "aarch64_reg_or_imm" ""))]
+   [(set (match_operand:TI 0 "nonimmediate_operand" "")
+	 (match_operand:TI 1 "aarch64_mov_operand" ""))]
   "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])"
   [(const_int 0)]
 {
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 579c89dd4f5..42d92a3bb97 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -373,6 +373,16 @@
    LD[234] and ST[234] patterns)."
   (match_operand 0 "aarch64_sve_struct_memory_operand"))
 
+(define_memory_constraint "UAa"
+  "@internal
+   A general memory operand with an alternative base register"
+  (match_operand 0 "aarch64_alt_base_mem_operand"))
+
+(define_memory_constraint "UAn"
+  "@internal
+   A general memory operand with a normal base register"
+  (match_operand 0 "aarch64_normal_base_mem_operand"))
+
 (define_memory_constraint "UAu"
   "@internal
    Either a general memory operand with a normal base register or
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index e4b3796d0b4..ceca71dc292 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -250,6 +250,16 @@
        (match_test "INTVAL (op) != 0
 		    && (unsigned) exact_log2 (INTVAL (op)) < 64")))
 
+(define_predicate "aarch64_alt_base_mem_operand"
+  (and (match_code "mem")
+       (match_test "aarch64_alt_base_address_p (GET_MODE (op),
+						XEXP (op, 0))")))
+
+(define_predicate "aarch64_normal_base_mem_operand"
+  (and (match_code "mem")
+       (match_test "aarch64_normal_base_address_p (GET_MODE (op),
+						   XEXP (op, 0))")))
+
 (define_predicate "aarch64_ldr_or_alt_ldur_operand"
   (and (match_code "mem")
        (match_test "aarch64_ldr_or_alt_ldur_address_p (GET_MODE (op),
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-ti-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-ti-1.c
new file mode 100644
index 00000000000..c8ddfdf3b09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-ti-1.c
@@ -0,0 +1,210 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" ""  { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } }  */
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+typedef unsigned int ti __attribute__((mode(TI)));
+
+/*
+** load_x10_ti_m257:
+**	sub	(c[0-9]+), c0, #257
+**	ldr	x10, \[\1\]
+**	ldr	x11, \[\1, #?8\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, ti, m257)
+
+/*
+** load_x10_ti_m256:
+**	ldr	x10, \[c0, #?-256\]
+**	ldr	x11, \[c0, #?-248\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, ti, m256)
+
+/*
+** load_x10_ti_m255:
+**	ldr	x10, \[c0, #?-255\]
+**	ldr	x11, \[c0, #?-247\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, ti, m255)
+
+/*
+** load_x10_ti_m1:
+**	ldr	x10, \[c0, #?-1\]
+**	ldr	x11, \[c0, #?7\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, ti, m1)
+
+/*
+** load_x10_ti_1:
+**	ldr	x10, \[c0, #?1\]
+**	ldr	x11, \[c0, #?9\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, ti, 1)
+
+/*
+** load_x10_ti_247:
+**	ldr	x10, \[c0, #?247\]
+**	ldr	x11, \[c0, #?255\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, ti, 247)
+
+/*
+** load_x10_ti_248:
+**	ldr	x10, \[c0, #?248\]
+**	ldr	x11, \[c0, #?256\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, ti, 248)
+
+/*
+** load_x10_ti_249:
+**	add	(c[0-9]+), c0, #?249
+**	ldr	x10, \[\1\]
+**	ldr	x11, \[\1, #?8\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, ti, 249)
+
+/*
+** load_x10_ti_256:
+**	add	(c[0-9]+), c0, #?256
+**	ldr	x10, \[\1\]
+**	ldr	x11, \[\1, #?8\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, ti, 256)
+
+/*
+** load_x10_ti_511:
+**	add	(c[0-9]+), c0, #?511
+**	ldr	x10, \[\1\]
+**	ldr	x11, \[\1, #?8\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, ti, 511)
+
+/*
+** load_x10_ti_512:
+**	add	(c[0-9]+), c0, #?512
+**	ldr	x10, \[\1\]
+**	ldr	x11, \[\1, #?8\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, ti, 512)
+
+/* Check for valid asm, but don't mandate a particular sequence.  */
+LOAD_REG_INDEX (x10, ti, int32_t, 1)
+LOAD_REG_INDEX (x10, ti, uint32_t, 1)
+LOAD_REG_INDEX (x10, ti, uint64_t, 1)
+
+LOAD_REG_INDEX (x10, ti, int32_t, 2)
+LOAD_REG_INDEX (x10, ti, uint32_t, 2)
+LOAD_REG_INDEX (x10, ti, uint64_t, 2)
+
+LOAD_REG_INDEX (x10, ti, int32_t, 4)
+LOAD_REG_INDEX (x10, ti, uint32_t, 4)
+LOAD_REG_INDEX (x10, ti, uint64_t, 4)
+
+LOAD_REG_INDEX (x10, ti, int32_t, 8)
+LOAD_REG_INDEX (x10, ti, uint32_t, 8)
+LOAD_REG_INDEX (x10, ti, uint64_t, 8)
+
+LOAD_REG_INDEX (x10, ti, int32_t, 16)
+LOAD_REG_INDEX (x10, ti, uint32_t, 16)
+LOAD_REG_INDEX (x10, ti, uint64_t, 16)
+
+/*
+** load_q20_ti_m257:
+**	sub	(c[0-9]+), c0, #257
+**	ldr	q20, \[\1\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, ti, m257)
+
+/*
+** load_q20_ti_m256:
+**	ldr	q20, \[c0, #?-256\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, ti, m256)
+
+/*
+** load_q20_ti_m255:
+**	ldr	q20, \[c0, #?-255\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, ti, m255)
+
+/*
+** load_q20_ti_m1:
+**	ldr	q20, \[c0, #?-1\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, ti, m1)
+
+/*
+** load_q20_ti_1:
+**	ldr	q20, \[c0, #?1\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, ti, 1)
+
+/*
+** load_q20_ti_247:
+**	ldr	q20, \[c0, #?247\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, ti, 247)
+
+/*
+** load_q20_ti_248:
+**	ldr	q20, \[c0, #?248\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, ti, 248)
+
+/*
+** load_q20_ti_249:
+**	add	(c[0-9]+), c0, #?249
+**	ldr	q20, \[\1\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, ti, 249)
+
+/*
+** load_q20_ti_256:
+**	add	(c[0-9]+), c0, #?256
+**	ldr	q20, \[\1\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, ti, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence.  */
+LOAD_REG_INDEX (q20, ti, int32_t, 1)
+LOAD_REG_INDEX (q20, ti, uint32_t, 1)
+LOAD_REG_INDEX (q20, ti, uint64_t, 1)
+
+LOAD_REG_INDEX (q20, ti, int32_t, 2)
+LOAD_REG_INDEX (q20, ti, uint32_t, 2)
+LOAD_REG_INDEX (q20, ti, uint64_t, 2)
+
+LOAD_REG_INDEX (q20, ti, int32_t, 4)
+LOAD_REG_INDEX (q20, ti, uint32_t, 4)
+LOAD_REG_INDEX (q20, ti, uint64_t, 4)
+
+LOAD_REG_INDEX (q20, ti, int32_t, 8)
+LOAD_REG_INDEX (q20, ti, uint32_t, 8)
+LOAD_REG_INDEX (q20, ti, uint64_t, 8)
+
+LOAD_REG_INDEX (q20, ti, int32_t, 16)
+LOAD_REG_INDEX (q20, ti, uint32_t, 16)
+LOAD_REG_INDEX (q20, ti, uint64_t, 16)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-ti-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-ti-1.c
new file mode 100644
index 00000000000..e599bed009a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-ti-1.c
@@ -0,0 +1,210 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" ""  { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } }  */
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+typedef unsigned int ti __attribute__((mode(TI)));
+
+/*
+** store_x10_ti_m257:
+**	sub	(c[0-9]+), c0, #257
+**	str	x10, \[\1\]
+**	str	x11, \[\1, #?8\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, ti, m257)
+
+/*
+** store_x10_ti_m256:
+**	str	x10, \[c0, #?-256\]
+**	str	x11, \[c0, #?-248\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, ti, m256)
+
+/*
+** store_x10_ti_m255:
+**	str	x10, \[c0, #?-255\]
+**	str	x11, \[c0, #?-247\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, ti, m255)
+
+/*
+** store_x10_ti_m1:
+**	str	x10, \[c0, #?-1\]
+**	str	x11, \[c0, #?7\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, ti, m1)
+
+/*
+** store_x10_ti_1:
+**	str	x10, \[c0, #?1\]
+**	str	x11, \[c0, #?9\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, ti, 1)
+
+/*
+** store_x10_ti_247:
+**	str	x10, \[c0, #?247\]
+**	str	x11, \[c0, #?255\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, ti, 247)
+
+/*
+** store_x10_ti_248:
+**	str	x10, \[c0, #?248\]
+**	str	x11, \[c0, #?256\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, ti, 248)
+
+/*
+** store_x10_ti_249:
+**	add	(c[0-9]+), c0, #?249
+**	str	x10, \[\1\]
+**	str	x11, \[\1, #?8\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, ti, 249)
+
+/*
+** store_x10_ti_256:
+**	add	(c[0-9]+), c0, #?256
+**	str	x10, \[\1\]
+**	str	x11, \[\1, #?8\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, ti, 256)
+
+/*
+** store_x10_ti_511:
+**	add	(c[0-9]+), c0, #?511
+**	str	x10, \[\1\]
+**	str	x11, \[\1, #?8\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, ti, 511)
+
+/*
+** store_x10_ti_512:
+**	add	(c[0-9]+), c0, #?512
+**	str	x10, \[\1\]
+**	str	x11, \[\1, #?8\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, ti, 512)
+
+/* Check for valid asm, but don't mandate a particular sequence.  */
+STORE_REG_INDEX (x10, ti, int32_t, 1)
+STORE_REG_INDEX (x10, ti, uint32_t, 1)
+STORE_REG_INDEX (x10, ti, uint64_t, 1)
+
+STORE_REG_INDEX (x10, ti, int32_t, 2)
+STORE_REG_INDEX (x10, ti, uint32_t, 2)
+STORE_REG_INDEX (x10, ti, uint64_t, 2)
+
+STORE_REG_INDEX (x10, ti, int32_t, 4)
+STORE_REG_INDEX (x10, ti, uint32_t, 4)
+STORE_REG_INDEX (x10, ti, uint64_t, 4)
+
+STORE_REG_INDEX (x10, ti, int32_t, 8)
+STORE_REG_INDEX (x10, ti, uint32_t, 8)
+STORE_REG_INDEX (x10, ti, uint64_t, 8)
+
+STORE_REG_INDEX (x10, ti, int32_t, 16)
+STORE_REG_INDEX (x10, ti, uint32_t, 16)
+STORE_REG_INDEX (x10, ti, uint64_t, 16)
+
+/*
+** store_q20_ti_m257:
+**	sub	(c[0-9]+), c0, #257
+**	str	q20, \[\1\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, ti, m257)
+
+/*
+** store_q20_ti_m256:
+**	str	q20, \[c0, #?-256\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, ti, m256)
+
+/*
+** store_q20_ti_m255:
+**	str	q20, \[c0, #?-255\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, ti, m255)
+
+/*
+** store_q20_ti_m1:
+**	str	q20, \[c0, #?-1\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, ti, m1)
+
+/*
+** store_q20_ti_1:
+**	str	q20, \[c0, #?1\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, ti, 1)
+
+/*
+** store_q20_ti_247:
+**	str	q20, \[c0, #?247\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, ti, 247)
+
+/*
+** store_q20_ti_248:
+**	str	q20, \[c0, #?248\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, ti, 248)
+
+/*
+** store_q20_ti_249:
+**	add	(c[0-9]+), c0, #?249
+**	str	q20, \[\1\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, ti, 249)
+
+/*
+** store_q20_ti_256:
+**	add	(c[0-9]+), c0, #?256
+**	str	q20, \[\1\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, ti, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence.  */
+STORE_REG_INDEX (q20, ti, int32_t, 1)
+STORE_REG_INDEX (q20, ti, uint32_t, 1)
+STORE_REG_INDEX (q20, ti, uint64_t, 1)
+
+STORE_REG_INDEX (q20, ti, int32_t, 2)
+STORE_REG_INDEX (q20, ti, uint32_t, 2)
+STORE_REG_INDEX (q20, ti, uint64_t, 2)
+
+STORE_REG_INDEX (q20, ti, int32_t, 4)
+STORE_REG_INDEX (q20, ti, uint32_t, 4)
+STORE_REG_INDEX (q20, ti, uint64_t, 4)
+
+STORE_REG_INDEX (q20, ti, int32_t, 8)
+STORE_REG_INDEX (q20, ti, uint32_t, 8)
+STORE_REG_INDEX (q20, ti, uint64_t, 8)
+
+STORE_REG_INDEX (q20, ti, int32_t, 16)
+STORE_REG_INDEX (q20, ti, uint32_t, 16)
+STORE_REG_INDEX (q20, ti, uint64_t, 16)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-ti-2.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-ti-2.c
new file mode 100644
index 00000000000..865cccaf998
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-ti-2.c
@@ -0,0 +1,123 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" ""  { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } }  */
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+typedef unsigned int ti __attribute__((mode(TI)));
+
+/*
+** store_zero_ti_m257:
+**	sub	(c[0-9]+), c0, #257
+**	str	xzr, \[\1\]
+**	str	xzr, \[\1, #?8\]
+**	ret
+*/
+STORE_ZERO_OFFSET (ti, m257)
+
+/*
+** store_zero_ti_m256:
+**	str	xzr, \[c0, #?-256\]
+**	str	xzr, \[c0, #?-248\]
+**	ret
+*/
+STORE_ZERO_OFFSET (ti, m256)
+
+/*
+** store_zero_ti_m255:
+**	str	xzr, \[c0, #?-255\]
+**	str	xzr, \[c0, #?-247\]
+**	ret
+*/
+STORE_ZERO_OFFSET (ti, m255)
+
+/*
+** store_zero_ti_m1:
+**	str	xzr, \[c0, #?-1\]
+**	str	xzr, \[c0, #?7\]
+**	ret
+*/
+STORE_ZERO_OFFSET (ti, m1)
+
+/*
+** store_zero_ti_1:
+**	str	xzr, \[c0, #?1\]
+**	str	xzr, \[c0, #?9\]
+**	ret
+*/
+STORE_ZERO_OFFSET (ti, 1)
+
+/*
+** store_zero_ti_247:
+**	str	xzr, \[c0, #?247\]
+**	str	xzr, \[c0, #?255\]
+**	ret
+*/
+STORE_ZERO_OFFSET (ti, 247)
+
+/*
+** store_zero_ti_248:
+**	str	xzr, \[c0, #?248\]
+**	str	xzr, \[c0, #?256\]
+**	ret
+*/
+STORE_ZERO_OFFSET (ti, 248)
+
+/*
+** store_zero_ti_249:
+**	add	(c[0-9]+), c0, #?249
+**	str	xzr, \[\1\]
+**	str	xzr, \[\1, #?8\]
+**	ret
+*/
+STORE_ZERO_OFFSET (ti, 249)
+
+/*
+** store_zero_ti_256:
+**	add	(c[0-9]+), c0, #?256
+**	str	xzr, \[\1\]
+**	str	xzr, \[\1, #?8\]
+**	ret
+*/
+STORE_ZERO_OFFSET (ti, 256)
+
+/*
+** store_zero_ti_511:
+**	add	(c[0-9]+), c0, #?511
+**	str	xzr, \[\1\]
+**	str	xzr, \[\1, #?8\]
+**	ret
+*/
+STORE_ZERO_OFFSET (ti, 511)
+
+/*
+** store_zero_ti_512:
+**	add	(c[0-9]+), c0, #?512
+**	str	xzr, \[\1\]
+**	str	xzr, \[\1, #?8\]
+**	ret
+*/
+STORE_ZERO_OFFSET (ti, 512)
+
+/* Check for valid asm, but don't mandate a particular sequence.  */
+STORE_ZERO_INDEX (ti, int32_t, 1)
+STORE_ZERO_INDEX (ti, uint32_t, 1)
+STORE_ZERO_INDEX (ti, uint64_t, 1)
+
+STORE_ZERO_INDEX (ti, int32_t, 2)
+STORE_ZERO_INDEX (ti, uint32_t, 2)
+STORE_ZERO_INDEX (ti, uint64_t, 2)
+
+STORE_ZERO_INDEX (ti, int32_t, 4)
+STORE_ZERO_INDEX (ti, uint32_t, 4)
+STORE_ZERO_INDEX (ti, uint64_t, 4)
+
+STORE_ZERO_INDEX (ti, int32_t, 8)
+STORE_ZERO_INDEX (ti, uint32_t, 8)
+STORE_ZERO_INDEX (ti, uint64_t, 8)
+
+STORE_ZERO_INDEX (ti, int32_t, 16)
+STORE_ZERO_INDEX (ti, uint32_t, 16)
+STORE_ZERO_INDEX (ti, uint64_t, 16)


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-05-05 12:06 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-05 12:06 [gcc(refs/vendors/ARM/heads/morello)] aarch64: Support alternative-base TI loads & stores Matthew Malcomson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).