public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/ARM/heads/morello)] aarch64: Support alternative-base TI loads & stores
@ 2022-05-05 12:06 Matthew Malcomson
0 siblings, 0 replies; only message in thread
From: Matthew Malcomson @ 2022-05-05 12:06 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:1bfedd782d0d3285d3c5fdd7bfd506a7f1438bad
commit 1bfedd782d0d3285d3c5fdd7bfd506a7f1438bad
Author: Richard Sandiford <richard.sandiford@arm.com>
Date: Fri Apr 8 12:29:21 2022 +0100
aarch64: Support alternative-base TI loads & stores
Loading or storing a GPR TImode value at a normal-base address uses
an LDP or STP of two DIs, but that possibility isn't available for
alternative-base addresses. We have to split into two individual DI
loads and stores instead, just like we have to split a TI GPR->GPR
register move into two DI moves.
The changes needed are:
- Add a third way of analysing an address: as the first in a
  sequence of split LDR/STR instructions.
- Divide the movti GPR memory alternatives into two: one with
  normal-base addresses (UAn) that behave as previously and one
  with alternative-base addresses (UAa) that need to be split.
- Generalise the existing TI splitters to handle loads and stores.
  The main changes here are:
  - Tighten the check for when a 128-bit move needs to be split,
    so that it can handle general operands.
  - Use the more general simplify_gen_subreg instead of
    gen_low/highpart.
Diff:
---
gcc/config/aarch64/aarch64-protos.h | 2 +
gcc/config/aarch64/aarch64.c | 80 +++++++-
gcc/config/aarch64/aarch64.md | 20 +-
gcc/config/aarch64/constraints.md | 10 +
gcc/config/aarch64/predicates.md | 10 +
.../aarch64/morello/alt-base-load-ti-1.c | 210 +++++++++++++++++++++
.../aarch64/morello/alt-base-store-ti-1.c | 210 +++++++++++++++++++++
.../aarch64/morello/alt-base-store-ti-2.c | 123 ++++++++++++
8 files changed, 647 insertions(+), 18 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 2e6edb3d9a4..1a6588826fa 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -582,6 +582,8 @@ const char *aarch64_output_move_struct (rtx *operands);
rtx aarch64_return_addr_rtx (void);
rtx aarch64_return_addr (int, rtx);
rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT);
+bool aarch64_alt_base_address_p (machine_mode, rtx);
+bool aarch64_normal_base_address_p (machine_mode, rtx);
bool aarch64_ldr_or_alt_ldur_address_p (machine_mode, rtx);
bool aarch64_simd_mem_operand_p (rtx);
bool aarch64_sve_ld1r_operand_p (rtx);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 03e16ca365d..fdc1015c0c9 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3689,10 +3689,13 @@ aarch64_split_128bit_move (rtx dst, rtx src)
}
}
- dst_lo = gen_lowpart (word_mode, dst);
- dst_hi = gen_highpart (word_mode, dst);
- src_lo = gen_lowpart (word_mode, src);
- src_hi = gen_highpart_mode (word_mode, mode, src);
+ auto lo_offset = subreg_lowpart_offset (word_mode, mode);
+ auto hi_offset = subreg_highpart_offset (word_mode, mode);
+
+ dst_lo = simplify_gen_subreg (word_mode, dst, mode, lo_offset);
+ dst_hi = simplify_gen_subreg (word_mode, dst, mode, hi_offset);
+ src_lo = simplify_gen_subreg (word_mode, src, mode, lo_offset);
+ src_hi = simplify_gen_subreg (word_mode, src, mode, hi_offset);
/* At most one pairing may overlap. */
if (reg_overlap_mentioned_p (dst_lo, src_hi))
@@ -3710,8 +3713,24 @@ aarch64_split_128bit_move (rtx dst, rtx src)
bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
- return (! REG_P (src)
- || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
+ machine_mode mode = GET_MODE (dst);
+
+ if (REG_P (dst)
+ && GP_REGNUM_P (REGNO (dst))
+ && !aarch64_normal_base_mem_operand (src, mode))
+ return true;
+
+ if (REG_P (src)
+ && GP_REGNUM_P (REGNO (src))
+ && !aarch64_normal_base_mem_operand (dst, mode))
+ return true;
+
+ if (src == CONST0_RTX (mode)
+ && MEM_P (dst)
+ && !aarch64_normal_base_mem_operand (dst, mode))
+ return true;
+
+ return false;
}
/* Split a complex SIMD combine. */
@@ -9739,10 +9758,11 @@ aarch64_classify_address (struct aarch64_address_info *info,
bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
- /* Classify the access as up to two of the following:
+ /* Classify the access as up to three of the following:
- a sequence of LDPs or STPs
- a single LDR or STR
+ - a sequence of LDRs or STRs, splitting the full access
The LDR/STR can overlap the LDPs/STPs or come after them.
@@ -9750,9 +9770,13 @@ aarch64_classify_address (struct aarch64_address_info *info,
pairs, with each loaded or stored register having mode LDP_STP_MODE.
If LDR_STR_MODE is not VOIDmode, require a valid LDR/STR of that
- mode at offset LDR_STR_OFFSET from the start of MODE. */
+ mode at offset LDR_STR_OFFSET from the start of MODE.
+
+ If SPLIT_MODE is not VOIDmode, require a valid LDR/STR sequence
+ of that mode, with the sequence covering the whole of MODE. */
machine_mode ldp_stp_mode = VOIDmode;
machine_mode ldr_str_mode = VOIDmode;
+ machine_mode split_mode = VOIDmode;
unsigned int num_ldp_stp = 1;
poly_int64 ldr_str_offset = 0;
if (type == ADDR_QUERY_LDP_STP)
@@ -9783,7 +9807,10 @@ aarch64_classify_address (struct aarch64_address_info *info,
We conservatively require an offset representable in either mode. */
else if (mode == TImode || mode == TFmode)
{
- ldp_stp_mode = DImode;
+ if (alt_base_p)
+ split_mode = DImode;
+ else
+ ldp_stp_mode = DImode;
ldr_str_mode = mode;
}
/* On BE, we use load/store pair for multi-vector load/stores. */
@@ -9805,6 +9832,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
return false;
bool allow_reg_index_p = (ldp_stp_mode == VOIDmode
+ && split_mode == VOIDmode
&& (known_lt (GET_MODE_SIZE (mode), 16)
|| mode == CADImode
|| vec_flags == VEC_ADVSIMD
@@ -9899,6 +9927,20 @@ aarch64_classify_address (struct aarch64_address_info *info,
return false;
}
+ if (split_mode != VOIDmode)
+ {
+ unsigned int num_split
+ = exact_div (GET_MODE_SIZE (mode),
+ GET_MODE_SIZE (split_mode)).to_constant ();
+ for (unsigned int i = 0; i < num_split; ++i)
+ {
+ auto suboffset = offset + i * GET_MODE_SIZE (split_mode);
+ if (!aarch64_valid_ldr_str_offset_p (split_mode, alt_base_p,
+ suboffset, type))
+ return false;
+ }
+ }
+
if (ldr_str_mode != VOIDmode
&& !aarch64_valid_ldr_str_offset_p (ldr_str_mode, alt_base_p,
offset + ldr_str_offset,
@@ -19366,6 +19408,26 @@ aarch64_endian_lane_rtx (machine_mode mode, unsigned int n)
return gen_int_mode (ENDIAN_LANE_N (GET_MODE_NUNITS (mode), n), SImode);
}
+/* Return true if X is a valid address for mode MODE and if it has
+ an alternative base register. */
+
+bool
+aarch64_alt_base_address_p (machine_mode mode, rtx x)
+{
+ struct aarch64_address_info addr;
+ return aarch64_classify_address (&addr, x, mode, false) && addr.alt_base_p;
+}
+
+/* Return true if X is a valid address for mode MODE and if it has
+ a normal (as opposed to alternative) base register. */
+
+bool
+aarch64_normal_base_address_p (machine_mode mode, rtx x)
+{
+ struct aarch64_address_info addr;
+ return aarch64_classify_address (&addr, x, mode, false) && !addr.alt_base_p;
+}
+
/* Return true if X is either:
- a valid normal-base memory address for an LDR of mode MODE
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 93a68f848ba..31f39ec0f3a 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1392,9 +1392,9 @@
(define_insn "*movti_aarch64"
[(set (match_operand:TI 0
- "nonimmediate_operand" "= r,w, r,w,r,m,m,w,m")
+ "nonimmediate_operand" "= r,w, r,w, r,UAn,UAn, r,UAa,w,m")
(match_operand:TI 1
- "aarch64_mov_operand" " rUti,r, w,w,m,r,Z,m,w"))]
+ "aarch64_mov_operand" " rUti,r, w,w,UAn, r, Z,UAa, rZ,m,w"))]
"(register_operand (operands[0], TImode)
|| aarch64_reg_or_zero (operands[1], TImode))"
"@
@@ -1405,21 +1405,23 @@
ldp\\t%0, %H0, %1
stp\\t%1, %H1, %0
stp\\txzr, xzr, %0
+ #
+ #
ldr\\t%q0, %1
str\\t%q1, %0"
[(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
load_16,store_16,store_16,\
+ load_16,store_16,\
load_16,store_16")
- (set_attr "length" "8,8,8,4,4,4,4,4,4")
- (set_attr "arch" "*,*,*,simd,*,*,*,fp,fp")]
+ (set_attr "length" "8,8,8,4,4,4,4,8,8,4,4")
+ (set_attr "arch" "*,*,*,simd,*,*,*,*,*,fp,fp")]
)
-;; Split a TImode register-register or register-immediate move into
-;; its component DImode pieces, taking care to handle overlapping
-;; source and dest registers.
+;; Split a TImode GPR move into its component DImode pieces, taking
+;; care to handle overlapping source and dest registers.
(define_split
- [(set (match_operand:TI 0 "register_operand" "")
- (match_operand:TI 1 "aarch64_reg_or_imm" ""))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "aarch64_mov_operand" ""))]
"reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])"
[(const_int 0)]
{
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 579c89dd4f5..42d92a3bb97 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -373,6 +373,16 @@
LD[234] and ST[234] patterns)."
(match_operand 0 "aarch64_sve_struct_memory_operand"))
+(define_memory_constraint "UAa"
+ "@internal
+ A general memory operand with an alternative base register"
+ (match_operand 0 "aarch64_alt_base_mem_operand"))
+
+(define_memory_constraint "UAn"
+ "@internal
+ A general memory operand with a normal base register"
+ (match_operand 0 "aarch64_normal_base_mem_operand"))
+
(define_memory_constraint "UAu"
"@internal
Either a general memory operand with a normal base register or
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index e4b3796d0b4..ceca71dc292 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -250,6 +250,16 @@
(match_test "INTVAL (op) != 0
&& (unsigned) exact_log2 (INTVAL (op)) < 64")))
+(define_predicate "aarch64_alt_base_mem_operand"
+ (and (match_code "mem")
+ (match_test "aarch64_alt_base_address_p (GET_MODE (op),
+ XEXP (op, 0))")))
+
+(define_predicate "aarch64_normal_base_mem_operand"
+ (and (match_code "mem")
+ (match_test "aarch64_normal_base_address_p (GET_MODE (op),
+ XEXP (op, 0))")))
+
(define_predicate "aarch64_ldr_or_alt_ldur_operand"
(and (match_code "mem")
(match_test "aarch64_ldr_or_alt_ldur_address_p (GET_MODE (op),
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-ti-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-ti-1.c
new file mode 100644
index 00000000000..c8ddfdf3b09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-ti-1.c
@@ -0,0 +1,210 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+typedef unsigned int ti __attribute__((mode(TI)));
+
+/*
+** load_x10_ti_m257:
+** sub (c[0-9]+), c0, #257
+** ldr x10, \[\1\]
+** ldr x11, \[\1, #?8\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, ti, m257)
+
+/*
+** load_x10_ti_m256:
+** ldr x10, \[c0, #?-256\]
+** ldr x11, \[c0, #?-248\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, ti, m256)
+
+/*
+** load_x10_ti_m255:
+** ldr x10, \[c0, #?-255\]
+** ldr x11, \[c0, #?-247\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, ti, m255)
+
+/*
+** load_x10_ti_m1:
+** ldr x10, \[c0, #?-1\]
+** ldr x11, \[c0, #?7\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, ti, m1)
+
+/*
+** load_x10_ti_1:
+** ldr x10, \[c0, #?1\]
+** ldr x11, \[c0, #?9\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, ti, 1)
+
+/*
+** load_x10_ti_247:
+** ldr x10, \[c0, #?247\]
+** ldr x11, \[c0, #?255\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, ti, 247)
+
+/*
+** load_x10_ti_248:
+** ldr x10, \[c0, #?248\]
+** ldr x11, \[c0, #?256\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, ti, 248)
+
+/*
+** load_x10_ti_249:
+** add (c[0-9]+), c0, #?249
+** ldr x10, \[\1\]
+** ldr x11, \[\1, #?8\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, ti, 249)
+
+/*
+** load_x10_ti_256:
+** add (c[0-9]+), c0, #?256
+** ldr x10, \[\1\]
+** ldr x11, \[\1, #?8\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, ti, 256)
+
+/*
+** load_x10_ti_511:
+** add (c[0-9]+), c0, #?511
+** ldr x10, \[\1\]
+** ldr x11, \[\1, #?8\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, ti, 511)
+
+/*
+** load_x10_ti_512:
+** add (c[0-9]+), c0, #?512
+** ldr x10, \[\1\]
+** ldr x11, \[\1, #?8\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, ti, 512)
+
+/* Check for valid asm, but don't mandate a particular sequence. */
+LOAD_REG_INDEX (x10, ti, int32_t, 1)
+LOAD_REG_INDEX (x10, ti, uint32_t, 1)
+LOAD_REG_INDEX (x10, ti, uint64_t, 1)
+
+LOAD_REG_INDEX (x10, ti, int32_t, 2)
+LOAD_REG_INDEX (x10, ti, uint32_t, 2)
+LOAD_REG_INDEX (x10, ti, uint64_t, 2)
+
+LOAD_REG_INDEX (x10, ti, int32_t, 4)
+LOAD_REG_INDEX (x10, ti, uint32_t, 4)
+LOAD_REG_INDEX (x10, ti, uint64_t, 4)
+
+LOAD_REG_INDEX (x10, ti, int32_t, 8)
+LOAD_REG_INDEX (x10, ti, uint32_t, 8)
+LOAD_REG_INDEX (x10, ti, uint64_t, 8)
+
+LOAD_REG_INDEX (x10, ti, int32_t, 16)
+LOAD_REG_INDEX (x10, ti, uint32_t, 16)
+LOAD_REG_INDEX (x10, ti, uint64_t, 16)
+
+/*
+** load_q20_ti_m257:
+** sub (c[0-9]+), c0, #257
+** ldr q20, \[\1\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, ti, m257)
+
+/*
+** load_q20_ti_m256:
+** ldr q20, \[c0, #?-256\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, ti, m256)
+
+/*
+** load_q20_ti_m255:
+** ldr q20, \[c0, #?-255\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, ti, m255)
+
+/*
+** load_q20_ti_m1:
+** ldr q20, \[c0, #?-1\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, ti, m1)
+
+/*
+** load_q20_ti_1:
+** ldr q20, \[c0, #?1\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, ti, 1)
+
+/*
+** load_q20_ti_247:
+** ldr q20, \[c0, #?247\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, ti, 247)
+
+/*
+** load_q20_ti_248:
+** ldr q20, \[c0, #?248\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, ti, 248)
+
+/*
+** load_q20_ti_249:
+** add (c[0-9]+), c0, #?249
+** ldr q20, \[\1\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, ti, 249)
+
+/*
+** load_q20_ti_256:
+** add (c[0-9]+), c0, #?256
+** ldr q20, \[\1\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, ti, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence. */
+LOAD_REG_INDEX (q20, ti, int32_t, 1)
+LOAD_REG_INDEX (q20, ti, uint32_t, 1)
+LOAD_REG_INDEX (q20, ti, uint64_t, 1)
+
+LOAD_REG_INDEX (q20, ti, int32_t, 2)
+LOAD_REG_INDEX (q20, ti, uint32_t, 2)
+LOAD_REG_INDEX (q20, ti, uint64_t, 2)
+
+LOAD_REG_INDEX (q20, ti, int32_t, 4)
+LOAD_REG_INDEX (q20, ti, uint32_t, 4)
+LOAD_REG_INDEX (q20, ti, uint64_t, 4)
+
+LOAD_REG_INDEX (q20, ti, int32_t, 8)
+LOAD_REG_INDEX (q20, ti, uint32_t, 8)
+LOAD_REG_INDEX (q20, ti, uint64_t, 8)
+
+LOAD_REG_INDEX (q20, ti, int32_t, 16)
+LOAD_REG_INDEX (q20, ti, uint32_t, 16)
+LOAD_REG_INDEX (q20, ti, uint64_t, 16)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-ti-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-ti-1.c
new file mode 100644
index 00000000000..e599bed009a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-ti-1.c
@@ -0,0 +1,210 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+typedef unsigned int ti __attribute__((mode(TI)));
+
+/*
+** store_x10_ti_m257:
+** sub (c[0-9]+), c0, #257
+** str x10, \[\1\]
+** str x11, \[\1, #?8\]
+** ret
+*/
+STORE_REG_OFFSET (x10, ti, m257)
+
+/*
+** store_x10_ti_m256:
+** str x10, \[c0, #?-256\]
+** str x11, \[c0, #?-248\]
+** ret
+*/
+STORE_REG_OFFSET (x10, ti, m256)
+
+/*
+** store_x10_ti_m255:
+** str x10, \[c0, #?-255\]
+** str x11, \[c0, #?-247\]
+** ret
+*/
+STORE_REG_OFFSET (x10, ti, m255)
+
+/*
+** store_x10_ti_m1:
+** str x10, \[c0, #?-1\]
+** str x11, \[c0, #?7\]
+** ret
+*/
+STORE_REG_OFFSET (x10, ti, m1)
+
+/*
+** store_x10_ti_1:
+** str x10, \[c0, #?1\]
+** str x11, \[c0, #?9\]
+** ret
+*/
+STORE_REG_OFFSET (x10, ti, 1)
+
+/*
+** store_x10_ti_247:
+** str x10, \[c0, #?247\]
+** str x11, \[c0, #?255\]
+** ret
+*/
+STORE_REG_OFFSET (x10, ti, 247)
+
+/*
+** store_x10_ti_248:
+** str x10, \[c0, #?248\]
+** str x11, \[c0, #?256\]
+** ret
+*/
+STORE_REG_OFFSET (x10, ti, 248)
+
+/*
+** store_x10_ti_249:
+** add (c[0-9]+), c0, #?249
+** str x10, \[\1\]
+** str x11, \[\1, #?8\]
+** ret
+*/
+STORE_REG_OFFSET (x10, ti, 249)
+
+/*
+** store_x10_ti_256:
+** add (c[0-9]+), c0, #?256
+** str x10, \[\1\]
+** str x11, \[\1, #?8\]
+** ret
+*/
+STORE_REG_OFFSET (x10, ti, 256)
+
+/*
+** store_x10_ti_511:
+** add (c[0-9]+), c0, #?511
+** str x10, \[\1\]
+** str x11, \[\1, #?8\]
+** ret
+*/
+STORE_REG_OFFSET (x10, ti, 511)
+
+/*
+** store_x10_ti_512:
+** add (c[0-9]+), c0, #?512
+** str x10, \[\1\]
+** str x11, \[\1, #?8\]
+** ret
+*/
+STORE_REG_OFFSET (x10, ti, 512)
+
+/* Check for valid asm, but don't mandate a particular sequence. */
+STORE_REG_INDEX (x10, ti, int32_t, 1)
+STORE_REG_INDEX (x10, ti, uint32_t, 1)
+STORE_REG_INDEX (x10, ti, uint64_t, 1)
+
+STORE_REG_INDEX (x10, ti, int32_t, 2)
+STORE_REG_INDEX (x10, ti, uint32_t, 2)
+STORE_REG_INDEX (x10, ti, uint64_t, 2)
+
+STORE_REG_INDEX (x10, ti, int32_t, 4)
+STORE_REG_INDEX (x10, ti, uint32_t, 4)
+STORE_REG_INDEX (x10, ti, uint64_t, 4)
+
+STORE_REG_INDEX (x10, ti, int32_t, 8)
+STORE_REG_INDEX (x10, ti, uint32_t, 8)
+STORE_REG_INDEX (x10, ti, uint64_t, 8)
+
+STORE_REG_INDEX (x10, ti, int32_t, 16)
+STORE_REG_INDEX (x10, ti, uint32_t, 16)
+STORE_REG_INDEX (x10, ti, uint64_t, 16)
+
+/*
+** store_q20_ti_m257:
+** sub (c[0-9]+), c0, #257
+** str q20, \[\1\]
+** ret
+*/
+STORE_REG_OFFSET (q20, ti, m257)
+
+/*
+** store_q20_ti_m256:
+** str q20, \[c0, #?-256\]
+** ret
+*/
+STORE_REG_OFFSET (q20, ti, m256)
+
+/*
+** store_q20_ti_m255:
+** str q20, \[c0, #?-255\]
+** ret
+*/
+STORE_REG_OFFSET (q20, ti, m255)
+
+/*
+** store_q20_ti_m1:
+** str q20, \[c0, #?-1\]
+** ret
+*/
+STORE_REG_OFFSET (q20, ti, m1)
+
+/*
+** store_q20_ti_1:
+** str q20, \[c0, #?1\]
+** ret
+*/
+STORE_REG_OFFSET (q20, ti, 1)
+
+/*
+** store_q20_ti_247:
+** str q20, \[c0, #?247\]
+** ret
+*/
+STORE_REG_OFFSET (q20, ti, 247)
+
+/*
+** store_q20_ti_248:
+** str q20, \[c0, #?248\]
+** ret
+*/
+STORE_REG_OFFSET (q20, ti, 248)
+
+/*
+** store_q20_ti_249:
+** add (c[0-9]+), c0, #?249
+** str q20, \[\1\]
+** ret
+*/
+STORE_REG_OFFSET (q20, ti, 249)
+
+/*
+** store_q20_ti_256:
+** add (c[0-9]+), c0, #?256
+** str q20, \[\1\]
+** ret
+*/
+STORE_REG_OFFSET (q20, ti, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence. */
+STORE_REG_INDEX (q20, ti, int32_t, 1)
+STORE_REG_INDEX (q20, ti, uint32_t, 1)
+STORE_REG_INDEX (q20, ti, uint64_t, 1)
+
+STORE_REG_INDEX (q20, ti, int32_t, 2)
+STORE_REG_INDEX (q20, ti, uint32_t, 2)
+STORE_REG_INDEX (q20, ti, uint64_t, 2)
+
+STORE_REG_INDEX (q20, ti, int32_t, 4)
+STORE_REG_INDEX (q20, ti, uint32_t, 4)
+STORE_REG_INDEX (q20, ti, uint64_t, 4)
+
+STORE_REG_INDEX (q20, ti, int32_t, 8)
+STORE_REG_INDEX (q20, ti, uint32_t, 8)
+STORE_REG_INDEX (q20, ti, uint64_t, 8)
+
+STORE_REG_INDEX (q20, ti, int32_t, 16)
+STORE_REG_INDEX (q20, ti, uint32_t, 16)
+STORE_REG_INDEX (q20, ti, uint64_t, 16)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-ti-2.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-ti-2.c
new file mode 100644
index 00000000000..865cccaf998
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-ti-2.c
@@ -0,0 +1,123 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+typedef unsigned int ti __attribute__((mode(TI)));
+
+/*
+** store_zero_ti_m257:
+** sub (c[0-9]+), c0, #257
+** str xzr, \[\1\]
+** str xzr, \[\1, #?8\]
+** ret
+*/
+STORE_ZERO_OFFSET (ti, m257)
+
+/*
+** store_zero_ti_m256:
+** str xzr, \[c0, #?-256\]
+** str xzr, \[c0, #?-248\]
+** ret
+*/
+STORE_ZERO_OFFSET (ti, m256)
+
+/*
+** store_zero_ti_m255:
+** str xzr, \[c0, #?-255\]
+** str xzr, \[c0, #?-247\]
+** ret
+*/
+STORE_ZERO_OFFSET (ti, m255)
+
+/*
+** store_zero_ti_m1:
+** str xzr, \[c0, #?-1\]
+** str xzr, \[c0, #?7\]
+** ret
+*/
+STORE_ZERO_OFFSET (ti, m1)
+
+/*
+** store_zero_ti_1:
+** str xzr, \[c0, #?1\]
+** str xzr, \[c0, #?9\]
+** ret
+*/
+STORE_ZERO_OFFSET (ti, 1)
+
+/*
+** store_zero_ti_247:
+** str xzr, \[c0, #?247\]
+** str xzr, \[c0, #?255\]
+** ret
+*/
+STORE_ZERO_OFFSET (ti, 247)
+
+/*
+** store_zero_ti_248:
+** str xzr, \[c0, #?248\]
+** str xzr, \[c0, #?256\]
+** ret
+*/
+STORE_ZERO_OFFSET (ti, 248)
+
+/*
+** store_zero_ti_249:
+** add (c[0-9]+), c0, #?249
+** str xzr, \[\1\]
+** str xzr, \[\1, #?8\]
+** ret
+*/
+STORE_ZERO_OFFSET (ti, 249)
+
+/*
+** store_zero_ti_256:
+** add (c[0-9]+), c0, #?256
+** str xzr, \[\1\]
+** str xzr, \[\1, #?8\]
+** ret
+*/
+STORE_ZERO_OFFSET (ti, 256)
+
+/*
+** store_zero_ti_511:
+** add (c[0-9]+), c0, #?511
+** str xzr, \[\1\]
+** str xzr, \[\1, #?8\]
+** ret
+*/
+STORE_ZERO_OFFSET (ti, 511)
+
+/*
+** store_zero_ti_512:
+** add (c[0-9]+), c0, #?512
+** str xzr, \[\1\]
+** str xzr, \[\1, #?8\]
+** ret
+*/
+STORE_ZERO_OFFSET (ti, 512)
+
+/* Check for valid asm, but don't mandate a particular sequence. */
+STORE_ZERO_INDEX (ti, int32_t, 1)
+STORE_ZERO_INDEX (ti, uint32_t, 1)
+STORE_ZERO_INDEX (ti, uint64_t, 1)
+
+STORE_ZERO_INDEX (ti, int32_t, 2)
+STORE_ZERO_INDEX (ti, uint32_t, 2)
+STORE_ZERO_INDEX (ti, uint64_t, 2)
+
+STORE_ZERO_INDEX (ti, int32_t, 4)
+STORE_ZERO_INDEX (ti, uint32_t, 4)
+STORE_ZERO_INDEX (ti, uint64_t, 4)
+
+STORE_ZERO_INDEX (ti, int32_t, 8)
+STORE_ZERO_INDEX (ti, uint32_t, 8)
+STORE_ZERO_INDEX (ti, uint64_t, 8)
+
+STORE_ZERO_INDEX (ti, int32_t, 16)
+STORE_ZERO_INDEX (ti, uint32_t, 16)
+STORE_ZERO_INDEX (ti, uint64_t, 16)
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-05-05 12:06 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-05 12:06 [gcc(refs/vendors/ARM/heads/morello)] aarch64: Support alternative-base TI loads & stores Matthew Malcomson
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).