From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2049) id ECCB3385DC04; Thu, 5 May 2022 12:07:16 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org ECCB3385DC04 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Matthew Malcomson To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/vendors/ARM/heads/morello)] aarch64: Alternative-base support for structure loads and stores X-Act-Checkin: gcc X-Git-Author: Richard Sandiford X-Git-Refname: refs/vendors/ARM/heads/morello X-Git-Oldrev: 10533b003b7f23a6028a3d0a226c75af8712e122 X-Git-Newrev: 07070a5388ca6a76e81d17da460f57b30afe7218 Message-Id: <20220505120716.ECCB3385DC04@sourceware.org> Date: Thu, 5 May 2022 12:07:16 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 05 May 2022 12:07:17 -0000 https://gcc.gnu.org/g:07070a5388ca6a76e81d17da460f57b30afe7218 commit 07070a5388ca6a76e81d17da460f57b30afe7218 Author: Richard Sandiford Date: Fri Apr 8 15:08:24 2022 +0100 aarch64: Alternative-base support for structure loads and stores OI, CI and XI represent tuples of 2, 3 and 4 vectors respectively. They are loaded and stored using multi-register forms of LD1 and ST1. However, there are no alternative-base forms of those instructions, so we need to split them into individual LDR Qs and STR Qs instead. This is similar to what we do for TI and TF, but much simpler, since there is no risk of a loaded Q register overlapping the addresses. On trunk, OI, CI and XI have been replaced by real vector modes, but this code should scale naturally to that. 
Diff: --- gcc/config/aarch64/aarch64-simd.md | 34 +++++- gcc/config/aarch64/aarch64.c | 26 ++++- .../aarch64/morello/alt-base-load-v256-1.c | 106 ++++++++++++++++++ .../aarch64/morello/alt-base-load-v384-1.c | 115 +++++++++++++++++++ .../aarch64/morello/alt-base-load-v512-1.c | 124 +++++++++++++++++++++ .../aarch64/morello/alt-base-store-v256-1.c | 106 ++++++++++++++++++ .../aarch64/morello/alt-base-store-v256-2.c | 38 +++++++ .../aarch64/morello/alt-base-store-v384-1.c | 115 +++++++++++++++++++ .../aarch64/morello/alt-base-store-v384-2.c | 38 +++++++ .../aarch64/morello/alt-base-store-v512-1.c | 124 +++++++++++++++++++++ .../aarch64/morello/alt-base-store-v512-2.c | 38 +++++++ 11 files changed, 853 insertions(+), 11 deletions(-) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index a82c662e867..1b1e5c02894 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -5334,18 +5334,22 @@ ) (define_insn "*aarch64_mov<mode>" - [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w") - (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))] + [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Utv,w,UAa,w") + (match_operand:VSTRUCT 1 "general_operand" " w,w,Utv,w,UAa"))] "TARGET_SIMD && !BYTES_BIG_ENDIAN && (register_operand (operands[0], <MODE>mode) || register_operand (operands[1], <MODE>mode))" "@ # st1\\t{%S1.16b - %<Vendreg>1.16b}, %0 - ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1" - [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\ + ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1 + # + #" + [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q, + neon_load<nregs>_<nregs>reg_q, + neon_store<nregs>_<nregs>reg_q, neon_load<nregs>_<nregs>reg_q") - (set_attr "length" "<insn_count>,4,4")] + (set_attr "length" "<insn_count>,4,4,<insn_count>,<insn_count>")] ) (define_insn "aarch64_be_ld1<mode>" @@ -5403,6 +5407,26 @@ (set_attr "length" "16,4,4")] ) +(define_split + [(set (match_operand:VSTRUCT 0 "nonimmediate_operand") + (match_operand:VSTRUCT 1 "general_operand"))] + "TARGET_SIMD + && reload_completed + && 
(aarch64_alt_base_mem_operand (operands[0], <MODE>mode) + || aarch64_alt_base_mem_operand (operands[1], <MODE>mode))" + [(const_int 0)] +{ + for (unsigned int i = 0; i < <insn_count> / 4; ++i) + { + machine_mode new_mode = V16QImode; + auto byte = i * GET_MODE_SIZE (new_mode); + rtx dst = simplify_gen_subreg (new_mode, operands[0], <MODE>mode, byte); + rtx src = simplify_gen_subreg (new_mode, operands[1], <MODE>mode, byte); + emit_move_insn (dst, src); + } + DONE; +}) + (define_split [(set (match_operand:OI 0 "register_operand") (match_operand:OI 1 "register_operand"))] diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 8d268af60eb..f2a72a0ca2f 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -2604,6 +2604,17 @@ aarch64_mem_addr_mode_p (machine_mode mode) return mode == Pmode; } +/* Return true if MODE is the mode of "normal" (as opposed to alternative) + base registers. */ + +static bool +aarch64_normal_base_mode_p (machine_mode mode) +{ + if (!CAPABILITY_MODE_P (Pmode) && mode == VOIDmode) + return true; + return mode == Pmode; +} + /* Implement TARGET_PREFERRED_ELSE_VALUE. For binary operations, prefer to use the first arithmetic operand as the else value if the else value doesn't matter, since that exactly matches the SVE @@ -9801,6 +9812,10 @@ aarch64_classify_address (struct aarch64_address_info *info, ldp_stp_mode = DImode; ldr_str_mode = mode; } + /* There are no alternative-base forms of multi-register LD1 and ST1, + so we need to split structure moves into individual LDRs and STRs. */ + else if (alt_base_p && advsimd_struct_p) + split_mode = V16QImode; /* On BE, we use load/store pair for multi-vector load/stores. */ else if (BYTES_BIG_ENDIAN && advsimd_struct_p) { @@ -9851,9 +9866,9 @@ aarch64_classify_address (struct aarch64_address_info *info, if (code != POST_INC) return false; } - /* On LE, for AdvSIMD, don't support anything other than POST_INC or - REG addressing. 
*/ - else if (!BYTES_BIG_ENDIAN && code != REG) + /* Don't support anything other than POST_INC or REG addressing if + we can use LD1 and ST1. */ + else if (!alt_base_p && !BYTES_BIG_ENDIAN && code != REG) return false; } @@ -19447,7 +19462,7 @@ aarch64_simd_mem_operand_p (rtx op) return (MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC || REG_P (XEXP (op, 0))) - && aarch64_mem_addr_mode_p (GET_MODE (XEXP (op, 0)))); + && aarch64_normal_base_mode_p (mem_address_mode (op))); } /* Return true if OP is a valid MEM operand for an SVE LD1R instruction. */ @@ -23783,8 +23798,7 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load) /* The addres must use a normal rather than an alternative base register. */ - if (TARGET_CAPABILITY_HYBRID - && CAPABILITY_MODE_P (mem_address_mode (mem[i]))) + if (!aarch64_normal_base_mode_p (mem_address_mode (mem[i]))) return false; /* Check if the addresses are in the form of [base+offset]. */ diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v256-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v256-1.c new file mode 100644 index 00000000000..772400f92ab --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v256-1.c @@ -0,0 +1,106 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */ +/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */ + +#define ALT_BASE +#include "load-store-utils.h" + +#include + +/* +** load_q20_int8x16x2_t_m257: +** sub (c[0-9]+), c0, #257 +** ldr q20, \[\1\] +** ldr q21, \[\1, #?16\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x2_t, m257) + +/* +** load_q20_int8x16x2_t_m256: +** ldr q20, \[c0, #?-256\] +** ldr q21, \[c0, #?-240\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x2_t, m256) + +/* +** load_q20_int8x16x2_t_m255: +** ldr q20, \[c0, #?-255\] +** ldr q21, \[c0, #?-239\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x2_t, m255) + +/* 
+** load_q20_int8x16x2_t_m1: +** ldr q20, \[c0, #?-1\] +** ldr q21, \[c0, #?15\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x2_t, m1) + +/* +** load_q20_int8x16x2_t_1: +** ldr q20, \[c0, #?1\] +** ldr q21, \[c0, #?17\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x2_t, 1) + +/* +** load_q20_int8x16x2_t_239: +** ldr q20, \[c0, #?239\] +** ldr q21, \[c0, #?255\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x2_t, 239) + +/* +** load_q20_int8x16x2_t_240: +** add (c[0-9]+), c0, #?240 +** ldr q20, \[\1\] +** ldr q21, \[\1, #?16\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x2_t, 240) + +/* +** load_q20_int8x16x2_t_241: +** add (c[0-9]+), c0, #?241 +** ldr q20, \[\1\] +** ldr q21, \[\1, #?16\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x2_t, 241) + +/* +** load_q20_int8x16x2_t_256: +** add (c[0-9]+), c0, #?256 +** ldr q20, \[\1\] +** ldr q21, \[\1, #?16\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x2_t, 256) + +/* Check for valid asm, but don't mandate a particular sequence. */ +LOAD_REG_INDEX (q20, int8x16x2_t, int32_t, 1) +LOAD_REG_INDEX (q20, int8x16x2_t, uint32_t, 1) +LOAD_REG_INDEX (q20, int8x16x2_t, uint64_t, 1) + +LOAD_REG_INDEX (q20, int8x16x2_t, int32_t, 2) +LOAD_REG_INDEX (q20, int8x16x2_t, uint32_t, 2) +LOAD_REG_INDEX (q20, int8x16x2_t, uint64_t, 2) + +LOAD_REG_INDEX (q20, int8x16x2_t, int32_t, 4) +LOAD_REG_INDEX (q20, int8x16x2_t, uint32_t, 4) +LOAD_REG_INDEX (q20, int8x16x2_t, uint64_t, 4) + +LOAD_REG_INDEX (q20, int8x16x2_t, int32_t, 8) +LOAD_REG_INDEX (q20, int8x16x2_t, uint32_t, 8) +LOAD_REG_INDEX (q20, int8x16x2_t, uint64_t, 8) + +LOAD_REG_INDEX (q20, int8x16x2_t, int32_t, 16) +LOAD_REG_INDEX (q20, int8x16x2_t, uint32_t, 16) +LOAD_REG_INDEX (q20, int8x16x2_t, uint64_t, 16) diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v384-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v384-1.c new file mode 100644 index 00000000000..adf0365794d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v384-1.c @@ -0,0 +1,115 @@ 
+/* { dg-do assemble } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */ +/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */ + +#define ALT_BASE +#include "load-store-utils.h" + +#include + +/* +** load_q20_int8x16x3_t_m257: +** sub (c[0-9]+), c0, #257 +** ldr q20, \[\1\] +** ldr q21, \[\1, #?16\] +** ldr q22, \[\1, #?32\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x3_t, m257) + +/* +** load_q20_int8x16x3_t_m256: +** ldr q20, \[c0, #?-256\] +** ldr q21, \[c0, #?-240\] +** ldr q22, \[c0, #?-224\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x3_t, m256) + +/* +** load_q20_int8x16x3_t_m255: +** ldr q20, \[c0, #?-255\] +** ldr q21, \[c0, #?-239\] +** ldr q22, \[c0, #?-223\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x3_t, m255) + +/* +** load_q20_int8x16x3_t_m1: +** ldr q20, \[c0, #?-1\] +** ldr q21, \[c0, #?15\] +** ldr q22, \[c0, #?31\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x3_t, m1) + +/* +** load_q20_int8x16x3_t_1: +** ldr q20, \[c0, #?1\] +** ldr q21, \[c0, #?17\] +** ldr q22, \[c0, #?33\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x3_t, 1) + +/* +** load_q20_int8x16x3_t_223: +** ldr q20, \[c0, #?223\] +** ldr q21, \[c0, #?239\] +** ldr q22, \[c0, #?255\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x3_t, 223) + +/* +** load_q20_int8x16x3_t_224: +** add (c[0-9]+), c0, #?224 +** ldr q20, \[\1\] +** ldr q21, \[\1, #?16\] +** ldr q22, \[\1, #?32\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x3_t, 224) + +/* +** load_q20_int8x16x3_t_225: +** add (c[0-9]+), c0, #?225 +** ldr q20, \[\1\] +** ldr q21, \[\1, #?16\] +** ldr q22, \[\1, #?32\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x3_t, 225) + +/* +** load_q20_int8x16x3_t_256: +** add (c[0-9]+), c0, #?256 +** ldr q20, \[\1\] +** ldr q21, \[\1, #?16\] +** ldr q22, \[\1, #?32\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x3_t, 256) + +/* Check for valid asm, but don't mandate a particular sequence. 
*/ +LOAD_REG_INDEX (q20, int8x16x3_t, int32_t, 1) +LOAD_REG_INDEX (q20, int8x16x3_t, uint32_t, 1) +LOAD_REG_INDEX (q20, int8x16x3_t, uint64_t, 1) + +LOAD_REG_INDEX (q20, int8x16x3_t, int32_t, 2) +LOAD_REG_INDEX (q20, int8x16x3_t, uint32_t, 2) +LOAD_REG_INDEX (q20, int8x16x3_t, uint64_t, 2) + +LOAD_REG_INDEX (q20, int8x16x3_t, int32_t, 4) +LOAD_REG_INDEX (q20, int8x16x3_t, uint32_t, 4) +LOAD_REG_INDEX (q20, int8x16x3_t, uint64_t, 4) + +LOAD_REG_INDEX (q20, int8x16x3_t, int32_t, 8) +LOAD_REG_INDEX (q20, int8x16x3_t, uint32_t, 8) +LOAD_REG_INDEX (q20, int8x16x3_t, uint64_t, 8) + +LOAD_REG_INDEX (q20, int8x16x3_t, int32_t, 16) +LOAD_REG_INDEX (q20, int8x16x3_t, uint32_t, 16) +LOAD_REG_INDEX (q20, int8x16x3_t, uint64_t, 16) diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v512-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v512-1.c new file mode 100644 index 00000000000..db91ffa9bfd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v512-1.c @@ -0,0 +1,124 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */ +/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */ + +#define ALT_BASE +#include "load-store-utils.h" + +#include + +/* +** load_q20_int8x16x4_t_m257: +** sub (c[0-9]+), c0, #257 +** ldr q20, \[\1\] +** ldr q21, \[\1, #?16\] +** ldr q22, \[\1, #?32\] +** ldr q23, \[\1, #?48\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x4_t, m257) + +/* +** load_q20_int8x16x4_t_m256: +** ldr q20, \[c0, #?-256\] +** ldr q21, \[c0, #?-240\] +** ldr q22, \[c0, #?-224\] +** ldr q23, \[c0, #?-208\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x4_t, m256) + +/* +** load_q20_int8x16x4_t_m255: +** ldr q20, \[c0, #?-255\] +** ldr q21, \[c0, #?-239\] +** ldr q22, \[c0, #?-223\] +** ldr q23, \[c0, #?-207\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x4_t, m255) + +/* +** load_q20_int8x16x4_t_m1: +** ldr q20, \[c0, #?-1\] 
+** ldr q21, \[c0, #?15\] +** ldr q22, \[c0, #?31\] +** ldr q23, \[c0, #?47\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x4_t, m1) + +/* +** load_q20_int8x16x4_t_1: +** ldr q20, \[c0, #?1\] +** ldr q21, \[c0, #?17\] +** ldr q22, \[c0, #?33\] +** ldr q23, \[c0, #?49\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x4_t, 1) + +/* +** load_q20_int8x16x4_t_207: +** ldr q20, \[c0, #?207\] +** ldr q21, \[c0, #?223\] +** ldr q22, \[c0, #?239\] +** ldr q23, \[c0, #?255\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x4_t, 207) + +/* +** load_q20_int8x16x4_t_208: +** add (c[0-9]+), c0, #?208 +** ldr q20, \[\1\] +** ldr q21, \[\1, #?16\] +** ldr q22, \[\1, #?32\] +** ldr q23, \[\1, #?48\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x4_t, 208) + +/* +** load_q20_int8x16x4_t_209: +** add (c[0-9]+), c0, #?209 +** ldr q20, \[\1\] +** ldr q21, \[\1, #?16\] +** ldr q22, \[\1, #?32\] +** ldr q23, \[\1, #?48\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x4_t, 209) + +/* +** load_q20_int8x16x4_t_256: +** add (c[0-9]+), c0, #?256 +** ldr q20, \[\1\] +** ldr q21, \[\1, #?16\] +** ldr q22, \[\1, #?32\] +** ldr q23, \[\1, #?48\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16x4_t, 256) + +/* Check for valid asm, but don't mandate a particular sequence. 
*/ +LOAD_REG_INDEX (q20, int8x16x4_t, int32_t, 1) +LOAD_REG_INDEX (q20, int8x16x4_t, uint32_t, 1) +LOAD_REG_INDEX (q20, int8x16x4_t, uint64_t, 1) + +LOAD_REG_INDEX (q20, int8x16x4_t, int32_t, 2) +LOAD_REG_INDEX (q20, int8x16x4_t, uint32_t, 2) +LOAD_REG_INDEX (q20, int8x16x4_t, uint64_t, 2) + +LOAD_REG_INDEX (q20, int8x16x4_t, int32_t, 4) +LOAD_REG_INDEX (q20, int8x16x4_t, uint32_t, 4) +LOAD_REG_INDEX (q20, int8x16x4_t, uint64_t, 4) + +LOAD_REG_INDEX (q20, int8x16x4_t, int32_t, 8) +LOAD_REG_INDEX (q20, int8x16x4_t, uint32_t, 8) +LOAD_REG_INDEX (q20, int8x16x4_t, uint64_t, 8) + +LOAD_REG_INDEX (q20, int8x16x4_t, int32_t, 16) +LOAD_REG_INDEX (q20, int8x16x4_t, uint32_t, 16) +LOAD_REG_INDEX (q20, int8x16x4_t, uint64_t, 16) diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v256-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v256-1.c new file mode 100644 index 00000000000..07a1d5a79d9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v256-1.c @@ -0,0 +1,106 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */ +/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */ + +#define ALT_BASE +#include "load-store-utils.h" + +#include + +/* +** store_q20_int8x16x2_t_m257: +** sub (c[0-9]+), c0, #257 +** str q20, \[\1\] +** str q21, \[\1, #?16\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x2_t, m257) + +/* +** store_q20_int8x16x2_t_m256: +** str q20, \[c0, #?-256\] +** str q21, \[c0, #?-240\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x2_t, m256) + +/* +** store_q20_int8x16x2_t_m255: +** str q20, \[c0, #?-255\] +** str q21, \[c0, #?-239\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x2_t, m255) + +/* +** store_q20_int8x16x2_t_m1: +** str q20, \[c0, #?-1\] +** str q21, \[c0, #?15\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x2_t, m1) + +/* +** store_q20_int8x16x2_t_1: +** str q20, \[c0, #?1\] +** str q21, 
\[c0, #?17\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x2_t, 1) + +/* +** store_q20_int8x16x2_t_239: +** str q20, \[c0, #?239\] +** str q21, \[c0, #?255\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x2_t, 239) + +/* +** store_q20_int8x16x2_t_240: +** add (c[0-9]+), c0, #?240 +** str q20, \[\1\] +** str q21, \[\1, #?16\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x2_t, 240) + +/* +** store_q20_int8x16x2_t_241: +** add (c[0-9]+), c0, #?241 +** str q20, \[\1\] +** str q21, \[\1, #?16\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x2_t, 241) + +/* +** store_q20_int8x16x2_t_256: +** add (c[0-9]+), c0, #?256 +** str q20, \[\1\] +** str q21, \[\1, #?16\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x2_t, 256) + +/* Check for valid asm, but don't mandate a particular sequence. */ +STORE_REG_INDEX (q20, int8x16x2_t, int32_t, 1) +STORE_REG_INDEX (q20, int8x16x2_t, uint32_t, 1) +STORE_REG_INDEX (q20, int8x16x2_t, uint64_t, 1) + +STORE_REG_INDEX (q20, int8x16x2_t, int32_t, 2) +STORE_REG_INDEX (q20, int8x16x2_t, uint32_t, 2) +STORE_REG_INDEX (q20, int8x16x2_t, uint64_t, 2) + +STORE_REG_INDEX (q20, int8x16x2_t, int32_t, 4) +STORE_REG_INDEX (q20, int8x16x2_t, uint32_t, 4) +STORE_REG_INDEX (q20, int8x16x2_t, uint64_t, 4) + +STORE_REG_INDEX (q20, int8x16x2_t, int32_t, 8) +STORE_REG_INDEX (q20, int8x16x2_t, uint32_t, 8) +STORE_REG_INDEX (q20, int8x16x2_t, uint64_t, 8) + +STORE_REG_INDEX (q20, int8x16x2_t, int32_t, 16) +STORE_REG_INDEX (q20, int8x16x2_t, uint32_t, 16) +STORE_REG_INDEX (q20, int8x16x2_t, uint64_t, 16) diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v256-2.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v256-2.c new file mode 100644 index 00000000000..8a9ab893e6a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v256-2.c @@ -0,0 +1,38 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-save-temps" } */ + +#define ALT_BASE +#include "load-store-utils.h" + +#include + +/* Check for valid asm, but don't mandate 
a particular sequence. */ +STORE_ZERO_OFFSET (int8x16x2_t, m257) +STORE_ZERO_OFFSET (int8x16x2_t, m256) +STORE_ZERO_OFFSET (int8x16x2_t, m255) +STORE_ZERO_OFFSET (int8x16x2_t, m1) +STORE_ZERO_OFFSET (int8x16x2_t, 1) +STORE_ZERO_OFFSET (int8x16x2_t, 239) +STORE_ZERO_OFFSET (int8x16x2_t, 240) +STORE_ZERO_OFFSET (int8x16x2_t, 241) +STORE_ZERO_OFFSET (int8x16x2_t, 256) + +STORE_ZERO_INDEX (int8x16x2_t, int32_t, 1) +STORE_ZERO_INDEX (int8x16x2_t, uint32_t, 1) +STORE_ZERO_INDEX (int8x16x2_t, uint64_t, 1) + +STORE_ZERO_INDEX (int8x16x2_t, int32_t, 2) +STORE_ZERO_INDEX (int8x16x2_t, uint32_t, 2) +STORE_ZERO_INDEX (int8x16x2_t, uint64_t, 2) + +STORE_ZERO_INDEX (int8x16x2_t, int32_t, 4) +STORE_ZERO_INDEX (int8x16x2_t, uint32_t, 4) +STORE_ZERO_INDEX (int8x16x2_t, uint64_t, 4) + +STORE_ZERO_INDEX (int8x16x2_t, int32_t, 8) +STORE_ZERO_INDEX (int8x16x2_t, uint32_t, 8) +STORE_ZERO_INDEX (int8x16x2_t, uint64_t, 8) + +STORE_ZERO_INDEX (int8x16x2_t, int32_t, 16) +STORE_ZERO_INDEX (int8x16x2_t, uint32_t, 16) +STORE_ZERO_INDEX (int8x16x2_t, uint64_t, 16) diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v384-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v384-1.c new file mode 100644 index 00000000000..06d2b481f13 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v384-1.c @@ -0,0 +1,115 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */ +/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */ + +#define ALT_BASE +#include "load-store-utils.h" + +#include + +/* +** store_q20_int8x16x3_t_m257: +** sub (c[0-9]+), c0, #257 +** str q20, \[\1\] +** str q21, \[\1, #?16\] +** str q22, \[\1, #?32\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x3_t, m257) + +/* +** store_q20_int8x16x3_t_m256: +** str q20, \[c0, #?-256\] +** str q21, \[c0, #?-240\] +** str q22, \[c0, #?-224\] +** ret +*/ +STORE_REG_OFFSET (q20, 
int8x16x3_t, m256) + +/* +** store_q20_int8x16x3_t_m255: +** str q20, \[c0, #?-255\] +** str q21, \[c0, #?-239\] +** str q22, \[c0, #?-223\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x3_t, m255) + +/* +** store_q20_int8x16x3_t_m1: +** str q20, \[c0, #?-1\] +** str q21, \[c0, #?15\] +** str q22, \[c0, #?31\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x3_t, m1) + +/* +** store_q20_int8x16x3_t_1: +** str q20, \[c0, #?1\] +** str q21, \[c0, #?17\] +** str q22, \[c0, #?33\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x3_t, 1) + +/* +** store_q20_int8x16x3_t_223: +** str q20, \[c0, #?223\] +** str q21, \[c0, #?239\] +** str q22, \[c0, #?255\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x3_t, 223) + +/* +** store_q20_int8x16x3_t_224: +** add (c[0-9]+), c0, #?224 +** str q20, \[\1\] +** str q21, \[\1, #?16\] +** str q22, \[\1, #?32\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x3_t, 224) + +/* +** store_q20_int8x16x3_t_225: +** add (c[0-9]+), c0, #?225 +** str q20, \[\1\] +** str q21, \[\1, #?16\] +** str q22, \[\1, #?32\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x3_t, 225) + +/* +** store_q20_int8x16x3_t_256: +** add (c[0-9]+), c0, #?256 +** str q20, \[\1\] +** str q21, \[\1, #?16\] +** str q22, \[\1, #?32\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x3_t, 256) + +/* Check for valid asm, but don't mandate a particular sequence. 
*/ +STORE_REG_INDEX (q20, int8x16x3_t, int32_t, 1) +STORE_REG_INDEX (q20, int8x16x3_t, uint32_t, 1) +STORE_REG_INDEX (q20, int8x16x3_t, uint64_t, 1) + +STORE_REG_INDEX (q20, int8x16x3_t, int32_t, 2) +STORE_REG_INDEX (q20, int8x16x3_t, uint32_t, 2) +STORE_REG_INDEX (q20, int8x16x3_t, uint64_t, 2) + +STORE_REG_INDEX (q20, int8x16x3_t, int32_t, 4) +STORE_REG_INDEX (q20, int8x16x3_t, uint32_t, 4) +STORE_REG_INDEX (q20, int8x16x3_t, uint64_t, 4) + +STORE_REG_INDEX (q20, int8x16x3_t, int32_t, 8) +STORE_REG_INDEX (q20, int8x16x3_t, uint32_t, 8) +STORE_REG_INDEX (q20, int8x16x3_t, uint64_t, 8) + +STORE_REG_INDEX (q20, int8x16x3_t, int32_t, 16) +STORE_REG_INDEX (q20, int8x16x3_t, uint32_t, 16) +STORE_REG_INDEX (q20, int8x16x3_t, uint64_t, 16) diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v384-2.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v384-2.c new file mode 100644 index 00000000000..6eab2193574 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v384-2.c @@ -0,0 +1,38 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-save-temps" } */ + +#define ALT_BASE +#include "load-store-utils.h" + +#include + +/* Check for valid asm, but don't mandate a particular sequence. 
*/ +STORE_ZERO_OFFSET (int8x16x3_t, m257) +STORE_ZERO_OFFSET (int8x16x3_t, m256) +STORE_ZERO_OFFSET (int8x16x3_t, m255) +STORE_ZERO_OFFSET (int8x16x3_t, m1) +STORE_ZERO_OFFSET (int8x16x3_t, 1) +STORE_ZERO_OFFSET (int8x16x3_t, 223) +STORE_ZERO_OFFSET (int8x16x3_t, 224) +STORE_ZERO_OFFSET (int8x16x3_t, 225) +STORE_ZERO_OFFSET (int8x16x3_t, 256) + +STORE_ZERO_INDEX (int8x16x3_t, int32_t, 1) +STORE_ZERO_INDEX (int8x16x3_t, uint32_t, 1) +STORE_ZERO_INDEX (int8x16x3_t, uint64_t, 1) + +STORE_ZERO_INDEX (int8x16x3_t, int32_t, 2) +STORE_ZERO_INDEX (int8x16x3_t, uint32_t, 2) +STORE_ZERO_INDEX (int8x16x3_t, uint64_t, 2) + +STORE_ZERO_INDEX (int8x16x3_t, int32_t, 4) +STORE_ZERO_INDEX (int8x16x3_t, uint32_t, 4) +STORE_ZERO_INDEX (int8x16x3_t, uint64_t, 4) + +STORE_ZERO_INDEX (int8x16x3_t, int32_t, 8) +STORE_ZERO_INDEX (int8x16x3_t, uint32_t, 8) +STORE_ZERO_INDEX (int8x16x3_t, uint64_t, 8) + +STORE_ZERO_INDEX (int8x16x3_t, int32_t, 16) +STORE_ZERO_INDEX (int8x16x3_t, uint32_t, 16) +STORE_ZERO_INDEX (int8x16x3_t, uint64_t, 16) diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v512-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v512-1.c new file mode 100644 index 00000000000..37d54f1b762 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v512-1.c @@ -0,0 +1,124 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */ +/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */ + +#define ALT_BASE +#include "load-store-utils.h" + +#include + +/* +** store_q20_int8x16x4_t_m257: +** sub (c[0-9]+), c0, #257 +** str q20, \[\1\] +** str q21, \[\1, #?16\] +** str q22, \[\1, #?32\] +** str q23, \[\1, #?48\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x4_t, m257) + +/* +** store_q20_int8x16x4_t_m256: +** str q20, \[c0, #?-256\] +** str q21, \[c0, #?-240\] +** str q22, \[c0, #?-224\] +** str q23, \[c0, #?-208\] +** ret +*/ 
+STORE_REG_OFFSET (q20, int8x16x4_t, m256) + +/* +** store_q20_int8x16x4_t_m255: +** str q20, \[c0, #?-255\] +** str q21, \[c0, #?-239\] +** str q22, \[c0, #?-223\] +** str q23, \[c0, #?-207\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x4_t, m255) + +/* +** store_q20_int8x16x4_t_m1: +** str q20, \[c0, #?-1\] +** str q21, \[c0, #?15\] +** str q22, \[c0, #?31\] +** str q23, \[c0, #?47\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x4_t, m1) + +/* +** store_q20_int8x16x4_t_1: +** str q20, \[c0, #?1\] +** str q21, \[c0, #?17\] +** str q22, \[c0, #?33\] +** str q23, \[c0, #?49\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x4_t, 1) + +/* +** store_q20_int8x16x4_t_207: +** str q20, \[c0, #?207\] +** str q21, \[c0, #?223\] +** str q22, \[c0, #?239\] +** str q23, \[c0, #?255\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x4_t, 207) + +/* +** store_q20_int8x16x4_t_208: +** add (c[0-9]+), c0, #?208 +** str q20, \[\1\] +** str q21, \[\1, #?16\] +** str q22, \[\1, #?32\] +** str q23, \[\1, #?48\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x4_t, 208) + +/* +** store_q20_int8x16x4_t_209: +** add (c[0-9]+), c0, #?209 +** str q20, \[\1\] +** str q21, \[\1, #?16\] +** str q22, \[\1, #?32\] +** str q23, \[\1, #?48\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x4_t, 209) + +/* +** store_q20_int8x16x4_t_256: +** add (c[0-9]+), c0, #?256 +** str q20, \[\1\] +** str q21, \[\1, #?16\] +** str q22, \[\1, #?32\] +** str q23, \[\1, #?48\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16x4_t, 256) + +/* Check for valid asm, but don't mandate a particular sequence. 
*/ +STORE_REG_INDEX (q20, int8x16x4_t, int32_t, 1) +STORE_REG_INDEX (q20, int8x16x4_t, uint32_t, 1) +STORE_REG_INDEX (q20, int8x16x4_t, uint64_t, 1) + +STORE_REG_INDEX (q20, int8x16x4_t, int32_t, 2) +STORE_REG_INDEX (q20, int8x16x4_t, uint32_t, 2) +STORE_REG_INDEX (q20, int8x16x4_t, uint64_t, 2) + +STORE_REG_INDEX (q20, int8x16x4_t, int32_t, 4) +STORE_REG_INDEX (q20, int8x16x4_t, uint32_t, 4) +STORE_REG_INDEX (q20, int8x16x4_t, uint64_t, 4) + +STORE_REG_INDEX (q20, int8x16x4_t, int32_t, 8) +STORE_REG_INDEX (q20, int8x16x4_t, uint32_t, 8) +STORE_REG_INDEX (q20, int8x16x4_t, uint64_t, 8) + +STORE_REG_INDEX (q20, int8x16x4_t, int32_t, 16) +STORE_REG_INDEX (q20, int8x16x4_t, uint32_t, 16) +STORE_REG_INDEX (q20, int8x16x4_t, uint64_t, 16) diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v512-2.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v512-2.c new file mode 100644 index 00000000000..8d4f6864491 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v512-2.c @@ -0,0 +1,38 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-save-temps" } */ + +#define ALT_BASE +#include "load-store-utils.h" + +#include + +/* Check for valid asm, but don't mandate a particular sequence. 
*/ +STORE_ZERO_OFFSET (int8x16x4_t, m257) +STORE_ZERO_OFFSET (int8x16x4_t, m256) +STORE_ZERO_OFFSET (int8x16x4_t, m255) +STORE_ZERO_OFFSET (int8x16x4_t, m1) +STORE_ZERO_OFFSET (int8x16x4_t, 1) +STORE_ZERO_OFFSET (int8x16x4_t, 207) +STORE_ZERO_OFFSET (int8x16x4_t, 208) +STORE_ZERO_OFFSET (int8x16x4_t, 209) +STORE_ZERO_OFFSET (int8x16x4_t, 256) + +STORE_ZERO_INDEX (int8x16x4_t, int32_t, 1) +STORE_ZERO_INDEX (int8x16x4_t, uint32_t, 1) +STORE_ZERO_INDEX (int8x16x4_t, uint64_t, 1) + +STORE_ZERO_INDEX (int8x16x4_t, int32_t, 2) +STORE_ZERO_INDEX (int8x16x4_t, uint32_t, 2) +STORE_ZERO_INDEX (int8x16x4_t, uint64_t, 2) + +STORE_ZERO_INDEX (int8x16x4_t, int32_t, 4) +STORE_ZERO_INDEX (int8x16x4_t, uint32_t, 4) +STORE_ZERO_INDEX (int8x16x4_t, uint64_t, 4) + +STORE_ZERO_INDEX (int8x16x4_t, int32_t, 8) +STORE_ZERO_INDEX (int8x16x4_t, uint32_t, 8) +STORE_ZERO_INDEX (int8x16x4_t, uint64_t, 8) + +STORE_ZERO_INDEX (int8x16x4_t, int32_t, 16) +STORE_ZERO_INDEX (int8x16x4_t, uint32_t, 16) +STORE_ZERO_INDEX (int8x16x4_t, uint64_t, 16)