From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2049) id DA0073856275; Thu, 5 May 2022 12:07:11 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org DA0073856275 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Matthew Malcomson To: gcc-cvs@gcc.gnu.org Subject: [gcc(refs/vendors/ARM/heads/morello)] aarch64: Alternative-base support for 128-bit vectors X-Act-Checkin: gcc X-Git-Author: Richard Sandiford X-Git-Refname: refs/vendors/ARM/heads/morello X-Git-Oldrev: 67417a0967f872d1d930f6bdf05276f657f16391 X-Git-Newrev: 10533b003b7f23a6028a3d0a226c75af8712e122 Message-Id: <20220505120711.DA0073856275@sourceware.org> Date: Thu, 5 May 2022 12:07:11 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 05 May 2022 12:07:12 -0000 https://gcc.gnu.org/g:10533b003b7f23a6028a3d0a226c75af8712e122 commit 10533b003b7f23a6028a3d0a226c75af8712e122 Author: Richard Sandiford Date: Fri Apr 8 13:49:01 2022 +0100 aarch64: Alternative-base support for 128-bit vectors The 128-bit vector move patterns don't allow direct GPR loads and stores, but they do allow storing zero as a pair of XZRs. For alternative-base addresses we need to split that into two stores. (It might be more efficient in some cases to zero a Q register and store that, but that should be handled by the cost model.) TI and TF addresses are required to be valid for both GPRs and FPRs, but 128-bit vector addresses are only required to be valid for FPRs (which makes sense given the use case). We can't therefore split a zero store for all valid addresses: we need to require an address whose DImode halves are both legitimate, as for TImode. 
Diff: --- gcc/config/aarch64/aarch64-simd.md | 11 +- gcc/config/aarch64/constraints.md | 6 + .../aarch64/morello/alt-base-load-v128-1.c | 128 +++++++++++++ .../aarch64/morello/alt-base-load-v64-1.c | 211 +++++++++++++++++++++ .../aarch64/morello/alt-base-store-v128-1.c | 128 +++++++++++++ .../aarch64/morello/alt-base-store-v128-2.c | 40 ++++ .../aarch64/morello/alt-base-store-v64-1.c | 211 +++++++++++++++++++++ .../aarch64/morello/alt-base-store-v64-2.c | 110 +++++++++++ .../gcc.target/aarch64/morello/load-store-utils.h | 4 +- 9 files changed, 842 insertions(+), 7 deletions(-) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 47f84773d9e..a82c662e867 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -133,9 +133,9 @@ (define_insn "*aarch64_simd_mov" [(set (match_operand:VQMOV 0 "nonimmediate_operand" - "=w, Umn, m, w, ?r, ?w, ?r, w") + "=w, Umn, m, w, ?r, ?w, ?r, ?UAt, w") (match_operand:VQMOV 1 "general_operand" - "m, Dz, w, w, w, r, r, Dn"))] + "m, Dz, w, w, w, r, r, Dz, Dn"))] "TARGET_SIMD && (register_operand (operands[0], mode) || aarch64_simd_reg_or_zero (operands[1], mode))" @@ -153,8 +153,9 @@ case 4: case 5: case 6: - return "#"; case 7: + return "#"; + case 8: return aarch64_output_simd_mov_immediate (operands[1], 128); default: gcc_unreachable (); @@ -162,8 +163,8 @@ } [(set_attr "type" "neon_load1_1reg, store_16, neon_store1_1reg,\ neon_logic, multiple, multiple,\ - multiple, neon_move") - (set_attr "length" "4,4,4,4,8,8,8,4")] + multiple, store_16, neon_move") + (set_attr "length" "4,4,4,4,8,8,8,8,4")] ) ;; When storing lane zero we can use the normal STR and its more permissive diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index 42d92a3bb97..c80a5fed2db 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -383,6 +383,12 @@ A general memory operand with a normal base register" (match_operand 0 
"aarch64_normal_base_mem_operand")) +(define_memory_constraint "UAt" + "@internal + A TI memory operand with an alternative base register" + (and (match_code "mem") + (match_test "aarch64_alt_base_address_p (TImode, XEXP (op, 0))"))) + (define_memory_constraint "UAu" "@internal Either a general memory operand with a normal base register or diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v128-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v128-1.c new file mode 100644 index 00000000000..81cd42e002a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v128-1.c @@ -0,0 +1,128 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */ +/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */ + +#include + +#define ALT_BASE +#include "load-store-utils.h" + +/* Check for valid asm, but don't mandate a particular sequence. */ +LOAD_REG_OFFSET (x10, int8x16_t, m257) +LOAD_REG_OFFSET (x10, int16x8_t, m256) +LOAD_REG_OFFSET (x10, int32x2_t, m255) +LOAD_REG_OFFSET (x10, int64x2_t, m1) +LOAD_REG_OFFSET (x10, float16x4_t, 1) +LOAD_REG_OFFSET (x10, bfloat16x4_t, 247) +LOAD_REG_OFFSET (x10, float32x4_t, 248) +LOAD_REG_OFFSET (x10, float64x2_t, 249) +LOAD_REG_OFFSET (x10, int8x16_t, 256) +LOAD_REG_OFFSET (x10, int8x16_t, 511) +LOAD_REG_OFFSET (x10, int8x16_t, 512) + +LOAD_REG_INDEX (x10, int8x16_t, int32_t, 1) +LOAD_REG_INDEX (x10, int8x16_t, uint32_t, 1) +LOAD_REG_INDEX (x10, int8x16_t, uint64_t, 1) + +LOAD_REG_INDEX (x10, int8x16_t, int32_t, 2) +LOAD_REG_INDEX (x10, int8x16_t, uint32_t, 2) +LOAD_REG_INDEX (x10, int8x16_t, uint64_t, 2) + +LOAD_REG_INDEX (x10, int8x16_t, int32_t, 4) +LOAD_REG_INDEX (x10, int8x16_t, uint32_t, 4) +LOAD_REG_INDEX (x10, int8x16_t, uint64_t, 4) + +LOAD_REG_INDEX (x10, int8x16_t, int32_t, 8) +LOAD_REG_INDEX (x10, int8x16_t, uint32_t, 8) +LOAD_REG_INDEX (x10, int8x16_t, uint64_t, 8) + 
+LOAD_REG_INDEX (x10, int8x16_t, int32_t, 16) +LOAD_REG_INDEX (x10, int8x16_t, uint32_t, 16) +LOAD_REG_INDEX (x10, int8x16_t, uint64_t, 16) + +/* +** load_q20_int8x16_t_m257: +** sub (c[0-9]+), c0, #257 +** ldr q20, \[\1\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16_t, m257) + +/* +** load_q20_int16x8_t_m256: +** ldr q20, \[c0, #?-256\] +** ret +*/ +LOAD_REG_OFFSET (q20, int16x8_t, m256) + +/* +** load_q20_int32x4_t_m255: +** ldr q20, \[c0, #?-255\] +** ret +*/ +LOAD_REG_OFFSET (q20, int32x4_t, m255) + +/* +** load_q20_int64x2_t_m1: +** ldr q20, \[c0, #?-1\] +** ret +*/ +LOAD_REG_OFFSET (q20, int64x2_t, m1) + +/* +** load_q20_float16x8_t_1: +** ldr q20, \[c0, #?1\] +** ret +*/ +LOAD_REG_OFFSET (q20, float16x8_t, 1) + +/* +** load_q20_bfloat16x8_t_247: +** ldr q20, \[c0, #?247\] +** ret +*/ +LOAD_REG_OFFSET (q20, bfloat16x8_t, 247) + +/* +** load_q20_float32x4_t_248: +** ldr q20, \[c0, #?248\] +** ret +*/ +LOAD_REG_OFFSET (q20, float32x4_t, 248) + +/* +** load_q20_float64x2_t_249: +** ldr q20, \[c0, #?249\] +** ret +*/ +LOAD_REG_OFFSET (q20, float64x2_t, 249) + +/* +** load_q20_int8x16_t_256: +** add (c[0-9]+), c0, #?256 +** ldr q20, \[\1\] +** ret +*/ +LOAD_REG_OFFSET (q20, int8x16_t, 256) + +/* Check for valid asm, but don't mandate a particular sequence. 
*/ +LOAD_REG_INDEX (q20, int8x16_t, int32_t, 1) +LOAD_REG_INDEX (q20, int8x16_t, uint32_t, 1) +LOAD_REG_INDEX (q20, int8x16_t, uint64_t, 1) + +LOAD_REG_INDEX (q20, int8x16_t, int32_t, 2) +LOAD_REG_INDEX (q20, int8x16_t, uint32_t, 2) +LOAD_REG_INDEX (q20, int8x16_t, uint64_t, 2) + +LOAD_REG_INDEX (q20, int8x16_t, int32_t, 4) +LOAD_REG_INDEX (q20, int8x16_t, uint32_t, 4) +LOAD_REG_INDEX (q20, int8x16_t, uint64_t, 4) + +LOAD_REG_INDEX (q20, int8x16_t, int32_t, 8) +LOAD_REG_INDEX (q20, int8x16_t, uint32_t, 8) +LOAD_REG_INDEX (q20, int8x16_t, uint64_t, 8) + +LOAD_REG_INDEX (q20, int8x16_t, int32_t, 16) +LOAD_REG_INDEX (q20, int8x16_t, uint32_t, 16) +LOAD_REG_INDEX (q20, int8x16_t, uint64_t, 16) diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v64-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v64-1.c new file mode 100644 index 00000000000..424432694a3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v64-1.c @@ -0,0 +1,211 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */ +/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */ + +#include + +#define ALT_BASE +#include "load-store-utils.h" + +/* +** load_x10_int8x8_t_m264: +** sub (c[0-9]+), c0, #264 +** ldr x10, \[\1\] +** ret +*/ +LOAD_REG_OFFSET (x10, int8x8_t, m264) + +/* +** load_x10_int16x4_t_m257: +** sub (c[0-9]+), c0, #257 +** ldr x10, \[\1\] +** ret +*/ +LOAD_REG_OFFSET (x10, int16x4_t, m257) + +/* +** load_x10_int32x2_t_m256: +** ldr x10, \[c0, #?-256\] +** ret +*/ +LOAD_REG_OFFSET (x10, int32x2_t, m256) + +/* +** load_x10_int64x1_t_m248: +** ldr x10, \[c0, #?-248\] +** ret +*/ +LOAD_REG_OFFSET (x10, int64x1_t, m248) + +/* +** load_x10_float16x4_t_m8: +** ldr x10, \[c0, #?-8\] +** ret +*/ +LOAD_REG_OFFSET (x10, float16x4_t, m8) + +/* +** load_x10_bfloat16x4_t_m1: +** ldr x10, \[c0, #?-1\] +** ret +*/ +LOAD_REG_OFFSET (x10, 
bfloat16x4_t, m1) + +/* +** load_x10_float32x2_t_1: +** ldr x10, \[c0, #?1\] +** ret +*/ +LOAD_REG_OFFSET (x10, float32x2_t, 1) + +/* +** load_x10_float64x1_t_8: +** ldr x10, \[c0, #?8\] +** ret +*/ +LOAD_REG_OFFSET (x10, float64x1_t, 8) + +/* +** load_x10_int8x8_t_248: +** ldr x10, \[c0, #?248\] +** ret +*/ +LOAD_REG_OFFSET (x10, int8x8_t, 248) + +/* +** load_x10_int8x8_t_255: +** ldr x10, \[c0, #?255\] +** ret +*/ +LOAD_REG_OFFSET (x10, int8x8_t, 255) + +/* +** load_x10_int8x8_t_256: +** add (c[0-9]+), c0, #?256 +** ldr x10, \[\1\] +** ret +*/ +LOAD_REG_OFFSET (x10, int8x8_t, 256) + +/* Check for valid asm, but don't mandate a particular sequence. */ +LOAD_REG_INDEX (x10, int8x8_t, int32_t, 1) +LOAD_REG_INDEX (x10, int8x8_t, uint32_t, 1) +LOAD_REG_INDEX (x10, int8x8_t, int8x8_t, 1) + +LOAD_REG_INDEX (x10, int8x8_t, int32_t, 2) +LOAD_REG_INDEX (x10, int8x8_t, uint32_t, 2) +LOAD_REG_INDEX (x10, int8x8_t, int8x8_t, 2) + +LOAD_REG_INDEX (x10, int8x8_t, int32_t, 4) +LOAD_REG_INDEX (x10, int8x8_t, uint32_t, 4) +LOAD_REG_INDEX (x10, int8x8_t, int8x8_t, 4) + +LOAD_REG_INDEX (x10, int8x8_t, int32_t, 8) +LOAD_REG_INDEX (x10, int8x8_t, uint32_t, 8) +LOAD_REG_INDEX (x10, int8x8_t, int8x8_t, 8) + +LOAD_REG_INDEX (x10, int8x8_t, int32_t, 16) +LOAD_REG_INDEX (x10, int8x8_t, uint32_t, 16) +LOAD_REG_INDEX (x10, int8x8_t, int8x8_t, 16) + +/* +** load_d20_int8x8_t_m264: +** sub (c[0-9]+), c0, #264 +** ldr d20, \[\1\] +** ret +*/ +LOAD_REG_OFFSET (d20, int8x8_t, m264) + +/* +** load_d20_int16x4_t_m257: +** sub (c[0-9]+), c0, #257 +** ldr d20, \[\1\] +** ret +*/ +LOAD_REG_OFFSET (d20, int16x4_t, m257) + +/* +** load_d20_int32x2_t_m256: +** ldr d20, \[c0, #?-256\] +** ret +*/ +LOAD_REG_OFFSET (d20, int32x2_t, m256) + +/* +** load_d20_int64x1_t_m248: +** ldr d20, \[c0, #?-248\] +** ret +*/ +LOAD_REG_OFFSET (d20, int64x1_t, m248) + +/* +** load_d20_float16x4_t_m8: +** ldr d20, \[c0, #?-8\] +** ret +*/ +LOAD_REG_OFFSET (d20, float16x4_t, m8) + +/* +** load_d20_bfloat16x4_t_m1: +** ldr 
d20, \[c0, #?-1\] +** ret +*/ +LOAD_REG_OFFSET (d20, bfloat16x4_t, m1) + +/* +** load_d20_float32x2_t_1: +** ldr d20, \[c0, #?1\] +** ret +*/ +LOAD_REG_OFFSET (d20, float32x2_t, 1) + +/* +** load_d20_float64x1_t_8: +** ldr d20, \[c0, #?8\] +** ret +*/ +LOAD_REG_OFFSET (d20, float64x1_t, 8) + +/* +** load_d20_int8x8_t_248: +** ldr d20, \[c0, #?248\] +** ret +*/ +LOAD_REG_OFFSET (d20, int8x8_t, 248) + +/* +** load_d20_int8x8_t_255: +** ldr d20, \[c0, #?255\] +** ret +*/ +LOAD_REG_OFFSET (d20, int8x8_t, 255) + +/* +** load_d20_int8x8_t_256: +** add (c[0-9]+), c0, #?256 +** ldr d20, \[\1\] +** ret +*/ +LOAD_REG_OFFSET (d20, int8x8_t, 256) + +/* Check for valid asm, but don't mandate a particular sequence. */ +LOAD_REG_INDEX (d20, int8x8_t, int32_t, 1) +LOAD_REG_INDEX (d20, int8x8_t, uint32_t, 1) +LOAD_REG_INDEX (d20, int8x8_t, int8x8_t, 1) + +LOAD_REG_INDEX (d20, int8x8_t, int32_t, 2) +LOAD_REG_INDEX (d20, int8x8_t, uint32_t, 2) +LOAD_REG_INDEX (d20, int8x8_t, int8x8_t, 2) + +LOAD_REG_INDEX (d20, int8x8_t, int32_t, 4) +LOAD_REG_INDEX (d20, int8x8_t, uint32_t, 4) +LOAD_REG_INDEX (d20, int8x8_t, int8x8_t, 4) + +LOAD_REG_INDEX (d20, int8x8_t, int32_t, 8) +LOAD_REG_INDEX (d20, int8x8_t, uint32_t, 8) +LOAD_REG_INDEX (d20, int8x8_t, int8x8_t, 8) + +LOAD_REG_INDEX (d20, int8x8_t, int32_t, 16) +LOAD_REG_INDEX (d20, int8x8_t, uint32_t, 16) +LOAD_REG_INDEX (d20, int8x8_t, int8x8_t, 16) diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-1.c new file mode 100644 index 00000000000..6f982899da0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-1.c @@ -0,0 +1,128 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */ +/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */ + +#include + +#define ALT_BASE +#include "load-store-utils.h" + +/* Check for 
valid asm, but don't mandate a particular sequence. */ +STORE_REG_OFFSET (x10, int8x16_t, m257) +STORE_REG_OFFSET (x10, int16x8_t, m256) +STORE_REG_OFFSET (x10, int32x4_t, m255) +STORE_REG_OFFSET (x10, int64x2_t, m1) +STORE_REG_OFFSET (x10, float16x8_t, 1) +STORE_REG_OFFSET (x10, bfloat16x8_t, 247) +STORE_REG_OFFSET (x10, float32x4_t, 248) +STORE_REG_OFFSET (x10, float64x2_t, 249) +STORE_REG_OFFSET (x10, int8x16_t, 256) +STORE_REG_OFFSET (x10, int8x16_t, 511) +STORE_REG_OFFSET (x10, int8x16_t, 512) + +STORE_REG_INDEX (x10, int8x16_t, int32_t, 1) +STORE_REG_INDEX (x10, int8x16_t, uint32_t, 1) +STORE_REG_INDEX (x10, int8x16_t, uint64_t, 1) + +STORE_REG_INDEX (x10, int8x16_t, int32_t, 2) +STORE_REG_INDEX (x10, int8x16_t, uint32_t, 2) +STORE_REG_INDEX (x10, int8x16_t, uint64_t, 2) + +STORE_REG_INDEX (x10, int8x16_t, int32_t, 4) +STORE_REG_INDEX (x10, int8x16_t, uint32_t, 4) +STORE_REG_INDEX (x10, int8x16_t, uint64_t, 4) + +STORE_REG_INDEX (x10, int8x16_t, int32_t, 8) +STORE_REG_INDEX (x10, int8x16_t, uint32_t, 8) +STORE_REG_INDEX (x10, int8x16_t, uint64_t, 8) + +STORE_REG_INDEX (x10, int8x16_t, int32_t, 16) +STORE_REG_INDEX (x10, int8x16_t, uint32_t, 16) +STORE_REG_INDEX (x10, int8x16_t, uint64_t, 16) + +/* +** store_q20_int8x16_t_m257: +** sub (c[0-9]+), c0, #257 +** str q20, \[\1\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16_t, m257) + +/* +** store_q20_int16x8_t_m256: +** str q20, \[c0, #?-256\] +** ret +*/ +STORE_REG_OFFSET (q20, int16x8_t, m256) + +/* +** store_q20_int32x4_t_m255: +** str q20, \[c0, #?-255\] +** ret +*/ +STORE_REG_OFFSET (q20, int32x4_t, m255) + +/* +** store_q20_int64x2_t_m1: +** str q20, \[c0, #?-1\] +** ret +*/ +STORE_REG_OFFSET (q20, int64x2_t, m1) + +/* +** store_q20_float16x8_t_1: +** str q20, \[c0, #?1\] +** ret +*/ +STORE_REG_OFFSET (q20, float16x8_t, 1) + +/* +** store_q20_bfloat16x8_t_247: +** str q20, \[c0, #?247\] +** ret +*/ +STORE_REG_OFFSET (q20, bfloat16x8_t, 247) + +/* +** store_q20_float32x4_t_248: +** str q20, \[c0, #?248\] +** 
ret +*/ +STORE_REG_OFFSET (q20, float32x4_t, 248) + +/* +** store_q20_float64x2_t_249: +** str q20, \[c0, #?249\] +** ret +*/ +STORE_REG_OFFSET (q20, float64x2_t, 249) + +/* +** store_q20_int8x16_t_256: +** add (c[0-9]+), c0, #?256 +** str q20, \[\1\] +** ret +*/ +STORE_REG_OFFSET (q20, int8x16_t, 256) + +/* Check for valid asm, but don't mandate a particular sequence. */ +STORE_REG_INDEX (q20, int8x16_t, int32_t, 1) +STORE_REG_INDEX (q20, int8x16_t, uint32_t, 1) +STORE_REG_INDEX (q20, int8x16_t, uint64_t, 1) + +STORE_REG_INDEX (q20, int8x16_t, int32_t, 2) +STORE_REG_INDEX (q20, int8x16_t, uint32_t, 2) +STORE_REG_INDEX (q20, int8x16_t, uint64_t, 2) + +STORE_REG_INDEX (q20, int8x16_t, int32_t, 4) +STORE_REG_INDEX (q20, int8x16_t, uint32_t, 4) +STORE_REG_INDEX (q20, int8x16_t, uint64_t, 4) + +STORE_REG_INDEX (q20, int8x16_t, int32_t, 8) +STORE_REG_INDEX (q20, int8x16_t, uint32_t, 8) +STORE_REG_INDEX (q20, int8x16_t, uint64_t, 8) + +STORE_REG_INDEX (q20, int8x16_t, int32_t, 16) +STORE_REG_INDEX (q20, int8x16_t, uint32_t, 16) +STORE_REG_INDEX (q20, int8x16_t, uint64_t, 16) diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-2.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-2.c new file mode 100644 index 00000000000..6eabef67de4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-2.c @@ -0,0 +1,40 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-save-temps" } */ + +#include + +#define ALT_BASE +#include "load-store-utils.h" + +/* Check for valid asm, but don't mandate a particular sequence. 
*/ +STORE_ZERO_OFFSET (int8x16_t, m257) +STORE_ZERO_OFFSET (int16x8_t, m256) +STORE_ZERO_OFFSET (int32x4_t, m255) +STORE_ZERO_OFFSET (int64x2_t, m1) +STORE_ZERO_OFFSET (float16x8_t, 1) +STORE_ZERO_OFFSET (float32x4_t, 247) +STORE_ZERO_OFFSET (float64x2_t, 248) +STORE_ZERO_OFFSET (int8x16_t, 249) +STORE_ZERO_OFFSET (int8x16_t, 256) +STORE_ZERO_OFFSET (int8x16_t, 511) +STORE_ZERO_OFFSET (int8x16_t, 512) + +STORE_ZERO_INDEX (int8x16_t, int32_t, 1) +STORE_ZERO_INDEX (int8x16_t, uint32_t, 1) +STORE_ZERO_INDEX (int8x16_t, uint64_t, 1) + +STORE_ZERO_INDEX (int8x16_t, int32_t, 2) +STORE_ZERO_INDEX (int8x16_t, uint32_t, 2) +STORE_ZERO_INDEX (int8x16_t, uint64_t, 2) + +STORE_ZERO_INDEX (int8x16_t, int32_t, 4) +STORE_ZERO_INDEX (int8x16_t, uint32_t, 4) +STORE_ZERO_INDEX (int8x16_t, uint64_t, 4) + +STORE_ZERO_INDEX (int8x16_t, int32_t, 8) +STORE_ZERO_INDEX (int8x16_t, uint32_t, 8) +STORE_ZERO_INDEX (int8x16_t, uint64_t, 8) + +STORE_ZERO_INDEX (int8x16_t, int32_t, 16) +STORE_ZERO_INDEX (int8x16_t, uint32_t, 16) +STORE_ZERO_INDEX (int8x16_t, uint64_t, 16) diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-1.c new file mode 100644 index 00000000000..cae7b40bfd5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-1.c @@ -0,0 +1,211 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */ +/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */ + +#include + +#define ALT_BASE +#include "load-store-utils.h" + +/* +** store_x10_int8x8_t_m264: +** sub (c[0-9]+), c0, #264 +** str x10, \[\1\] +** ret +*/ +STORE_REG_OFFSET (x10, int8x8_t, m264) + +/* +** store_x10_int16x4_t_m257: +** sub (c[0-9]+), c0, #257 +** str x10, \[\1\] +** ret +*/ +STORE_REG_OFFSET (x10, int16x4_t, m257) + +/* +** store_x10_int32x2_t_m256: +** str x10, \[c0, #?-256\] +** ret +*/ 
+STORE_REG_OFFSET (x10, int32x2_t, m256) + +/* +** store_x10_int64x1_t_m248: +** str x10, \[c0, #?-248\] +** ret +*/ +STORE_REG_OFFSET (x10, int64x1_t, m248) + +/* +** store_x10_float16x4_t_m8: +** str x10, \[c0, #?-8\] +** ret +*/ +STORE_REG_OFFSET (x10, float16x4_t, m8) + +/* +** store_x10_bfloat16x4_t_m1: +** str x10, \[c0, #?-1\] +** ret +*/ +STORE_REG_OFFSET (x10, bfloat16x4_t, m1) + +/* +** store_x10_float32x2_t_1: +** str x10, \[c0, #?1\] +** ret +*/ +STORE_REG_OFFSET (x10, float32x2_t, 1) + +/* +** store_x10_float64x1_t_8: +** str x10, \[c0, #?8\] +** ret +*/ +STORE_REG_OFFSET (x10, float64x1_t, 8) + +/* +** store_x10_int8x8_t_248: +** str x10, \[c0, #?248\] +** ret +*/ +STORE_REG_OFFSET (x10, int8x8_t, 248) + +/* +** store_x10_int8x8_t_255: +** str x10, \[c0, #?255\] +** ret +*/ +STORE_REG_OFFSET (x10, int8x8_t, 255) + +/* +** store_x10_int8x8_t_256: +** add (c[0-9]+), c0, #?256 +** str x10, \[\1\] +** ret +*/ +STORE_REG_OFFSET (x10, int8x8_t, 256) + +/* Check for valid asm, but don't mandate a particular sequence. 
*/ +STORE_REG_INDEX (x10, int8x8_t, int32_t, 1) +STORE_REG_INDEX (x10, int8x8_t, uint32_t, 1) +STORE_REG_INDEX (x10, int8x8_t, int8x8_t, 1) + +STORE_REG_INDEX (x10, int8x8_t, int32_t, 2) +STORE_REG_INDEX (x10, int8x8_t, uint32_t, 2) +STORE_REG_INDEX (x10, int8x8_t, int8x8_t, 2) + +STORE_REG_INDEX (x10, int8x8_t, int32_t, 4) +STORE_REG_INDEX (x10, int8x8_t, uint32_t, 4) +STORE_REG_INDEX (x10, int8x8_t, int8x8_t, 4) + +STORE_REG_INDEX (x10, int8x8_t, int32_t, 8) +STORE_REG_INDEX (x10, int8x8_t, uint32_t, 8) +STORE_REG_INDEX (x10, int8x8_t, int8x8_t, 8) + +STORE_REG_INDEX (x10, int8x8_t, int32_t, 16) +STORE_REG_INDEX (x10, int8x8_t, uint32_t, 16) +STORE_REG_INDEX (x10, int8x8_t, int8x8_t, 16) + +/* +** store_d20_int8x8_t_m264: +** sub (c[0-9]+), c0, #264 +** str d20, \[\1\] +** ret +*/ +STORE_REG_OFFSET (d20, int8x8_t, m264) + +/* +** store_d20_int16x4_t_m257: +** sub (c[0-9]+), c0, #257 +** str d20, \[\1\] +** ret +*/ +STORE_REG_OFFSET (d20, int16x4_t, m257) + +/* +** store_d20_int32x2_t_m256: +** str d20, \[c0, #?-256\] +** ret +*/ +STORE_REG_OFFSET (d20, int32x2_t, m256) + +/* +** store_d20_int64x1_t_m248: +** str d20, \[c0, #?-248\] +** ret +*/ +STORE_REG_OFFSET (d20, int64x1_t, m248) + +/* +** store_d20_float16x4_t_m8: +** str d20, \[c0, #?-8\] +** ret +*/ +STORE_REG_OFFSET (d20, float16x4_t, m8) + +/* +** store_d20_bfloat16x4_t_m1: +** str d20, \[c0, #?-1\] +** ret +*/ +STORE_REG_OFFSET (d20, bfloat16x4_t, m1) + +/* +** store_d20_float32x2_t_1: +** str d20, \[c0, #?1\] +** ret +*/ +STORE_REG_OFFSET (d20, float32x2_t, 1) + +/* +** store_d20_float64x1_t_8: +** str d20, \[c0, #?8\] +** ret +*/ +STORE_REG_OFFSET (d20, float64x1_t, 8) + +/* +** store_d20_int8x8_t_248: +** str d20, \[c0, #?248\] +** ret +*/ +STORE_REG_OFFSET (d20, int8x8_t, 248) + +/* +** store_d20_int8x8_t_255: +** str d20, \[c0, #?255\] +** ret +*/ +STORE_REG_OFFSET (d20, int8x8_t, 255) + +/* +** store_d20_int8x8_t_256: +** add (c[0-9]+), c0, #?256 +** str d20, \[\1\] +** ret +*/ +STORE_REG_OFFSET 
(d20, int8x8_t, 256) + +/* Check for valid asm, but don't mandate a particular sequence. */ +STORE_REG_INDEX (d20, int8x8_t, int32_t, 1) +STORE_REG_INDEX (d20, int8x8_t, uint32_t, 1) +STORE_REG_INDEX (d20, int8x8_t, int8x8_t, 1) + +STORE_REG_INDEX (d20, int8x8_t, int32_t, 2) +STORE_REG_INDEX (d20, int8x8_t, uint32_t, 2) +STORE_REG_INDEX (d20, int8x8_t, int8x8_t, 2) + +STORE_REG_INDEX (d20, int8x8_t, int32_t, 4) +STORE_REG_INDEX (d20, int8x8_t, uint32_t, 4) +STORE_REG_INDEX (d20, int8x8_t, int8x8_t, 4) + +STORE_REG_INDEX (d20, int8x8_t, int32_t, 8) +STORE_REG_INDEX (d20, int8x8_t, uint32_t, 8) +STORE_REG_INDEX (d20, int8x8_t, int8x8_t, 8) + +STORE_REG_INDEX (d20, int8x8_t, int32_t, 16) +STORE_REG_INDEX (d20, int8x8_t, uint32_t, 16) +STORE_REG_INDEX (d20, int8x8_t, int8x8_t, 16) diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-2.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-2.c new file mode 100644 index 00000000000..5adb1a86046 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-2.c @@ -0,0 +1,110 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */ +/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */ + +#include + +#define ALT_BASE +#include "load-store-utils.h" + +/* +** store_zero_int8x8_t_m264: +** sub (c[0-9]+), c0, #264 +** str xzr, \[\1\] +** ret +*/ +STORE_ZERO_OFFSET (int8x8_t, m264) + +/* +** store_zero_int16x4_t_m257: +** sub (c[0-9]+), c0, #257 +** str xzr, \[\1\] +** ret +*/ +STORE_ZERO_OFFSET (int16x4_t, m257) + +/* +** store_zero_int32x2_t_m256: +** str xzr, \[c0, #?-256\] +** ret +*/ +STORE_ZERO_OFFSET (int32x2_t, m256) + +/* +** store_zero_int64x1_t_m248: +** str xzr, \[c0, #?-248\] +** ret +*/ +STORE_ZERO_OFFSET (int64x1_t, m248) + +/* +** store_zero_float16x4_t_m8: +** str xzr, \[c0, #?-8\] +** ret +*/ +STORE_ZERO_OFFSET (float16x4_t, m8) + +/* +** 
store_zero_float32x2_t_m1: +** str xzr, \[c0, #?-1\] +** ret +*/ +STORE_ZERO_OFFSET (float32x2_t, m1) + +/* +** store_zero_float32x2_t_1: +** str xzr, \[c0, #?1\] +** ret +*/ +STORE_ZERO_OFFSET (float32x2_t, 1) + +/* +** store_zero_float64x1_t_8: +** str xzr, \[c0, #?8\] +** ret +*/ +STORE_ZERO_OFFSET (float64x1_t, 8) + +/* +** store_zero_int8x8_t_248: +** str xzr, \[c0, #?248\] +** ret +*/ +STORE_ZERO_OFFSET (int8x8_t, 248) + +/* +** store_zero_int8x8_t_255: +** str xzr, \[c0, #?255\] +** ret +*/ +STORE_ZERO_OFFSET (int8x8_t, 255) + +/* +** store_zero_int8x8_t_256: +** add (c[0-9]+), c0, #?256 +** str xzr, \[\1\] +** ret +*/ +STORE_ZERO_OFFSET (int8x8_t, 256) + +/* Check for valid asm, but don't mandate a particular sequence. */ +STORE_ZERO_INDEX (int8x8_t, int32_t, 1) +STORE_ZERO_INDEX (int8x8_t, uint32_t, 1) +STORE_ZERO_INDEX (int8x8_t, int8x8_t, 1) + +STORE_ZERO_INDEX (int8x8_t, int32_t, 2) +STORE_ZERO_INDEX (int8x8_t, uint32_t, 2) +STORE_ZERO_INDEX (int8x8_t, int8x8_t, 2) + +STORE_ZERO_INDEX (int8x8_t, int32_t, 4) +STORE_ZERO_INDEX (int8x8_t, uint32_t, 4) +STORE_ZERO_INDEX (int8x8_t, int8x8_t, 4) + +STORE_ZERO_INDEX (int8x8_t, int32_t, 8) +STORE_ZERO_INDEX (int8x8_t, uint32_t, 8) +STORE_ZERO_INDEX (int8x8_t, int8x8_t, 8) + +STORE_ZERO_INDEX (int8x8_t, int32_t, 16) +STORE_ZERO_INDEX (int8x8_t, uint32_t, 16) +STORE_ZERO_INDEX (int8x8_t, int8x8_t, 16) diff --git a/gcc/testsuite/gcc.target/aarch64/morello/load-store-utils.h b/gcc/testsuite/gcc.target/aarch64/morello/load-store-utils.h index bd88bef99cf..695cc30543a 100644 --- a/gcc/testsuite/gcc.target/aarch64/morello/load-store-utils.h +++ b/gcc/testsuite/gcc.target/aarch64/morello/load-store-utils.h @@ -77,7 +77,7 @@ store_zero_##TYPE##_##OFFSET (char *CAP base) \ { \ TYPE *CAP ptr = (TYPE *CAP) (base + OFFSET); \ - *ptr = 0; \ + *ptr = (TYPE) { 0 }; \ } #define STORE_REG_INDEX(REG, TYPE, INDEX_TYPE, SCALE) \ @@ -99,7 +99,7 @@ { \ ptrdiff_t byte_index = (ptrdiff_t) index * SCALE; \ TYPE *CAP ptr = (TYPE *CAP) 
(base + byte_index); \ - *ptr = 0; \ + *ptr = (TYPE) { 0 }; \ } #define PRE_MODIFY_OFFSET(TYPE, OFFSET) \