public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/ARM/heads/morello)] aarch64: Alternative-base support for 128-bit vectors
@ 2022-05-05 12:07 Matthew Malcomson
0 siblings, 0 replies; only message in thread
From: Matthew Malcomson @ 2022-05-05 12:07 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:10533b003b7f23a6028a3d0a226c75af8712e122
commit 10533b003b7f23a6028a3d0a226c75af8712e122
Author: Richard Sandiford <richard.sandiford@arm.com>
Date: Fri Apr 8 13:49:01 2022 +0100
aarch64: Alternative-base support for 128-bit vectors
The 128-bit vector move patterns don't allow direct GPR loads
and stores, but they do allow storing zero as a pair of XZRs.
For alternative-base addresses we need to split that into two stores.
(It might be more efficient in some cases to zero a Q register and
store that, but that should be handled by the cost model.)
TI and TF addresses are required to be valid for both GPRs and FPRs,
but 128-bit vector addresses are only required to be valid for FPRs
(which makes sense given the use case). We can't therefore split
a zero store for all valid addresses: we need to require an address
whose DImode halves are both legitimate, as for TImode.
Diff:
---
gcc/config/aarch64/aarch64-simd.md | 11 +-
gcc/config/aarch64/constraints.md | 6 +
.../aarch64/morello/alt-base-load-v128-1.c | 128 +++++++++++++
.../aarch64/morello/alt-base-load-v64-1.c | 211 +++++++++++++++++++++
.../aarch64/morello/alt-base-store-v128-1.c | 128 +++++++++++++
.../aarch64/morello/alt-base-store-v128-2.c | 40 ++++
.../aarch64/morello/alt-base-store-v64-1.c | 211 +++++++++++++++++++++
.../aarch64/morello/alt-base-store-v64-2.c | 110 +++++++++++
.../gcc.target/aarch64/morello/load-store-utils.h | 4 +-
9 files changed, 842 insertions(+), 7 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 47f84773d9e..a82c662e867 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -133,9 +133,9 @@
(define_insn "*aarch64_simd_mov<VQMOV:mode>"
[(set (match_operand:VQMOV 0 "nonimmediate_operand"
- "=w, Umn, m, w, ?r, ?w, ?r, w")
+ "=w, Umn, m, w, ?r, ?w, ?r, ?UAt, w")
(match_operand:VQMOV 1 "general_operand"
- "m, Dz, w, w, w, r, r, Dn"))]
+ "m, Dz, w, w, w, r, r, Dz, Dn"))]
"TARGET_SIMD
&& (register_operand (operands[0], <MODE>mode)
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
@@ -153,8 +153,9 @@
case 4:
case 5:
case 6:
- return "#";
case 7:
+ return "#";
+ case 8:
return aarch64_output_simd_mov_immediate (operands[1], 128);
default:
gcc_unreachable ();
@@ -162,8 +163,8 @@
}
[(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
neon_logic<q>, multiple, multiple,\
- multiple, neon_move<q>")
- (set_attr "length" "4,4,4,4,8,8,8,4")]
+ multiple, store_16, neon_move<q>")
+ (set_attr "length" "4,4,4,4,8,8,8,8,4")]
)
;; When storing lane zero we can use the normal STR and its more permissive
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 42d92a3bb97..c80a5fed2db 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -383,6 +383,12 @@
A general memory operand with a normal base register"
(match_operand 0 "aarch64_normal_base_mem_operand"))
+(define_memory_constraint "UAt"
+ "@internal
+ A TI memory operand with an alternative base register"
+ (and (match_code "mem")
+ (match_test "aarch64_alt_base_address_p (TImode, XEXP (op, 0))")))
+
(define_memory_constraint "UAu"
"@internal
Either a general memory operand with a normal base register or
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v128-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v128-1.c
new file mode 100644
index 00000000000..81cd42e002a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v128-1.c
@@ -0,0 +1,128 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */
+
+#include <arm_neon.h>
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+/* Check for valid asm, but don't mandate a particular sequence. */
+LOAD_REG_OFFSET (x10, int8x16_t, m257)
+LOAD_REG_OFFSET (x10, int16x8_t, m256)
+LOAD_REG_OFFSET (x10, int32x2_t, m255)
+LOAD_REG_OFFSET (x10, int64x2_t, m1)
+LOAD_REG_OFFSET (x10, float16x4_t, 1)
+LOAD_REG_OFFSET (x10, bfloat16x4_t, 247)
+LOAD_REG_OFFSET (x10, float32x4_t, 248)
+LOAD_REG_OFFSET (x10, float64x2_t, 249)
+LOAD_REG_OFFSET (x10, int8x16_t, 256)
+LOAD_REG_OFFSET (x10, int8x16_t, 511)
+LOAD_REG_OFFSET (x10, int8x16_t, 512)
+
+LOAD_REG_INDEX (x10, int8x16_t, int32_t, 1)
+LOAD_REG_INDEX (x10, int8x16_t, uint32_t, 1)
+LOAD_REG_INDEX (x10, int8x16_t, uint64_t, 1)
+
+LOAD_REG_INDEX (x10, int8x16_t, int32_t, 2)
+LOAD_REG_INDEX (x10, int8x16_t, uint32_t, 2)
+LOAD_REG_INDEX (x10, int8x16_t, uint64_t, 2)
+
+LOAD_REG_INDEX (x10, int8x16_t, int32_t, 4)
+LOAD_REG_INDEX (x10, int8x16_t, uint32_t, 4)
+LOAD_REG_INDEX (x10, int8x16_t, uint64_t, 4)
+
+LOAD_REG_INDEX (x10, int8x16_t, int32_t, 8)
+LOAD_REG_INDEX (x10, int8x16_t, uint32_t, 8)
+LOAD_REG_INDEX (x10, int8x16_t, uint64_t, 8)
+
+LOAD_REG_INDEX (x10, int8x16_t, int32_t, 16)
+LOAD_REG_INDEX (x10, int8x16_t, uint32_t, 16)
+LOAD_REG_INDEX (x10, int8x16_t, uint64_t, 16)
+
+/*
+** load_q20_int8x16_t_m257:
+** sub (c[0-9]+), c0, #257
+** ldr q20, \[\1\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, int8x16_t, m257)
+
+/*
+** load_q20_int16x8_t_m256:
+** ldr q20, \[c0, #?-256\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, int16x8_t, m256)
+
+/*
+** load_q20_int32x4_t_m255:
+** ldr q20, \[c0, #?-255\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, int32x4_t, m255)
+
+/*
+** load_q20_int64x2_t_m1:
+** ldr q20, \[c0, #?-1\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, int64x2_t, m1)
+
+/*
+** load_q20_float16x8_t_1:
+** ldr q20, \[c0, #?1\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, float16x8_t, 1)
+
+/*
+** load_q20_bfloat16x8_t_247:
+** ldr q20, \[c0, #?247\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, bfloat16x8_t, 247)
+
+/*
+** load_q20_float32x4_t_248:
+** ldr q20, \[c0, #?248\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, float32x4_t, 248)
+
+/*
+** load_q20_float64x2_t_249:
+** ldr q20, \[c0, #?249\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, float64x2_t, 249)
+
+/*
+** load_q20_int8x16_t_256:
+** add (c[0-9]+), c0, #?256
+** ldr q20, \[\1\]
+** ret
+*/
+LOAD_REG_OFFSET (q20, int8x16_t, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence. */
+LOAD_REG_INDEX (q20, int8x16_t, int32_t, 1)
+LOAD_REG_INDEX (q20, int8x16_t, uint32_t, 1)
+LOAD_REG_INDEX (q20, int8x16_t, uint64_t, 1)
+
+LOAD_REG_INDEX (q20, int8x16_t, int32_t, 2)
+LOAD_REG_INDEX (q20, int8x16_t, uint32_t, 2)
+LOAD_REG_INDEX (q20, int8x16_t, uint64_t, 2)
+
+LOAD_REG_INDEX (q20, int8x16_t, int32_t, 4)
+LOAD_REG_INDEX (q20, int8x16_t, uint32_t, 4)
+LOAD_REG_INDEX (q20, int8x16_t, uint64_t, 4)
+
+LOAD_REG_INDEX (q20, int8x16_t, int32_t, 8)
+LOAD_REG_INDEX (q20, int8x16_t, uint32_t, 8)
+LOAD_REG_INDEX (q20, int8x16_t, uint64_t, 8)
+
+LOAD_REG_INDEX (q20, int8x16_t, int32_t, 16)
+LOAD_REG_INDEX (q20, int8x16_t, uint32_t, 16)
+LOAD_REG_INDEX (q20, int8x16_t, uint64_t, 16)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v64-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v64-1.c
new file mode 100644
index 00000000000..424432694a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v64-1.c
@@ -0,0 +1,211 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */
+
+#include <arm_neon.h>
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+/*
+** load_x10_int8x8_t_m264:
+** sub (c[0-9]+), c0, #264
+** ldr x10, \[\1\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, int8x8_t, m264)
+
+/*
+** load_x10_int16x4_t_m257:
+** sub (c[0-9]+), c0, #257
+** ldr x10, \[\1\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, int16x4_t, m257)
+
+/*
+** load_x10_int32x2_t_m256:
+** ldr x10, \[c0, #?-256\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, int32x2_t, m256)
+
+/*
+** load_x10_int64x1_t_m248:
+** ldr x10, \[c0, #?-248\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, int64x1_t, m248)
+
+/*
+** load_x10_float16x4_t_m8:
+** ldr x10, \[c0, #?-8\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, float16x4_t, m8)
+
+/*
+** load_x10_bfloat16x4_t_m1:
+** ldr x10, \[c0, #?-1\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, bfloat16x4_t, m1)
+
+/*
+** load_x10_float32x2_t_1:
+** ldr x10, \[c0, #?1\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, float32x2_t, 1)
+
+/*
+** load_x10_float64x1_t_8:
+** ldr x10, \[c0, #?8\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, float64x1_t, 8)
+
+/*
+** load_x10_int8x8_t_248:
+** ldr x10, \[c0, #?248\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, int8x8_t, 248)
+
+/*
+** load_x10_int8x8_t_255:
+** ldr x10, \[c0, #?255\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, int8x8_t, 255)
+
+/*
+** load_x10_int8x8_t_256:
+** add (c[0-9]+), c0, #?256
+** ldr x10, \[\1\]
+** ret
+*/
+LOAD_REG_OFFSET (x10, int8x8_t, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence. */
+LOAD_REG_INDEX (x10, int8x8_t, int32_t, 1)
+LOAD_REG_INDEX (x10, int8x8_t, uint32_t, 1)
+LOAD_REG_INDEX (x10, int8x8_t, int8x8_t, 1)
+
+LOAD_REG_INDEX (x10, int8x8_t, int32_t, 2)
+LOAD_REG_INDEX (x10, int8x8_t, uint32_t, 2)
+LOAD_REG_INDEX (x10, int8x8_t, int8x8_t, 2)
+
+LOAD_REG_INDEX (x10, int8x8_t, int32_t, 4)
+LOAD_REG_INDEX (x10, int8x8_t, uint32_t, 4)
+LOAD_REG_INDEX (x10, int8x8_t, int8x8_t, 4)
+
+LOAD_REG_INDEX (x10, int8x8_t, int32_t, 8)
+LOAD_REG_INDEX (x10, int8x8_t, uint32_t, 8)
+LOAD_REG_INDEX (x10, int8x8_t, int8x8_t, 8)
+
+LOAD_REG_INDEX (x10, int8x8_t, int32_t, 16)
+LOAD_REG_INDEX (x10, int8x8_t, uint32_t, 16)
+LOAD_REG_INDEX (x10, int8x8_t, int8x8_t, 16)
+
+/*
+** load_d20_int8x8_t_m264:
+** sub (c[0-9]+), c0, #264
+** ldr d20, \[\1\]
+** ret
+*/
+LOAD_REG_OFFSET (d20, int8x8_t, m264)
+
+/*
+** load_d20_int16x4_t_m257:
+** sub (c[0-9]+), c0, #257
+** ldr d20, \[\1\]
+** ret
+*/
+LOAD_REG_OFFSET (d20, int16x4_t, m257)
+
+/*
+** load_d20_int32x2_t_m256:
+** ldr d20, \[c0, #?-256\]
+** ret
+*/
+LOAD_REG_OFFSET (d20, int32x2_t, m256)
+
+/*
+** load_d20_int64x1_t_m248:
+** ldr d20, \[c0, #?-248\]
+** ret
+*/
+LOAD_REG_OFFSET (d20, int64x1_t, m248)
+
+/*
+** load_d20_float16x4_t_m8:
+** ldr d20, \[c0, #?-8\]
+** ret
+*/
+LOAD_REG_OFFSET (d20, float16x4_t, m8)
+
+/*
+** load_d20_bfloat16x4_t_m1:
+** ldr d20, \[c0, #?-1\]
+** ret
+*/
+LOAD_REG_OFFSET (d20, bfloat16x4_t, m1)
+
+/*
+** load_d20_float32x2_t_1:
+** ldr d20, \[c0, #?1\]
+** ret
+*/
+LOAD_REG_OFFSET (d20, float32x2_t, 1)
+
+/*
+** load_d20_float64x1_t_8:
+** ldr d20, \[c0, #?8\]
+** ret
+*/
+LOAD_REG_OFFSET (d20, float64x1_t, 8)
+
+/*
+** load_d20_int8x8_t_248:
+** ldr d20, \[c0, #?248\]
+** ret
+*/
+LOAD_REG_OFFSET (d20, int8x8_t, 248)
+
+/*
+** load_d20_int8x8_t_255:
+** ldr d20, \[c0, #?255\]
+** ret
+*/
+LOAD_REG_OFFSET (d20, int8x8_t, 255)
+
+/*
+** load_d20_int8x8_t_256:
+** add (c[0-9]+), c0, #?256
+** ldr d20, \[\1\]
+** ret
+*/
+LOAD_REG_OFFSET (d20, int8x8_t, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence. */
+LOAD_REG_INDEX (d20, int8x8_t, int32_t, 1)
+LOAD_REG_INDEX (d20, int8x8_t, uint32_t, 1)
+LOAD_REG_INDEX (d20, int8x8_t, int8x8_t, 1)
+
+LOAD_REG_INDEX (d20, int8x8_t, int32_t, 2)
+LOAD_REG_INDEX (d20, int8x8_t, uint32_t, 2)
+LOAD_REG_INDEX (d20, int8x8_t, int8x8_t, 2)
+
+LOAD_REG_INDEX (d20, int8x8_t, int32_t, 4)
+LOAD_REG_INDEX (d20, int8x8_t, uint32_t, 4)
+LOAD_REG_INDEX (d20, int8x8_t, int8x8_t, 4)
+
+LOAD_REG_INDEX (d20, int8x8_t, int32_t, 8)
+LOAD_REG_INDEX (d20, int8x8_t, uint32_t, 8)
+LOAD_REG_INDEX (d20, int8x8_t, int8x8_t, 8)
+
+LOAD_REG_INDEX (d20, int8x8_t, int32_t, 16)
+LOAD_REG_INDEX (d20, int8x8_t, uint32_t, 16)
+LOAD_REG_INDEX (d20, int8x8_t, int8x8_t, 16)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-1.c
new file mode 100644
index 00000000000..6f982899da0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-1.c
@@ -0,0 +1,128 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */
+
+#include <arm_neon.h>
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+/* Check for valid asm, but don't mandate a particular sequence. */
+STORE_REG_OFFSET (x10, int8x16_t, m257)
+STORE_REG_OFFSET (x10, int16x8_t, m256)
+STORE_REG_OFFSET (x10, int32x4_t, m255)
+STORE_REG_OFFSET (x10, int64x2_t, m1)
+STORE_REG_OFFSET (x10, float16x8_t, 1)
+STORE_REG_OFFSET (x10, bfloat16x8_t, 247)
+STORE_REG_OFFSET (x10, float32x4_t, 248)
+STORE_REG_OFFSET (x10, float64x2_t, 249)
+STORE_REG_OFFSET (x10, int8x16_t, 256)
+STORE_REG_OFFSET (x10, int8x16_t, 511)
+STORE_REG_OFFSET (x10, int8x16_t, 512)
+
+STORE_REG_INDEX (x10, int8x16_t, int32_t, 1)
+STORE_REG_INDEX (x10, int8x16_t, uint32_t, 1)
+STORE_REG_INDEX (x10, int8x16_t, uint64_t, 1)
+
+STORE_REG_INDEX (x10, int8x16_t, int32_t, 2)
+STORE_REG_INDEX (x10, int8x16_t, uint32_t, 2)
+STORE_REG_INDEX (x10, int8x16_t, uint64_t, 2)
+
+STORE_REG_INDEX (x10, int8x16_t, int32_t, 4)
+STORE_REG_INDEX (x10, int8x16_t, uint32_t, 4)
+STORE_REG_INDEX (x10, int8x16_t, uint64_t, 4)
+
+STORE_REG_INDEX (x10, int8x16_t, int32_t, 8)
+STORE_REG_INDEX (x10, int8x16_t, uint32_t, 8)
+STORE_REG_INDEX (x10, int8x16_t, uint64_t, 8)
+
+STORE_REG_INDEX (x10, int8x16_t, int32_t, 16)
+STORE_REG_INDEX (x10, int8x16_t, uint32_t, 16)
+STORE_REG_INDEX (x10, int8x16_t, uint64_t, 16)
+
+/*
+** store_q20_int8x16_t_m257:
+** sub (c[0-9]+), c0, #257
+** str q20, \[\1\]
+** ret
+*/
+STORE_REG_OFFSET (q20, int8x16_t, m257)
+
+/*
+** store_q20_int16x8_t_m256:
+** str q20, \[c0, #?-256\]
+** ret
+*/
+STORE_REG_OFFSET (q20, int16x8_t, m256)
+
+/*
+** store_q20_int32x4_t_m255:
+** str q20, \[c0, #?-255\]
+** ret
+*/
+STORE_REG_OFFSET (q20, int32x4_t, m255)
+
+/*
+** store_q20_int64x2_t_m1:
+** str q20, \[c0, #?-1\]
+** ret
+*/
+STORE_REG_OFFSET (q20, int64x2_t, m1)
+
+/*
+** store_q20_float16x8_t_1:
+** str q20, \[c0, #?1\]
+** ret
+*/
+STORE_REG_OFFSET (q20, float16x8_t, 1)
+
+/*
+** store_q20_bfloat16x8_t_247:
+** str q20, \[c0, #?247\]
+** ret
+*/
+STORE_REG_OFFSET (q20, bfloat16x8_t, 247)
+
+/*
+** store_q20_float32x4_t_248:
+** str q20, \[c0, #?248\]
+** ret
+*/
+STORE_REG_OFFSET (q20, float32x4_t, 248)
+
+/*
+** store_q20_float64x2_t_249:
+** str q20, \[c0, #?249\]
+** ret
+*/
+STORE_REG_OFFSET (q20, float64x2_t, 249)
+
+/*
+** store_q20_int8x16_t_256:
+** add (c[0-9]+), c0, #?256
+** str q20, \[\1\]
+** ret
+*/
+STORE_REG_OFFSET (q20, int8x16_t, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence. */
+STORE_REG_INDEX (q20, int8x16_t, int32_t, 1)
+STORE_REG_INDEX (q20, int8x16_t, uint32_t, 1)
+STORE_REG_INDEX (q20, int8x16_t, uint64_t, 1)
+
+STORE_REG_INDEX (q20, int8x16_t, int32_t, 2)
+STORE_REG_INDEX (q20, int8x16_t, uint32_t, 2)
+STORE_REG_INDEX (q20, int8x16_t, uint64_t, 2)
+
+STORE_REG_INDEX (q20, int8x16_t, int32_t, 4)
+STORE_REG_INDEX (q20, int8x16_t, uint32_t, 4)
+STORE_REG_INDEX (q20, int8x16_t, uint64_t, 4)
+
+STORE_REG_INDEX (q20, int8x16_t, int32_t, 8)
+STORE_REG_INDEX (q20, int8x16_t, uint32_t, 8)
+STORE_REG_INDEX (q20, int8x16_t, uint64_t, 8)
+
+STORE_REG_INDEX (q20, int8x16_t, int32_t, 16)
+STORE_REG_INDEX (q20, int8x16_t, uint32_t, 16)
+STORE_REG_INDEX (q20, int8x16_t, uint64_t, 16)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-2.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-2.c
new file mode 100644
index 00000000000..6eabef67de4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-2.c
@@ -0,0 +1,40 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+
+#include <arm_neon.h>
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+/* Check for valid asm, but don't mandate a particular sequence. */
+STORE_ZERO_OFFSET (int8x16_t, m257)
+STORE_ZERO_OFFSET (int16x8_t, m256)
+STORE_ZERO_OFFSET (int32x4_t, m255)
+STORE_ZERO_OFFSET (int64x2_t, m1)
+STORE_ZERO_OFFSET (float16x8_t, 1)
+STORE_ZERO_OFFSET (float32x4_t, 247)
+STORE_ZERO_OFFSET (float64x2_t, 248)
+STORE_ZERO_OFFSET (int8x16_t, 249)
+STORE_ZERO_OFFSET (int8x16_t, 256)
+STORE_ZERO_OFFSET (int8x16_t, 511)
+STORE_ZERO_OFFSET (int8x16_t, 512)
+
+STORE_ZERO_INDEX (int8x16_t, int32_t, 1)
+STORE_ZERO_INDEX (int8x16_t, uint32_t, 1)
+STORE_ZERO_INDEX (int8x16_t, uint64_t, 1)
+
+STORE_ZERO_INDEX (int8x16_t, int32_t, 2)
+STORE_ZERO_INDEX (int8x16_t, uint32_t, 2)
+STORE_ZERO_INDEX (int8x16_t, uint64_t, 2)
+
+STORE_ZERO_INDEX (int8x16_t, int32_t, 4)
+STORE_ZERO_INDEX (int8x16_t, uint32_t, 4)
+STORE_ZERO_INDEX (int8x16_t, uint64_t, 4)
+
+STORE_ZERO_INDEX (int8x16_t, int32_t, 8)
+STORE_ZERO_INDEX (int8x16_t, uint32_t, 8)
+STORE_ZERO_INDEX (int8x16_t, uint64_t, 8)
+
+STORE_ZERO_INDEX (int8x16_t, int32_t, 16)
+STORE_ZERO_INDEX (int8x16_t, uint32_t, 16)
+STORE_ZERO_INDEX (int8x16_t, uint64_t, 16)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-1.c
new file mode 100644
index 00000000000..cae7b40bfd5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-1.c
@@ -0,0 +1,211 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */
+
+#include <arm_neon.h>
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+/*
+** store_x10_int8x8_t_m264:
+** sub (c[0-9]+), c0, #264
+** str x10, \[\1\]
+** ret
+*/
+STORE_REG_OFFSET (x10, int8x8_t, m264)
+
+/*
+** store_x10_int16x4_t_m257:
+** sub (c[0-9]+), c0, #257
+** str x10, \[\1\]
+** ret
+*/
+STORE_REG_OFFSET (x10, int16x4_t, m257)
+
+/*
+** store_x10_int32x2_t_m256:
+** str x10, \[c0, #?-256\]
+** ret
+*/
+STORE_REG_OFFSET (x10, int32x2_t, m256)
+
+/*
+** store_x10_int64x1_t_m248:
+** str x10, \[c0, #?-248\]
+** ret
+*/
+STORE_REG_OFFSET (x10, int64x1_t, m248)
+
+/*
+** store_x10_float16x4_t_m8:
+** str x10, \[c0, #?-8\]
+** ret
+*/
+STORE_REG_OFFSET (x10, float16x4_t, m8)
+
+/*
+** store_x10_bfloat16x4_t_m1:
+** str x10, \[c0, #?-1\]
+** ret
+*/
+STORE_REG_OFFSET (x10, bfloat16x4_t, m1)
+
+/*
+** store_x10_float32x2_t_1:
+** str x10, \[c0, #?1\]
+** ret
+*/
+STORE_REG_OFFSET (x10, float32x2_t, 1)
+
+/*
+** store_x10_float64x1_t_8:
+** str x10, \[c0, #?8\]
+** ret
+*/
+STORE_REG_OFFSET (x10, float64x1_t, 8)
+
+/*
+** store_x10_int8x8_t_248:
+** str x10, \[c0, #?248\]
+** ret
+*/
+STORE_REG_OFFSET (x10, int8x8_t, 248)
+
+/*
+** store_x10_int8x8_t_255:
+** str x10, \[c0, #?255\]
+** ret
+*/
+STORE_REG_OFFSET (x10, int8x8_t, 255)
+
+/*
+** store_x10_int8x8_t_256:
+** add (c[0-9]+), c0, #?256
+** str x10, \[\1\]
+** ret
+*/
+STORE_REG_OFFSET (x10, int8x8_t, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence. */
+STORE_REG_INDEX (x10, int8x8_t, int32_t, 1)
+STORE_REG_INDEX (x10, int8x8_t, uint32_t, 1)
+STORE_REG_INDEX (x10, int8x8_t, int8x8_t, 1)
+
+STORE_REG_INDEX (x10, int8x8_t, int32_t, 2)
+STORE_REG_INDEX (x10, int8x8_t, uint32_t, 2)
+STORE_REG_INDEX (x10, int8x8_t, int8x8_t, 2)
+
+STORE_REG_INDEX (x10, int8x8_t, int32_t, 4)
+STORE_REG_INDEX (x10, int8x8_t, uint32_t, 4)
+STORE_REG_INDEX (x10, int8x8_t, int8x8_t, 4)
+
+STORE_REG_INDEX (x10, int8x8_t, int32_t, 8)
+STORE_REG_INDEX (x10, int8x8_t, uint32_t, 8)
+STORE_REG_INDEX (x10, int8x8_t, int8x8_t, 8)
+
+STORE_REG_INDEX (x10, int8x8_t, int32_t, 16)
+STORE_REG_INDEX (x10, int8x8_t, uint32_t, 16)
+STORE_REG_INDEX (x10, int8x8_t, int8x8_t, 16)
+
+/*
+** store_d20_int8x8_t_m264:
+** sub (c[0-9]+), c0, #264
+** str d20, \[\1\]
+** ret
+*/
+STORE_REG_OFFSET (d20, int8x8_t, m264)
+
+/*
+** store_d20_int16x4_t_m257:
+** sub (c[0-9]+), c0, #257
+** str d20, \[\1\]
+** ret
+*/
+STORE_REG_OFFSET (d20, int16x4_t, m257)
+
+/*
+** store_d20_int32x2_t_m256:
+** str d20, \[c0, #?-256\]
+** ret
+*/
+STORE_REG_OFFSET (d20, int32x2_t, m256)
+
+/*
+** store_d20_int64x1_t_m248:
+** str d20, \[c0, #?-248\]
+** ret
+*/
+STORE_REG_OFFSET (d20, int64x1_t, m248)
+
+/*
+** store_d20_float16x4_t_m8:
+** str d20, \[c0, #?-8\]
+** ret
+*/
+STORE_REG_OFFSET (d20, float16x4_t, m8)
+
+/*
+** store_d20_bfloat16x4_t_m1:
+** str d20, \[c0, #?-1\]
+** ret
+*/
+STORE_REG_OFFSET (d20, bfloat16x4_t, m1)
+
+/*
+** store_d20_float32x2_t_1:
+** str d20, \[c0, #?1\]
+** ret
+*/
+STORE_REG_OFFSET (d20, float32x2_t, 1)
+
+/*
+** store_d20_float64x1_t_8:
+** str d20, \[c0, #?8\]
+** ret
+*/
+STORE_REG_OFFSET (d20, float64x1_t, 8)
+
+/*
+** store_d20_int8x8_t_248:
+** str d20, \[c0, #?248\]
+** ret
+*/
+STORE_REG_OFFSET (d20, int8x8_t, 248)
+
+/*
+** store_d20_int8x8_t_255:
+** str d20, \[c0, #?255\]
+** ret
+*/
+STORE_REG_OFFSET (d20, int8x8_t, 255)
+
+/*
+** store_d20_int8x8_t_256:
+** add (c[0-9]+), c0, #?256
+** str d20, \[\1\]
+** ret
+*/
+STORE_REG_OFFSET (d20, int8x8_t, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence. */
+STORE_REG_INDEX (d20, int8x8_t, int32_t, 1)
+STORE_REG_INDEX (d20, int8x8_t, uint32_t, 1)
+STORE_REG_INDEX (d20, int8x8_t, int8x8_t, 1)
+
+STORE_REG_INDEX (d20, int8x8_t, int32_t, 2)
+STORE_REG_INDEX (d20, int8x8_t, uint32_t, 2)
+STORE_REG_INDEX (d20, int8x8_t, int8x8_t, 2)
+
+STORE_REG_INDEX (d20, int8x8_t, int32_t, 4)
+STORE_REG_INDEX (d20, int8x8_t, uint32_t, 4)
+STORE_REG_INDEX (d20, int8x8_t, int8x8_t, 4)
+
+STORE_REG_INDEX (d20, int8x8_t, int32_t, 8)
+STORE_REG_INDEX (d20, int8x8_t, uint32_t, 8)
+STORE_REG_INDEX (d20, int8x8_t, int8x8_t, 8)
+
+STORE_REG_INDEX (d20, int8x8_t, int32_t, 16)
+STORE_REG_INDEX (d20, int8x8_t, uint32_t, 16)
+STORE_REG_INDEX (d20, int8x8_t, int8x8_t, 16)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-2.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-2.c
new file mode 100644
index 00000000000..5adb1a86046
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-2.c
@@ -0,0 +1,110 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } } */
+
+#include <arm_neon.h>
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+/*
+** store_zero_int8x8_t_m264:
+** sub (c[0-9]+), c0, #264
+** str xzr, \[\1\]
+** ret
+*/
+STORE_ZERO_OFFSET (int8x8_t, m264)
+
+/*
+** store_zero_int16x4_t_m257:
+** sub (c[0-9]+), c0, #257
+** str xzr, \[\1\]
+** ret
+*/
+STORE_ZERO_OFFSET (int16x4_t, m257)
+
+/*
+** store_zero_int32x2_t_m256:
+** str xzr, \[c0, #?-256\]
+** ret
+*/
+STORE_ZERO_OFFSET (int32x2_t, m256)
+
+/*
+** store_zero_int64x1_t_m248:
+** str xzr, \[c0, #?-248\]
+** ret
+*/
+STORE_ZERO_OFFSET (int64x1_t, m248)
+
+/*
+** store_zero_float16x4_t_m8:
+** str xzr, \[c0, #?-8\]
+** ret
+*/
+STORE_ZERO_OFFSET (float16x4_t, m8)
+
+/*
+** store_zero_float32x2_t_m1:
+** str xzr, \[c0, #?-1\]
+** ret
+*/
+STORE_ZERO_OFFSET (float32x2_t, m1)
+
+/*
+** store_zero_float32x2_t_1:
+** str xzr, \[c0, #?1\]
+** ret
+*/
+STORE_ZERO_OFFSET (float32x2_t, 1)
+
+/*
+** store_zero_float64x1_t_8:
+** str xzr, \[c0, #?8\]
+** ret
+*/
+STORE_ZERO_OFFSET (float64x1_t, 8)
+
+/*
+** store_zero_int8x8_t_248:
+** str xzr, \[c0, #?248\]
+** ret
+*/
+STORE_ZERO_OFFSET (int8x8_t, 248)
+
+/*
+** store_zero_int8x8_t_255:
+** str xzr, \[c0, #?255\]
+** ret
+*/
+STORE_ZERO_OFFSET (int8x8_t, 255)
+
+/*
+** store_zero_int8x8_t_256:
+** add (c[0-9]+), c0, #?256
+** str xzr, \[\1\]
+** ret
+*/
+STORE_ZERO_OFFSET (int8x8_t, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence. */
+STORE_ZERO_INDEX (int8x8_t, int32_t, 1)
+STORE_ZERO_INDEX (int8x8_t, uint32_t, 1)
+STORE_ZERO_INDEX (int8x8_t, int8x8_t, 1)
+
+STORE_ZERO_INDEX (int8x8_t, int32_t, 2)
+STORE_ZERO_INDEX (int8x8_t, uint32_t, 2)
+STORE_ZERO_INDEX (int8x8_t, int8x8_t, 2)
+
+STORE_ZERO_INDEX (int8x8_t, int32_t, 4)
+STORE_ZERO_INDEX (int8x8_t, uint32_t, 4)
+STORE_ZERO_INDEX (int8x8_t, int8x8_t, 4)
+
+STORE_ZERO_INDEX (int8x8_t, int32_t, 8)
+STORE_ZERO_INDEX (int8x8_t, uint32_t, 8)
+STORE_ZERO_INDEX (int8x8_t, int8x8_t, 8)
+
+STORE_ZERO_INDEX (int8x8_t, int32_t, 16)
+STORE_ZERO_INDEX (int8x8_t, uint32_t, 16)
+STORE_ZERO_INDEX (int8x8_t, int8x8_t, 16)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/load-store-utils.h b/gcc/testsuite/gcc.target/aarch64/morello/load-store-utils.h
index bd88bef99cf..695cc30543a 100644
--- a/gcc/testsuite/gcc.target/aarch64/morello/load-store-utils.h
+++ b/gcc/testsuite/gcc.target/aarch64/morello/load-store-utils.h
@@ -77,7 +77,7 @@
store_zero_##TYPE##_##OFFSET (char *CAP base) \
{ \
TYPE *CAP ptr = (TYPE *CAP) (base + OFFSET); \
- *ptr = 0; \
+ *ptr = (TYPE) { 0 }; \
}
#define STORE_REG_INDEX(REG, TYPE, INDEX_TYPE, SCALE) \
@@ -99,7 +99,7 @@
{ \
ptrdiff_t byte_index = (ptrdiff_t) index * SCALE; \
TYPE *CAP ptr = (TYPE *CAP) (base + byte_index); \
- *ptr = 0; \
+ *ptr = (TYPE) { 0 }; \
}
#define PRE_MODIFY_OFFSET(TYPE, OFFSET) \
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-05-05 12:07 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-05 12:07 [gcc(refs/vendors/ARM/heads/morello)] aarch64: Alternative-base support for 128-bit vectors Matthew Malcomson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).