[gcc(refs/vendors/ARM/heads/morello)] aarch64: Alternative-base support for 128-bit vectors
From: Matthew Malcomson @ 2022-05-05 12:07 UTC
  To: gcc-cvs

https://gcc.gnu.org/g:10533b003b7f23a6028a3d0a226c75af8712e122

commit 10533b003b7f23a6028a3d0a226c75af8712e122
Author: Richard Sandiford <richard.sandiford@arm.com>
Date:   Fri Apr 8 13:49:01 2022 +0100

    aarch64: Alternative-base support for 128-bit vectors
    
    The 128-bit vector move patterns don't allow direct GPR loads
    and stores, but they do allow storing zero as a pair of XZRs.
    For alternative-base addresses we need to split that into two stores.
    (It might be more efficient in some cases to zero a Q register and
    store that, but that should be handled by the cost model.)
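
    For instance, a zero store of a 128-bit vector through a capability
    (alternative) base is expected to become two XZR stores.  Below is
    a minimal sketch modelled on the new alt-base-store-v128-2.c test,
    where CAP stands for the capability qualifier that
    load-store-utils.h supplies:

        void
        store_zero_v128 (char *CAP base)
        {
          *(int8x16_t *CAP) base = (int8x16_t) { 0 };
        }

        /* Expected to assemble to something like:
             str  xzr, [c0]
             str  xzr, [c0, #8]  */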
    
    TI and TF addresses are required to be valid for both GPRs and FPRs,
    but 128-bit vector addresses are only required to be valid for FPRs
    (which makes sense given the use case).  We can't therefore split
    a zero store for all valid addresses: we need to require an address
    whose DImode halves are both legitimate, as for TImode.
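
    Concretely, the alternative-base str/ldr immediates exercised by
    the new tests span a signed 9-bit unscaled range of [-256, 255], so
    an offset of 247 leaves both DImode halves (at 247 and 255)
    addressable, while 248 pushes the high half to 256, out of range;
    the new UAt constraint accepts only addresses of the former kind.
    A splitter for the new alternative might then look something like
    this (a hypothetical sketch rather than the branch's actual
    implementation, which may equally live in target code):

        (define_split
          [(set (match_operand:VQMOV 0 "memory_operand")
                (match_operand:VQMOV 1 "aarch64_simd_imm_zero"))]
          "TARGET_SIMD && reload_completed
           && satisfies_constraint_UAt (operands[0])"
          [(set (match_dup 2) (const_int 0))
           (set (match_dup 3) (const_int 0))]
        {
          /* Rewrite the 128-bit zero store as two DImode (XZR) stores
             at byte offsets 0 and 8; UAt has already checked that both
             halves are legitimate.  */
          operands[2] = adjust_address (operands[0], DImode, 0);
          operands[3] = adjust_address (operands[0], DImode, 8);
        })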

Diff:
---
 gcc/config/aarch64/aarch64-simd.md                 |  11 +-
 gcc/config/aarch64/constraints.md                  |   6 +
 .../aarch64/morello/alt-base-load-v128-1.c         | 128 +++++++++++++
 .../aarch64/morello/alt-base-load-v64-1.c          | 211 +++++++++++++++++++++
 .../aarch64/morello/alt-base-store-v128-1.c        | 128 +++++++++++++
 .../aarch64/morello/alt-base-store-v128-2.c        |  40 ++++
 .../aarch64/morello/alt-base-store-v64-1.c         | 211 +++++++++++++++++++++
 .../aarch64/morello/alt-base-store-v64-2.c         | 110 +++++++++++
 .../gcc.target/aarch64/morello/load-store-utils.h  |   4 +-
 9 files changed, 842 insertions(+), 7 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 47f84773d9e..a82c662e867 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -133,9 +133,9 @@
 
 (define_insn "*aarch64_simd_mov<VQMOV:mode>"
   [(set (match_operand:VQMOV 0 "nonimmediate_operand"
-		"=w, Umn,  m,  w, ?r, ?w, ?r, w")
+		"=w, Umn,  m,  w, ?r, ?w, ?r, ?UAt, w")
 	(match_operand:VQMOV 1 "general_operand"
-		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
+		"m,  Dz,   w,  w,  w,  r,  r,   Dz, Dn"))]
   "TARGET_SIMD
    && (register_operand (operands[0], <MODE>mode)
        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
@@ -153,8 +153,9 @@
     case 4:
     case 5:
     case 6:
-	return "#";
     case 7:
+	return "#";
+    case 8:
 	return aarch64_output_simd_mov_immediate (operands[1], 128);
     default:
 	gcc_unreachable ();
@@ -162,8 +163,8 @@
 }
   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
 		     neon_logic<q>, multiple, multiple,\
-		     multiple, neon_move<q>")
-   (set_attr "length" "4,4,4,4,8,8,8,4")]
+		     multiple, store_16, neon_move<q>")
+   (set_attr "length" "4,4,4,4,8,8,8,8,4")]
 )
 
 ;; When storing lane zero we can use the normal STR and its more permissive
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 42d92a3bb97..c80a5fed2db 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -383,6 +383,12 @@
    A general memory operand with a normal base register"
   (match_operand 0 "aarch64_normal_base_mem_operand"))
 
+(define_memory_constraint "UAt"
+  "@internal
+   A TI memory operand with an alternative base register"
+  (and (match_code "mem")
+       (match_test "aarch64_alt_base_address_p (TImode, XEXP (op, 0))")))
+
 (define_memory_constraint "UAu"
   "@internal
    Either a general memory operand with a normal base register or
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v128-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v128-1.c
new file mode 100644
index 00000000000..81cd42e002a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v128-1.c
@@ -0,0 +1,128 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" ""  { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } }  */
+
+#include <arm_neon.h>
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+/* Check for valid asm, but don't mandate a particular sequence.  */
+LOAD_REG_OFFSET (x10, int8x16_t, m257)
+LOAD_REG_OFFSET (x10, int16x8_t, m256)
+LOAD_REG_OFFSET (x10, int32x4_t, m255)
+LOAD_REG_OFFSET (x10, int64x2_t, m1)
+LOAD_REG_OFFSET (x10, float16x8_t, 1)
+LOAD_REG_OFFSET (x10, bfloat16x8_t, 247)
+LOAD_REG_OFFSET (x10, float32x4_t, 248)
+LOAD_REG_OFFSET (x10, float64x2_t, 249)
+LOAD_REG_OFFSET (x10, int8x16_t, 256)
+LOAD_REG_OFFSET (x10, int8x16_t, 511)
+LOAD_REG_OFFSET (x10, int8x16_t, 512)
+
+LOAD_REG_INDEX (x10, int8x16_t, int32_t, 1)
+LOAD_REG_INDEX (x10, int8x16_t, uint32_t, 1)
+LOAD_REG_INDEX (x10, int8x16_t, uint64_t, 1)
+
+LOAD_REG_INDEX (x10, int8x16_t, int32_t, 2)
+LOAD_REG_INDEX (x10, int8x16_t, uint32_t, 2)
+LOAD_REG_INDEX (x10, int8x16_t, uint64_t, 2)
+
+LOAD_REG_INDEX (x10, int8x16_t, int32_t, 4)
+LOAD_REG_INDEX (x10, int8x16_t, uint32_t, 4)
+LOAD_REG_INDEX (x10, int8x16_t, uint64_t, 4)
+
+LOAD_REG_INDEX (x10, int8x16_t, int32_t, 8)
+LOAD_REG_INDEX (x10, int8x16_t, uint32_t, 8)
+LOAD_REG_INDEX (x10, int8x16_t, uint64_t, 8)
+
+LOAD_REG_INDEX (x10, int8x16_t, int32_t, 16)
+LOAD_REG_INDEX (x10, int8x16_t, uint32_t, 16)
+LOAD_REG_INDEX (x10, int8x16_t, uint64_t, 16)
+
+/*
+** load_q20_int8x16_t_m257:
+**	sub	(c[0-9]+), c0, #257
+**	ldr	q20, \[\1\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, int8x16_t, m257)
+
+/*
+** load_q20_int16x8_t_m256:
+**	ldr	q20, \[c0, #?-256\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, int16x8_t, m256)
+
+/*
+** load_q20_int32x4_t_m255:
+**	ldr	q20, \[c0, #?-255\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, int32x4_t, m255)
+
+/*
+** load_q20_int64x2_t_m1:
+**	ldr	q20, \[c0, #?-1\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, int64x2_t, m1)
+
+/*
+** load_q20_float16x8_t_1:
+**	ldr	q20, \[c0, #?1\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, float16x8_t, 1)
+
+/*
+** load_q20_bfloat16x8_t_247:
+**	ldr	q20, \[c0, #?247\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, bfloat16x8_t, 247)
+
+/*
+** load_q20_float32x4_t_248:
+**	ldr	q20, \[c0, #?248\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, float32x4_t, 248)
+
+/*
+** load_q20_float64x2_t_249:
+**	ldr	q20, \[c0, #?249\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, float64x2_t, 249)
+
+/*
+** load_q20_int8x16_t_256:
+**	add	(c[0-9]+), c0, #?256
+**	ldr	q20, \[\1\]
+**	ret
+*/
+LOAD_REG_OFFSET (q20, int8x16_t, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence.  */
+LOAD_REG_INDEX (q20, int8x16_t, int32_t, 1)
+LOAD_REG_INDEX (q20, int8x16_t, uint32_t, 1)
+LOAD_REG_INDEX (q20, int8x16_t, uint64_t, 1)
+
+LOAD_REG_INDEX (q20, int8x16_t, int32_t, 2)
+LOAD_REG_INDEX (q20, int8x16_t, uint32_t, 2)
+LOAD_REG_INDEX (q20, int8x16_t, uint64_t, 2)
+
+LOAD_REG_INDEX (q20, int8x16_t, int32_t, 4)
+LOAD_REG_INDEX (q20, int8x16_t, uint32_t, 4)
+LOAD_REG_INDEX (q20, int8x16_t, uint64_t, 4)
+
+LOAD_REG_INDEX (q20, int8x16_t, int32_t, 8)
+LOAD_REG_INDEX (q20, int8x16_t, uint32_t, 8)
+LOAD_REG_INDEX (q20, int8x16_t, uint64_t, 8)
+
+LOAD_REG_INDEX (q20, int8x16_t, int32_t, 16)
+LOAD_REG_INDEX (q20, int8x16_t, uint32_t, 16)
+LOAD_REG_INDEX (q20, int8x16_t, uint64_t, 16)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v64-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v64-1.c
new file mode 100644
index 00000000000..424432694a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-load-v64-1.c
@@ -0,0 +1,211 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" ""  { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } }  */
+
+#include <arm_neon.h>
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+/*
+** load_x10_int8x8_t_m264:
+**	sub	(c[0-9]+), c0, #264
+**	ldr	x10, \[\1\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, int8x8_t, m264)
+
+/*
+** load_x10_int16x4_t_m257:
+**	sub	(c[0-9]+), c0, #257
+**	ldr	x10, \[\1\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, int16x4_t, m257)
+
+/*
+** load_x10_int32x2_t_m256:
+**	ldr	x10, \[c0, #?-256\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, int32x2_t, m256)
+
+/*
+** load_x10_int64x1_t_m248:
+**	ldr	x10, \[c0, #?-248\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, int64x1_t, m248)
+
+/*
+** load_x10_float16x4_t_m8:
+**	ldr	x10, \[c0, #?-8\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, float16x4_t, m8)
+
+/*
+** load_x10_bfloat16x4_t_m1:
+**	ldr	x10, \[c0, #?-1\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, bfloat16x4_t, m1)
+
+/*
+** load_x10_float32x2_t_1:
+**	ldr	x10, \[c0, #?1\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, float32x2_t, 1)
+
+/*
+** load_x10_float64x1_t_8:
+**	ldr	x10, \[c0, #?8\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, float64x1_t, 8)
+
+/*
+** load_x10_int8x8_t_248:
+**	ldr	x10, \[c0, #?248\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, int8x8_t, 248)
+
+/*
+** load_x10_int8x8_t_255:
+**	ldr	x10, \[c0, #?255\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, int8x8_t, 255)
+
+/*
+** load_x10_int8x8_t_256:
+**	add	(c[0-9]+), c0, #?256
+**	ldr	x10, \[\1\]
+**	ret
+*/
+LOAD_REG_OFFSET (x10, int8x8_t, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence.  */
+LOAD_REG_INDEX (x10, int8x8_t, int32_t, 1)
+LOAD_REG_INDEX (x10, int8x8_t, uint32_t, 1)
+LOAD_REG_INDEX (x10, int8x8_t, int8x8_t, 1)
+
+LOAD_REG_INDEX (x10, int8x8_t, int32_t, 2)
+LOAD_REG_INDEX (x10, int8x8_t, uint32_t, 2)
+LOAD_REG_INDEX (x10, int8x8_t, int8x8_t, 2)
+
+LOAD_REG_INDEX (x10, int8x8_t, int32_t, 4)
+LOAD_REG_INDEX (x10, int8x8_t, uint32_t, 4)
+LOAD_REG_INDEX (x10, int8x8_t, int8x8_t, 4)
+
+LOAD_REG_INDEX (x10, int8x8_t, int32_t, 8)
+LOAD_REG_INDEX (x10, int8x8_t, uint32_t, 8)
+LOAD_REG_INDEX (x10, int8x8_t, int8x8_t, 8)
+
+LOAD_REG_INDEX (x10, int8x8_t, int32_t, 16)
+LOAD_REG_INDEX (x10, int8x8_t, uint32_t, 16)
+LOAD_REG_INDEX (x10, int8x8_t, int8x8_t, 16)
+
+/*
+** load_d20_int8x8_t_m264:
+**	sub	(c[0-9]+), c0, #264
+**	ldr	d20, \[\1\]
+**	ret
+*/
+LOAD_REG_OFFSET (d20, int8x8_t, m264)
+
+/*
+** load_d20_int16x4_t_m257:
+**	sub	(c[0-9]+), c0, #257
+**	ldr	d20, \[\1\]
+**	ret
+*/
+LOAD_REG_OFFSET (d20, int16x4_t, m257)
+
+/*
+** load_d20_int32x2_t_m256:
+**	ldr	d20, \[c0, #?-256\]
+**	ret
+*/
+LOAD_REG_OFFSET (d20, int32x2_t, m256)
+
+/*
+** load_d20_int64x1_t_m248:
+**	ldr	d20, \[c0, #?-248\]
+**	ret
+*/
+LOAD_REG_OFFSET (d20, int64x1_t, m248)
+
+/*
+** load_d20_float16x4_t_m8:
+**	ldr	d20, \[c0, #?-8\]
+**	ret
+*/
+LOAD_REG_OFFSET (d20, float16x4_t, m8)
+
+/*
+** load_d20_bfloat16x4_t_m1:
+**	ldr	d20, \[c0, #?-1\]
+**	ret
+*/
+LOAD_REG_OFFSET (d20, bfloat16x4_t, m1)
+
+/*
+** load_d20_float32x2_t_1:
+**	ldr	d20, \[c0, #?1\]
+**	ret
+*/
+LOAD_REG_OFFSET (d20, float32x2_t, 1)
+
+/*
+** load_d20_float64x1_t_8:
+**	ldr	d20, \[c0, #?8\]
+**	ret
+*/
+LOAD_REG_OFFSET (d20, float64x1_t, 8)
+
+/*
+** load_d20_int8x8_t_248:
+**	ldr	d20, \[c0, #?248\]
+**	ret
+*/
+LOAD_REG_OFFSET (d20, int8x8_t, 248)
+
+/*
+** load_d20_int8x8_t_255:
+**	ldr	d20, \[c0, #?255\]
+**	ret
+*/
+LOAD_REG_OFFSET (d20, int8x8_t, 255)
+
+/*
+** load_d20_int8x8_t_256:
+**	add	(c[0-9]+), c0, #?256
+**	ldr	d20, \[\1\]
+**	ret
+*/
+LOAD_REG_OFFSET (d20, int8x8_t, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence.  */
+LOAD_REG_INDEX (d20, int8x8_t, int32_t, 1)
+LOAD_REG_INDEX (d20, int8x8_t, uint32_t, 1)
+LOAD_REG_INDEX (d20, int8x8_t, int8x8_t, 1)
+
+LOAD_REG_INDEX (d20, int8x8_t, int32_t, 2)
+LOAD_REG_INDEX (d20, int8x8_t, uint32_t, 2)
+LOAD_REG_INDEX (d20, int8x8_t, int8x8_t, 2)
+
+LOAD_REG_INDEX (d20, int8x8_t, int32_t, 4)
+LOAD_REG_INDEX (d20, int8x8_t, uint32_t, 4)
+LOAD_REG_INDEX (d20, int8x8_t, int8x8_t, 4)
+
+LOAD_REG_INDEX (d20, int8x8_t, int32_t, 8)
+LOAD_REG_INDEX (d20, int8x8_t, uint32_t, 8)
+LOAD_REG_INDEX (d20, int8x8_t, int8x8_t, 8)
+
+LOAD_REG_INDEX (d20, int8x8_t, int32_t, 16)
+LOAD_REG_INDEX (d20, int8x8_t, uint32_t, 16)
+LOAD_REG_INDEX (d20, int8x8_t, int8x8_t, 16)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-1.c
new file mode 100644
index 00000000000..6f982899da0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-1.c
@@ -0,0 +1,128 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" ""  { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } }  */
+
+#include <arm_neon.h>
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+/* Check for valid asm, but don't mandate a particular sequence.  */
+STORE_REG_OFFSET (x10, int8x16_t, m257)
+STORE_REG_OFFSET (x10, int16x8_t, m256)
+STORE_REG_OFFSET (x10, int32x4_t, m255)
+STORE_REG_OFFSET (x10, int64x2_t, m1)
+STORE_REG_OFFSET (x10, float16x8_t, 1)
+STORE_REG_OFFSET (x10, bfloat16x8_t, 247)
+STORE_REG_OFFSET (x10, float32x4_t, 248)
+STORE_REG_OFFSET (x10, float64x2_t, 249)
+STORE_REG_OFFSET (x10, int8x16_t, 256)
+STORE_REG_OFFSET (x10, int8x16_t, 511)
+STORE_REG_OFFSET (x10, int8x16_t, 512)
+
+STORE_REG_INDEX (x10, int8x16_t, int32_t, 1)
+STORE_REG_INDEX (x10, int8x16_t, uint32_t, 1)
+STORE_REG_INDEX (x10, int8x16_t, uint64_t, 1)
+
+STORE_REG_INDEX (x10, int8x16_t, int32_t, 2)
+STORE_REG_INDEX (x10, int8x16_t, uint32_t, 2)
+STORE_REG_INDEX (x10, int8x16_t, uint64_t, 2)
+
+STORE_REG_INDEX (x10, int8x16_t, int32_t, 4)
+STORE_REG_INDEX (x10, int8x16_t, uint32_t, 4)
+STORE_REG_INDEX (x10, int8x16_t, uint64_t, 4)
+
+STORE_REG_INDEX (x10, int8x16_t, int32_t, 8)
+STORE_REG_INDEX (x10, int8x16_t, uint32_t, 8)
+STORE_REG_INDEX (x10, int8x16_t, uint64_t, 8)
+
+STORE_REG_INDEX (x10, int8x16_t, int32_t, 16)
+STORE_REG_INDEX (x10, int8x16_t, uint32_t, 16)
+STORE_REG_INDEX (x10, int8x16_t, uint64_t, 16)
+
+/*
+** store_q20_int8x16_t_m257:
+**	sub	(c[0-9]+), c0, #257
+**	str	q20, \[\1\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, int8x16_t, m257)
+
+/*
+** store_q20_int16x8_t_m256:
+**	str	q20, \[c0, #?-256\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, int16x8_t, m256)
+
+/*
+** store_q20_int32x4_t_m255:
+**	str	q20, \[c0, #?-255\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, int32x4_t, m255)
+
+/*
+** store_q20_int64x2_t_m1:
+**	str	q20, \[c0, #?-1\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, int64x2_t, m1)
+
+/*
+** store_q20_float16x8_t_1:
+**	str	q20, \[c0, #?1\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, float16x8_t, 1)
+
+/*
+** store_q20_bfloat16x8_t_247:
+**	str	q20, \[c0, #?247\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, bfloat16x8_t, 247)
+
+/*
+** store_q20_float32x4_t_248:
+**	str	q20, \[c0, #?248\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, float32x4_t, 248)
+
+/*
+** store_q20_float64x2_t_249:
+**	str	q20, \[c0, #?249\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, float64x2_t, 249)
+
+/*
+** store_q20_int8x16_t_256:
+**	add	(c[0-9]+), c0, #?256
+**	str	q20, \[\1\]
+**	ret
+*/
+STORE_REG_OFFSET (q20, int8x16_t, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence.  */
+STORE_REG_INDEX (q20, int8x16_t, int32_t, 1)
+STORE_REG_INDEX (q20, int8x16_t, uint32_t, 1)
+STORE_REG_INDEX (q20, int8x16_t, uint64_t, 1)
+
+STORE_REG_INDEX (q20, int8x16_t, int32_t, 2)
+STORE_REG_INDEX (q20, int8x16_t, uint32_t, 2)
+STORE_REG_INDEX (q20, int8x16_t, uint64_t, 2)
+
+STORE_REG_INDEX (q20, int8x16_t, int32_t, 4)
+STORE_REG_INDEX (q20, int8x16_t, uint32_t, 4)
+STORE_REG_INDEX (q20, int8x16_t, uint64_t, 4)
+
+STORE_REG_INDEX (q20, int8x16_t, int32_t, 8)
+STORE_REG_INDEX (q20, int8x16_t, uint32_t, 8)
+STORE_REG_INDEX (q20, int8x16_t, uint64_t, 8)
+
+STORE_REG_INDEX (q20, int8x16_t, int32_t, 16)
+STORE_REG_INDEX (q20, int8x16_t, uint32_t, 16)
+STORE_REG_INDEX (q20, int8x16_t, uint64_t, 16)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-2.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-2.c
new file mode 100644
index 00000000000..6eabef67de4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v128-2.c
@@ -0,0 +1,40 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+
+#include <arm_neon.h>
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+/* Check for valid asm, but don't mandate a particular sequence.  */
+STORE_ZERO_OFFSET (int8x16_t, m257)
+STORE_ZERO_OFFSET (int16x8_t, m256)
+STORE_ZERO_OFFSET (int32x4_t, m255)
+STORE_ZERO_OFFSET (int64x2_t, m1)
+STORE_ZERO_OFFSET (float16x8_t, 1)
+STORE_ZERO_OFFSET (float32x4_t, 247)
+STORE_ZERO_OFFSET (float64x2_t, 248)
+STORE_ZERO_OFFSET (int8x16_t, 249)
+STORE_ZERO_OFFSET (int8x16_t, 256)
+STORE_ZERO_OFFSET (int8x16_t, 511)
+STORE_ZERO_OFFSET (int8x16_t, 512)
+
+STORE_ZERO_INDEX (int8x16_t, int32_t, 1)
+STORE_ZERO_INDEX (int8x16_t, uint32_t, 1)
+STORE_ZERO_INDEX (int8x16_t, uint64_t, 1)
+
+STORE_ZERO_INDEX (int8x16_t, int32_t, 2)
+STORE_ZERO_INDEX (int8x16_t, uint32_t, 2)
+STORE_ZERO_INDEX (int8x16_t, uint64_t, 2)
+
+STORE_ZERO_INDEX (int8x16_t, int32_t, 4)
+STORE_ZERO_INDEX (int8x16_t, uint32_t, 4)
+STORE_ZERO_INDEX (int8x16_t, uint64_t, 4)
+
+STORE_ZERO_INDEX (int8x16_t, int32_t, 8)
+STORE_ZERO_INDEX (int8x16_t, uint32_t, 8)
+STORE_ZERO_INDEX (int8x16_t, uint64_t, 8)
+
+STORE_ZERO_INDEX (int8x16_t, int32_t, 16)
+STORE_ZERO_INDEX (int8x16_t, uint32_t, 16)
+STORE_ZERO_INDEX (int8x16_t, uint64_t, 16)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-1.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-1.c
new file mode 100644
index 00000000000..cae7b40bfd5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-1.c
@@ -0,0 +1,211 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" ""  { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } }  */
+
+#include <arm_neon.h>
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+/*
+** store_x10_int8x8_t_m264:
+**	sub	(c[0-9]+), c0, #264
+**	str	x10, \[\1\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, int8x8_t, m264)
+
+/*
+** store_x10_int16x4_t_m257:
+**	sub	(c[0-9]+), c0, #257
+**	str	x10, \[\1\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, int16x4_t, m257)
+
+/*
+** store_x10_int32x2_t_m256:
+**	str	x10, \[c0, #?-256\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, int32x2_t, m256)
+
+/*
+** store_x10_int64x1_t_m248:
+**	str	x10, \[c0, #?-248\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, int64x1_t, m248)
+
+/*
+** store_x10_float16x4_t_m8:
+**	str	x10, \[c0, #?-8\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, float16x4_t, m8)
+
+/*
+** store_x10_bfloat16x4_t_m1:
+**	str	x10, \[c0, #?-1\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, bfloat16x4_t, m1)
+
+/*
+** store_x10_float32x2_t_1:
+**	str	x10, \[c0, #?1\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, float32x2_t, 1)
+
+/*
+** store_x10_float64x1_t_8:
+**	str	x10, \[c0, #?8\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, float64x1_t, 8)
+
+/*
+** store_x10_int8x8_t_248:
+**	str	x10, \[c0, #?248\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, int8x8_t, 248)
+
+/*
+** store_x10_int8x8_t_255:
+**	str	x10, \[c0, #?255\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, int8x8_t, 255)
+
+/*
+** store_x10_int8x8_t_256:
+**	add	(c[0-9]+), c0, #?256
+**	str	x10, \[\1\]
+**	ret
+*/
+STORE_REG_OFFSET (x10, int8x8_t, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence.  */
+STORE_REG_INDEX (x10, int8x8_t, int32_t, 1)
+STORE_REG_INDEX (x10, int8x8_t, uint32_t, 1)
+STORE_REG_INDEX (x10, int8x8_t, int8x8_t, 1)
+
+STORE_REG_INDEX (x10, int8x8_t, int32_t, 2)
+STORE_REG_INDEX (x10, int8x8_t, uint32_t, 2)
+STORE_REG_INDEX (x10, int8x8_t, int8x8_t, 2)
+
+STORE_REG_INDEX (x10, int8x8_t, int32_t, 4)
+STORE_REG_INDEX (x10, int8x8_t, uint32_t, 4)
+STORE_REG_INDEX (x10, int8x8_t, int8x8_t, 4)
+
+STORE_REG_INDEX (x10, int8x8_t, int32_t, 8)
+STORE_REG_INDEX (x10, int8x8_t, uint32_t, 8)
+STORE_REG_INDEX (x10, int8x8_t, int8x8_t, 8)
+
+STORE_REG_INDEX (x10, int8x8_t, int32_t, 16)
+STORE_REG_INDEX (x10, int8x8_t, uint32_t, 16)
+STORE_REG_INDEX (x10, int8x8_t, int8x8_t, 16)
+
+/*
+** store_d20_int8x8_t_m264:
+**	sub	(c[0-9]+), c0, #264
+**	str	d20, \[\1\]
+**	ret
+*/
+STORE_REG_OFFSET (d20, int8x8_t, m264)
+
+/*
+** store_d20_int16x4_t_m257:
+**	sub	(c[0-9]+), c0, #257
+**	str	d20, \[\1\]
+**	ret
+*/
+STORE_REG_OFFSET (d20, int16x4_t, m257)
+
+/*
+** store_d20_int32x2_t_m256:
+**	str	d20, \[c0, #?-256\]
+**	ret
+*/
+STORE_REG_OFFSET (d20, int32x2_t, m256)
+
+/*
+** store_d20_int64x1_t_m248:
+**	str	d20, \[c0, #?-248\]
+**	ret
+*/
+STORE_REG_OFFSET (d20, int64x1_t, m248)
+
+/*
+** store_d20_float16x4_t_m8:
+**	str	d20, \[c0, #?-8\]
+**	ret
+*/
+STORE_REG_OFFSET (d20, float16x4_t, m8)
+
+/*
+** store_d20_bfloat16x4_t_m1:
+**	str	d20, \[c0, #?-1\]
+**	ret
+*/
+STORE_REG_OFFSET (d20, bfloat16x4_t, m1)
+
+/*
+** store_d20_float32x2_t_1:
+**	str	d20, \[c0, #?1\]
+**	ret
+*/
+STORE_REG_OFFSET (d20, float32x2_t, 1)
+
+/*
+** store_d20_float64x1_t_8:
+**	str	d20, \[c0, #?8\]
+**	ret
+*/
+STORE_REG_OFFSET (d20, float64x1_t, 8)
+
+/*
+** store_d20_int8x8_t_248:
+**	str	d20, \[c0, #?248\]
+**	ret
+*/
+STORE_REG_OFFSET (d20, int8x8_t, 248)
+
+/*
+** store_d20_int8x8_t_255:
+**	str	d20, \[c0, #?255\]
+**	ret
+*/
+STORE_REG_OFFSET (d20, int8x8_t, 255)
+
+/*
+** store_d20_int8x8_t_256:
+**	add	(c[0-9]+), c0, #?256
+**	str	d20, \[\1\]
+**	ret
+*/
+STORE_REG_OFFSET (d20, int8x8_t, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence.  */
+STORE_REG_INDEX (d20, int8x8_t, int32_t, 1)
+STORE_REG_INDEX (d20, int8x8_t, uint32_t, 1)
+STORE_REG_INDEX (d20, int8x8_t, int8x8_t, 1)
+
+STORE_REG_INDEX (d20, int8x8_t, int32_t, 2)
+STORE_REG_INDEX (d20, int8x8_t, uint32_t, 2)
+STORE_REG_INDEX (d20, int8x8_t, int8x8_t, 2)
+
+STORE_REG_INDEX (d20, int8x8_t, int32_t, 4)
+STORE_REG_INDEX (d20, int8x8_t, uint32_t, 4)
+STORE_REG_INDEX (d20, int8x8_t, int8x8_t, 4)
+
+STORE_REG_INDEX (d20, int8x8_t, int32_t, 8)
+STORE_REG_INDEX (d20, int8x8_t, uint32_t, 8)
+STORE_REG_INDEX (d20, int8x8_t, int8x8_t, 8)
+
+STORE_REG_INDEX (d20, int8x8_t, int32_t, 16)
+STORE_REG_INDEX (d20, int8x8_t, uint32_t, 16)
+STORE_REG_INDEX (d20, int8x8_t, int8x8_t, 16)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-2.c b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-2.c
new file mode 100644
index 00000000000..5adb1a86046
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/morello/alt-base-store-v64-2.c
@@ -0,0 +1,110 @@
+/* { dg-do assemble } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" ""  { {-O[123s]} } } } */
+/* { dg-skip-if "" { *-*-* } { "-mabi=purecap" "-mfake-capability" } { "" } }  */
+
+#include <arm_neon.h>
+
+#define ALT_BASE
+#include "load-store-utils.h"
+
+/*
+** store_zero_int8x8_t_m264:
+**	sub	(c[0-9]+), c0, #264
+**	str	xzr, \[\1\]
+**	ret
+*/
+STORE_ZERO_OFFSET (int8x8_t, m264)
+
+/*
+** store_zero_int16x4_t_m257:
+**	sub	(c[0-9]+), c0, #257
+**	str	xzr, \[\1\]
+**	ret
+*/
+STORE_ZERO_OFFSET (int16x4_t, m257)
+
+/*
+** store_zero_int32x2_t_m256:
+**	str	xzr, \[c0, #?-256\]
+**	ret
+*/
+STORE_ZERO_OFFSET (int32x2_t, m256)
+
+/*
+** store_zero_int64x1_t_m248:
+**	str	xzr, \[c0, #?-248\]
+**	ret
+*/
+STORE_ZERO_OFFSET (int64x1_t, m248)
+
+/*
+** store_zero_float16x4_t_m8:
+**	str	xzr, \[c0, #?-8\]
+**	ret
+*/
+STORE_ZERO_OFFSET (float16x4_t, m8)
+
+/*
+** store_zero_float32x2_t_m1:
+**	str	xzr, \[c0, #?-1\]
+**	ret
+*/
+STORE_ZERO_OFFSET (float32x2_t, m1)
+
+/*
+** store_zero_float32x2_t_1:
+**	str	xzr, \[c0, #?1\]
+**	ret
+*/
+STORE_ZERO_OFFSET (float32x2_t, 1)
+
+/*
+** store_zero_float64x1_t_8:
+**	str	xzr, \[c0, #?8\]
+**	ret
+*/
+STORE_ZERO_OFFSET (float64x1_t, 8)
+
+/*
+** store_zero_int8x8_t_248:
+**	str	xzr, \[c0, #?248\]
+**	ret
+*/
+STORE_ZERO_OFFSET (int8x8_t, 248)
+
+/*
+** store_zero_int8x8_t_255:
+**	str	xzr, \[c0, #?255\]
+**	ret
+*/
+STORE_ZERO_OFFSET (int8x8_t, 255)
+
+/*
+** store_zero_int8x8_t_256:
+**	add	(c[0-9]+), c0, #?256
+**	str	xzr, \[\1\]
+**	ret
+*/
+STORE_ZERO_OFFSET (int8x8_t, 256)
+
+/* Check for valid asm, but don't mandate a particular sequence.  */
+STORE_ZERO_INDEX (int8x8_t, int32_t, 1)
+STORE_ZERO_INDEX (int8x8_t, uint32_t, 1)
+STORE_ZERO_INDEX (int8x8_t, int8x8_t, 1)
+
+STORE_ZERO_INDEX (int8x8_t, int32_t, 2)
+STORE_ZERO_INDEX (int8x8_t, uint32_t, 2)
+STORE_ZERO_INDEX (int8x8_t, int8x8_t, 2)
+
+STORE_ZERO_INDEX (int8x8_t, int32_t, 4)
+STORE_ZERO_INDEX (int8x8_t, uint32_t, 4)
+STORE_ZERO_INDEX (int8x8_t, int8x8_t, 4)
+
+STORE_ZERO_INDEX (int8x8_t, int32_t, 8)
+STORE_ZERO_INDEX (int8x8_t, uint32_t, 8)
+STORE_ZERO_INDEX (int8x8_t, int8x8_t, 8)
+
+STORE_ZERO_INDEX (int8x8_t, int32_t, 16)
+STORE_ZERO_INDEX (int8x8_t, uint32_t, 16)
+STORE_ZERO_INDEX (int8x8_t, int8x8_t, 16)
diff --git a/gcc/testsuite/gcc.target/aarch64/morello/load-store-utils.h b/gcc/testsuite/gcc.target/aarch64/morello/load-store-utils.h
index bd88bef99cf..695cc30543a 100644
--- a/gcc/testsuite/gcc.target/aarch64/morello/load-store-utils.h
+++ b/gcc/testsuite/gcc.target/aarch64/morello/load-store-utils.h
@@ -77,7 +77,7 @@
   store_zero_##TYPE##_##OFFSET (char *CAP base)				\
   {									\
     TYPE *CAP ptr = (TYPE *CAP) (base + OFFSET);			\
-    *ptr = 0;								\
+    *ptr = (TYPE) { 0 };						\
   }
 
 #define STORE_REG_INDEX(REG, TYPE, INDEX_TYPE, SCALE)			\
@@ -99,7 +99,7 @@
   {									\
     ptrdiff_t byte_index = (ptrdiff_t) index * SCALE;			\
     TYPE *CAP ptr = (TYPE *CAP) (base + byte_index);			\
-    *ptr = 0;								\
+    *ptr = (TYPE) { 0 };						\
   }
 
 #define PRE_MODIFY_OFFSET(TYPE, OFFSET)					\

