diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index f36665e27bb66e0f1fb42443ce7b506bd2bf6914..bf13f0753a856a13ae92ceeb44291df3dc379a13 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -153,6 +153,19 @@ (define_insn "*aarch64_simd_mov" (set_attr "length" "4,4,4,8,8,8,4")] ) +;; When storing lane zero we can use the normal STR and its more permissive +;; addressing modes. + +(define_insn "aarch64_store_lane0" + [(set (match_operand: 0 "memory_operand" "=m") + (vec_select: (match_operand:VALL_F16 1 "register_operand" "w") + (parallel [(match_operand 2 "const_int_operand" "n")])))] + "TARGET_SIMD + && ENDIAN_LANE_N (mode, INTVAL (operands[2])) == 0" + "str\\t%1, %0" + [(set_attr "type" "neon_store1_1reg")] +) + (define_insn "load_pair" [(set (match_operand:VD 0 "register_operand" "=w") (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump")) diff --git a/gcc/testsuite/gcc.target/aarch64/store_lane0_str_1.c b/gcc/testsuite/gcc.target/aarch64/store_lane0_str_1.c new file mode 100644 index 0000000000000000000000000000000000000000..4464fec2c1f24c212be4fc6c94b509843fd0058e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/store_lane0_str_1.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +typedef int v2si __attribute__ ((vector_size (8))); +typedef float v2sf __attribute__ ((vector_size (8))); +typedef short v4hi __attribute__ ((vector_size (8))); +typedef __fp16 v4hf __attribute__ ((vector_size (8))); +typedef char v8qi __attribute__ ((vector_size (8))); + +typedef int v4si __attribute__ ((vector_size (16))); +typedef float v4sf __attribute__ ((vector_size (16))); +typedef short v8hi __attribute__ ((vector_size (16))); +typedef __fp16 v8hf __attribute__ ((vector_size (16))); +typedef char v16qi __attribute__ ((vector_size (16))); +typedef long long v2di __attribute__ ((vector_size (16))); +typedef double v2df __attribute__ ((vector_size (16))); + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define LANE(N) (N - 1) +#else +#define LANE(N) 0 +#endif + +#define FUNC(T, E, N) \ +void \ +store_lane_##T (T x, E *y) \ +{ \ + y[0] = x[N - 1 - LANE (N)]; \ + y[3] = x[LANE (N)]; \ +} + +FUNC (v2si, int, 2) +FUNC (v2sf, float, 2) +FUNC (v4hi, short, 4) +FUNC (v4hf, __fp16, 4) +FUNC (v8qi, char, 8) + +FUNC (v4si, int, 4) +FUNC (v4sf, float, 4) +FUNC (v8hi, short, 8) +FUNC (v8hf, __fp16, 8) +FUNC (v16qi, char, 16) +FUNC (v2di, long long, 2) +FUNC (v2df, double, 2) + +/* When storing lane zero of a vector we can use the scalar STR instruction + that supports more addressing modes. */ + +/* { dg-final { scan-assembler-times "str\ts\[0-9\]+" 4 } } */ +/* { dg-final { scan-assembler-times "str\tb\[0-9\]+" 2 } } */ +/* { dg-final { scan-assembler-times "str\th\[0-9\]+" 4 } } */ +/* { dg-final { scan-assembler-times "str\td\[0-9\]+" 2 } } */ +/* { dg-final { scan-assembler-not "umov" } } */ +/* { dg-final { scan-assembler-not "dup" } } */