Subject: [gcc r12-7142] aarch64: Extend vec_concat patterns to 8-byte vectors
From: Richard Sandiford @ 2022-02-09 16:57 UTC
To: gcc-cvs
https://gcc.gnu.org/g:83d7e720cd1d075312e798c4ebd2e093f03465fb
commit r12-7142-g83d7e720cd1d075312e798c4ebd2e093f03465fb
Author: Richard Sandiford <richard.sandiford@arm.com>
Date: Wed Feb 9 16:57:06 2022 +0000
aarch64: Extend vec_concat patterns to 8-byte vectors
This patch extends the previous support for 16-byte vec_concat
so that it supports pairs of 4-byte elements. This too isn't
strictly a regression fix, since the 8-byte forms weren't affected
by the same problems as the 16-byte forms, but it leaves things in
a more consistent state.
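As a concrete illustration of the 8-byte case this enables (it mirrors
the s32_4 function in the new vec-init-14.c test below; the function
and parameter names here are illustrative only):

    #include <arm_neon.h>

    void
    store_pair (int32x2_t *res, int32_t a, int32_t b)
    {
      /* With the extended store_pair_lanes pattern, little-endian
         targets can store this concatenation of two 4-byte elements
         with a single store-pair:
             stp w1, w2, [x0]
         instead of first assembling the vector in a SIMD register.  */
      res[0] = (int32x2_t) { a, b };
    }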
gcc/
* config/aarch64/iterators.md (VDCSIF): New mode iterator.
(VDBL): Handle SF.
(single_wx, single_type, single_dtype, dblq): New mode attributes.
* config/aarch64/aarch64-simd.md (load_pair_lanes<mode>): Extend
from VDC to VDCSIF.
(store_pair_lanes<mode>): Likewise.
(*aarch64_combine_internal<mode>): Likewise.
(*aarch64_combine_internal_be<mode>): Likewise.
(*aarch64_combinez<mode>): Likewise.
(*aarch64_combinez_be<mode>): Likewise.
* config/aarch64/aarch64.cc (aarch64_classify_address): Handle
8-byte modes for ADDR_QUERY_LDP_STP_N.
(aarch64_print_operand): Likewise for %y.
gcc/testsuite/
* gcc.target/aarch64/vec-init-13.c: New test.
* gcc.target/aarch64/vec-init-14.c: Likewise.
* gcc.target/aarch64/vec-init-15.c: Likewise.
* gcc.target/aarch64/vec-init-16.c: Likewise.
* gcc.target/aarch64/vec-init-17.c: Likewise.
Diff:
---
gcc/config/aarch64/aarch64-simd.md | 72 +++++++--------
gcc/config/aarch64/aarch64.cc | 16 +++-
gcc/config/aarch64/iterators.md | 38 +++++++-
gcc/testsuite/gcc.target/aarch64/vec-init-13.c | 123 +++++++++++++++++++++++++
gcc/testsuite/gcc.target/aarch64/vec-init-14.c | 123 +++++++++++++++++++++++++
gcc/testsuite/gcc.target/aarch64/vec-init-15.c | 15 +++
gcc/testsuite/gcc.target/aarch64/vec-init-16.c | 12 +++
gcc/testsuite/gcc.target/aarch64/vec-init-17.c | 73 +++++++++++++++
8 files changed, 430 insertions(+), 42 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index ef6e772503d..18733428f3f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4243,12 +4243,12 @@
(define_insn "load_pair_lanes<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=w")
(vec_concat:<VDBL>
- (match_operand:VDC 1 "memory_operand" "Utq")
- (match_operand:VDC 2 "memory_operand" "m")))]
+ (match_operand:VDCSIF 1 "memory_operand" "Utq")
+ (match_operand:VDCSIF 2 "memory_operand" "m")))]
"TARGET_SIMD
&& aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
- "ldr\\t%q0, %1"
- [(set_attr "type" "neon_load1_1reg_q")]
+ "ldr\\t%<single_dtype>0, %1"
+ [(set_attr "type" "neon_load1_1reg<dblq>")]
)
;; This STP pattern is a partial duplicate of the general vec_concat patterns
@@ -4273,12 +4273,12 @@
(define_insn "store_pair_lanes<mode>"
[(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
(vec_concat:<VDBL>
- (match_operand:VDC 1 "register_operand" "w, r")
- (match_operand:VDC 2 "register_operand" "w, r")))]
+ (match_operand:VDCSIF 1 "register_operand" "w, r")
+ (match_operand:VDCSIF 2 "register_operand" "w, r")))]
"TARGET_SIMD"
"@
- stp\\t%d1, %d2, %y0
- stp\\t%x1, %x2, %y0"
+ stp\t%<single_type>1, %<single_type>2, %y0
+ stp\t%<single_wx>1, %<single_wx>2, %y0"
[(set_attr "type" "neon_stp, store_16")]
)
@@ -4292,37 +4292,37 @@
(define_insn "*aarch64_combine_internal<mode>"
[(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn")
(vec_concat:<VDBL>
- (match_operand:VDC 1 "register_operand" "0, 0, 0, ?w, ?r")
- (match_operand:VDC 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, w, ?r")))]
+ (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")
+ (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, w, ?r")))]
"TARGET_SIMD
&& !BYTES_BIG_ENDIAN
&& (register_operand (operands[0], <VDBL>mode)
|| register_operand (operands[2], <MODE>mode))"
"@
- ins\t%0.d[1], %2.d[0]
- ins\t%0.d[1], %2
- ld1\t{%0.d}[1], %2
- stp\t%d1, %d2, %y0
- stp\t%x1, %x2, %y0"
- [(set_attr "type" "neon_ins_q, neon_from_gp_q, neon_load1_one_lane_q, neon_stp, store_16")]
+ ins\t%0.<single_type>[1], %2.<single_type>[0]
+ ins\t%0.<single_type>[1], %<single_wx>2
+ ld1\t{%0.<single_type>}[1], %2
+ stp\t%<single_type>1, %<single_type>2, %y0
+ stp\t%<single_wx>1, %<single_wx>2, %y0"
+ [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")]
)
(define_insn "*aarch64_combine_internal_be<mode>"
[(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn")
(vec_concat:<VDBL>
- (match_operand:VDC 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, ?w, ?r")
- (match_operand:VDC 1 "register_operand" "0, 0, 0, ?w, ?r")))]
+ (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, ?w, ?r")
+ (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")))]
"TARGET_SIMD
&& BYTES_BIG_ENDIAN
&& (register_operand (operands[0], <VDBL>mode)
|| register_operand (operands[2], <MODE>mode))"
"@
- ins\t%0.d[1], %2.d[0]
- ins\t%0.d[1], %2
- ld1\t{%0.d}[1], %2
- stp\t%d2, %d1, %y0
- stp\t%x2, %x1, %y0"
- [(set_attr "type" "neon_ins_q, neon_from_gp_q, neon_load1_one_lane_q, neon_stp, store_16")]
+ ins\t%0.<single_type>[1], %2.<single_type>[0]
+ ins\t%0.<single_type>[1], %<single_wx>2
+ ld1\t{%0.<single_type>}[1], %2
+ stp\t%<single_type>2, %<single_type>1, %y0
+ stp\t%<single_wx>2, %<single_wx>1, %y0"
+ [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")]
)
;; In this insn, operand 1 should be low, and operand 2 the high part of the
@@ -4331,13 +4331,13 @@
(define_insn "*aarch64_combinez<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
(vec_concat:<VDBL>
- (match_operand:VDC 1 "nonimmediate_operand" "w,?r,m")
- (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
+ (match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")
+ (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"@
- mov\\t%0.8b, %1.8b
- fmov\t%d0, %1
- ldr\\t%d0, %1"
+ fmov\\t%<single_type>0, %<single_type>1
+ fmov\t%<single_type>0, %<single_wx>1
+ ldr\\t%<single_type>0, %1"
[(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
(set_attr "arch" "simd,fp,simd")]
)
@@ -4345,13 +4345,13 @@
(define_insn "*aarch64_combinez_be<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
(vec_concat:<VDBL>
- (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
- (match_operand:VDC 1 "nonimmediate_operand" "w,?r,m")))]
+ (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")
+ (match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"@
- mov\\t%0.8b, %1.8b
- fmov\t%d0, %1
- ldr\\t%d0, %1"
+ fmov\\t%<single_type>0, %<single_type>1
+ fmov\t%<single_type>0, %<single_wx>1
+ ldr\\t%<single_type>0, %1"
[(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
(set_attr "arch" "simd,fp,simd")]
)
@@ -4362,8 +4362,8 @@
(define_expand "@aarch64_vec_concat<mode>"
[(set (match_operand:<VDBL> 0 "register_operand")
(vec_concat:<VDBL>
- (match_operand:VDC 1 "general_operand")
- (match_operand:VDC 2 "general_operand")))]
+ (match_operand:VDCSIF 1 "general_operand")
+ (match_operand:VDCSIF 2 "general_operand")))]
"TARGET_SIMD"
{
int lo = BYTES_BIG_ENDIAN ? 2 : 1;
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index af42d1bedfe..7bb97bd48e4 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -9922,9 +9922,15 @@ aarch64_classify_address (struct aarch64_address_info *info,
/* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
corresponds to the actual size of the memory being loaded/stored and the
mode of the corresponding addressing mode is half of that. */
- if (type == ADDR_QUERY_LDP_STP_N
- && known_eq (GET_MODE_SIZE (mode), 16))
- mode = DFmode;
+ if (type == ADDR_QUERY_LDP_STP_N)
+ {
+ if (known_eq (GET_MODE_SIZE (mode), 16))
+ mode = DFmode;
+ else if (known_eq (GET_MODE_SIZE (mode), 8))
+ mode = SFmode;
+ else
+ return false;
+ }
bool allow_reg_index_p = (!load_store_pair_p
&& ((vec_flags == 0
@@ -11404,7 +11410,9 @@ aarch64_print_operand (FILE *f, rtx x, int code)
machine_mode mode = GET_MODE (x);
if (!MEM_P (x)
- || (code == 'y' && maybe_ne (GET_MODE_SIZE (mode), 16)))
+ || (code == 'y'
+ && maybe_ne (GET_MODE_SIZE (mode), 8)
+ && maybe_ne (GET_MODE_SIZE (mode), 16)))
{
output_operand_lossage ("invalid operand for '%%%c'", code);
return;
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index a0c02e4ac15..88067a3536a 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -236,6 +236,9 @@
;; Double vector modes for combines.
(define_mode_iterator VDC [V8QI V4HI V4BF V4HF V2SI V2SF DI DF])
+;; VDC plus SI and SF.
+(define_mode_iterator VDCSIF [V8QI V4HI V4BF V4HF V2SI V2SF SI SF DI DF])
+
;; Polynomial modes for vector combines.
(define_mode_iterator VDC_P [V8QI V4HI DI])
@@ -1436,8 +1439,8 @@
(define_mode_attr VDBL [(V8QI "V16QI") (V4HI "V8HI")
(V4HF "V8HF") (V4BF "V8BF")
(V2SI "V4SI") (V2SF "V4SF")
- (SI "V2SI") (DI "V2DI")
- (DF "V2DF")])
+ (SI "V2SI") (SF "V2SF")
+ (DI "V2DI") (DF "V2DF")])
;; Register suffix for double-length mode.
(define_mode_attr Vdtype [(V4HF "8h") (V2SF "4s")])
@@ -1557,6 +1560,30 @@
(V4SI "2s") (V8HF "4h")
(V4SF "2s")])
+;; Whether a mode fits in W or X registers (i.e. "w" for 32-bit modes
+;; and "x" for 64-bit modes).
+(define_mode_attr single_wx [(SI "w") (SF "w")
+ (V8QI "x") (V4HI "x")
+ (V4HF "x") (V4BF "x")
+ (V2SI "x") (V2SF "x")
+ (DI "x") (DF "x")])
+
+;; Whether a mode fits in S or D registers (i.e. "s" for 32-bit modes
+;; and "d" for 64-bit modes).
+(define_mode_attr single_type [(SI "s") (SF "s")
+ (V8QI "d") (V4HI "d")
+ (V4HF "d") (V4BF "d")
+ (V2SI "d") (V2SF "d")
+ (DI "d") (DF "d")])
+
+;; Whether a double-width mode fits in D or Q registers (i.e. "d" for
+;; 32-bit modes and "q" for 64-bit modes).
+(define_mode_attr single_dtype [(SI "d") (SF "d")
+ (V8QI "q") (V4HI "q")
+ (V4HF "q") (V4BF "q")
+ (V2SI "q") (V2SF "q")
+ (DI "q") (DF "q")])
+
;; Define corresponding core/FP element mode for each vector mode.
(define_mode_attr vw [(V8QI "w") (V16QI "w")
(V4HI "w") (V8HI "w")
@@ -1849,6 +1876,13 @@
(V4x1DF "") (V4x2DF "_q")
(V4x4BF "") (V4x8BF "_q")])
+;; Equivalent of the "q" attribute for the <VDBL> mode.
+(define_mode_attr dblq [(SI "") (SF "")
+ (V8QI "_q") (V4HI "_q")
+ (V4HF "_q") (V4BF "_q")
+ (V2SI "_q") (V2SF "_q")
+ (DI "_q") (DF "_q")])
+
(define_mode_attr vp [(V8QI "v") (V16QI "v")
(V4HI "v") (V8HI "v")
(V2SI "p") (V4SI "v")
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-13.c b/gcc/testsuite/gcc.target/aarch64/vec-init-13.c
new file mode 100644
index 00000000000..d0f88cbe71a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-13.c
@@ -0,0 +1,123 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+
+/*
+** s64q_1:
+** fmov d0, x0
+** ret
+*/
+int64x2_t s64q_1(int64_t a0) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (int64x2_t) { 0, a0 };
+ else
+ return (int64x2_t) { a0, 0 };
+}
+/*
+** s64q_2:
+** ldr d0, \[x0\]
+** ret
+*/
+int64x2_t s64q_2(int64_t *ptr) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (int64x2_t) { 0, ptr[0] };
+ else
+ return (int64x2_t) { ptr[0], 0 };
+}
+/*
+** s64q_3:
+** ldr d0, \[x0, #?8\]
+** ret
+*/
+int64x2_t s64q_3(int64_t *ptr) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (int64x2_t) { 0, ptr[1] };
+ else
+ return (int64x2_t) { ptr[1], 0 };
+}
+
+/*
+** f64q_1:
+** fmov d0, d0
+** ret
+*/
+float64x2_t f64q_1(float64_t a0) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (float64x2_t) { 0, a0 };
+ else
+ return (float64x2_t) { a0, 0 };
+}
+/*
+** f64q_2:
+** ldr d0, \[x0\]
+** ret
+*/
+float64x2_t f64q_2(float64_t *ptr) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (float64x2_t) { 0, ptr[0] };
+ else
+ return (float64x2_t) { ptr[0], 0 };
+}
+/*
+** f64q_3:
+** ldr d0, \[x0, #?8\]
+** ret
+*/
+float64x2_t f64q_3(float64_t *ptr) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (float64x2_t) { 0, ptr[1] };
+ else
+ return (float64x2_t) { ptr[1], 0 };
+}
+
+/*
+** s32q_1:
+** fmov d0, d0
+** ret
+*/
+int32x4_t s32q_1(int32x2_t a0, int32x2_t a1) {
+ return vcombine_s32 (a0, (int32x2_t) { 0, 0 });
+}
+/*
+** s32q_2:
+** ldr d0, \[x0\]
+** ret
+*/
+int32x4_t s32q_2(int32x2_t *ptr) {
+ return vcombine_s32 (ptr[0], (int32x2_t) { 0, 0 });
+}
+/*
+** s32q_3:
+** ldr d0, \[x0, #?8\]
+** ret
+*/
+int32x4_t s32q_3(int32x2_t *ptr) {
+ return vcombine_s32 (ptr[1], (int32x2_t) { 0, 0 });
+}
+
+/*
+** f32q_1:
+** fmov d0, d0
+** ret
+*/
+float32x4_t f32q_1(float32x2_t a0, float32x2_t a1) {
+ return vcombine_f32 (a0, (float32x2_t) { 0, 0 });
+}
+/*
+** f32q_2:
+** ldr d0, \[x0\]
+** ret
+*/
+float32x4_t f32q_2(float32x2_t *ptr) {
+ return vcombine_f32 (ptr[0], (float32x2_t) { 0, 0 });
+}
+/*
+** f32q_3:
+** ldr d0, \[x0, #?8\]
+** ret
+*/
+float32x4_t f32q_3(float32x2_t *ptr) {
+ return vcombine_f32 (ptr[1], (float32x2_t) { 0, 0 });
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-14.c b/gcc/testsuite/gcc.target/aarch64/vec-init-14.c
new file mode 100644
index 00000000000..02875088cd9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-14.c
@@ -0,0 +1,123 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+
+void ext();
+
+/*
+** s32_1:
+** fmov s0, w0
+** ins v0\.s\[1\], w1
+** ret
+*/
+int32x2_t s32_1(int32_t a0, int32_t a1) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (int32x2_t) { a1, a0 };
+ else
+ return (int32x2_t) { a0, a1 };
+}
+/*
+** s32_2:
+** fmov s0, w0
+** ld1 {v0\.s}\[1\], \[x1\]
+** ret
+*/
+int32x2_t s32_2(int32_t a0, int32_t *ptr) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (int32x2_t) { ptr[0], a0 };
+ else
+ return (int32x2_t) { a0, ptr[0] };
+}
+/*
+** s32_3:
+** ldr s0, \[x0\]
+** ins v0\.s\[1\], w1
+** ret
+*/
+int32x2_t s32_3(int32_t *ptr, int32_t a1) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (int32x2_t) { a1, ptr[0] };
+ else
+ return (int32x2_t) { ptr[0], a1 };
+}
+/*
+** s32_4:
+** stp w1, w2, \[x0\]
+** ret
+*/
+void s32_4(int32x2_t *res, int32_t a0, int32_t a1) {
+ res[0] = (int32x2_t) { a0, a1 };
+}
+/*
+** s32_5:
+** stp w1, w2, \[x0, #?4\]
+** ret
+*/
+void s32_5(uintptr_t res, int32_t a0, int32_t a1) {
+ *(int32x2_t *)(res + 4) = (int32x2_t) { a0, a1 };
+}
+/* Currently uses d8 to hold res across the call. */
+int32x2_t s32_6(int32_t a0, int32_t a1) {
+ int32x2_t res = { a0, a1 };
+ ext ();
+ return res;
+}
+
+/*
+** f32_1:
+** ins v0\.s\[1\], v1\.s\[0\]
+** ret
+*/
+float32x2_t f32_1(float32_t a0, float32_t a1) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (float32x2_t) { a1, a0 };
+ else
+ return (float32x2_t) { a0, a1 };
+}
+/*
+** f32_2:
+** ld1 {v0\.s}\[1\], \[x0\]
+** ret
+*/
+float32x2_t f32_2(float32_t a0, float32_t *ptr) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (float32x2_t) { ptr[0], a0 };
+ else
+ return (float32x2_t) { a0, ptr[0] };
+}
+/*
+** f32_3:
+** ldr s0, \[x0\]
+** ins v0\.s\[1\], v1\.s\[0\]
+** ret
+*/
+float32x2_t f32_3(float32_t a0, float32_t a1, float32_t *ptr) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (float32x2_t) { a1, ptr[0] };
+ else
+ return (float32x2_t) { ptr[0], a1 };
+}
+/*
+** f32_4:
+** stp s0, s1, \[x0\]
+** ret
+*/
+void f32_4(float32x2_t *res, float32_t a0, float32_t a1) {
+ res[0] = (float32x2_t) { a0, a1 };
+}
+/*
+** f32_5:
+** stp s0, s1, \[x0, #?4\]
+** ret
+*/
+void f32_5(uintptr_t res, float32_t a0, float32_t a1) {
+ *(float32x2_t *)(res + 4) = (float32x2_t) { a0, a1 };
+}
+/* Currently uses d8 to hold res across the call. */
+float32x2_t f32_6(float32_t a0, float32_t a1) {
+ float32x2_t res = { a0, a1 };
+ ext ();
+ return res;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-15.c b/gcc/testsuite/gcc.target/aarch64/vec-init-15.c
new file mode 100644
index 00000000000..82f0a8f55ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-15.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+int32x2_t f1(int32_t *x, int c) {
+ return c ? (int32x2_t) { x[0], x[2] } : (int32x2_t) { 0, 0 };
+}
+
+int32x2_t f2(int32_t *x, int i0, int i1, int c) {
+ return c ? (int32x2_t) { x[i0], x[i1] } : (int32x2_t) { 0, 0 };
+}
+
+/* { dg-final { scan-assembler-times {\t(?:ldr\ts[0-9]+|ld1\t)} 4 } } */
+/* { dg-final { scan-assembler-not {\tldr\tw} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-16.c b/gcc/testsuite/gcc.target/aarch64/vec-init-16.c
new file mode 100644
index 00000000000..e00aec7a32c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-16.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+void f1(int32x2_t *res, int32_t *x, int c0, int c1) {
+ res[0] = (int32x2_t) { c0 ? x[0] : 0, c1 ? x[2] : 0 };
+}
+
+/* { dg-final { scan-assembler-times {\tldr\tw[0-9]+} 2 } } */
+/* { dg-final { scan-assembler {\tstp\tw[0-9]+, w[0-9]+} } } */
+/* { dg-final { scan-assembler-not {\tldr\ts} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-17.c b/gcc/testsuite/gcc.target/aarch64/vec-init-17.c
new file mode 100644
index 00000000000..86191b3ca1d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-17.c
@@ -0,0 +1,73 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+
+/*
+** s32_1:
+** fmov s0, w0
+** ret
+*/
+int32x2_t s32_1(int32_t a0) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (int32x2_t) { 0, a0 };
+ else
+ return (int32x2_t) { a0, 0 };
+}
+/*
+** s32_2:
+** ldr s0, \[x0\]
+** ret
+*/
+int32x2_t s32_2(int32_t *ptr) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (int32x2_t) { 0, ptr[0] };
+ else
+ return (int32x2_t) { ptr[0], 0 };
+}
+/*
+** s32_3:
+** ldr s0, \[x0, #?4\]
+** ret
+*/
+int32x2_t s32_3(int32_t *ptr) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (int32x2_t) { 0, ptr[1] };
+ else
+ return (int32x2_t) { ptr[1], 0 };
+}
+
+/*
+** f32_1:
+** fmov s0, s0
+** ret
+*/
+float32x2_t f32_1(float32_t a0) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (float32x2_t) { 0, a0 };
+ else
+ return (float32x2_t) { a0, 0 };
+}
+/*
+** f32_2:
+** ldr s0, \[x0\]
+** ret
+*/
+float32x2_t f32_2(float32_t *ptr) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (float32x2_t) { 0, ptr[0] };
+ else
+ return (float32x2_t) { ptr[0], 0 };
+}
+/*
+** f32_3:
+** ldr s0, \[x0, #?4\]
+** ret
+*/
+float32x2_t f32_3(float32_t *ptr) {
+ if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ return (float32x2_t) { 0, ptr[1] };
+ else
+ return (float32x2_t) { ptr[1], 0 };
+}