From mboxrd@z Thu Jan  1 00:00:00 1970
Received: by sourceware.org (Postfix, from userid 1130)
	id 700C43858436; Wed, 9 Feb 2022 16:57:57 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 700C43858436
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Type: text/plain; charset="utf-8"
From: Richard Sandiford
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r12-7142] aarch64: Extend vec_concat patterns to 8-byte vectors
X-Act-Checkin: gcc
X-Git-Author: Richard Sandiford
X-Git-Refname: refs/heads/trunk
X-Git-Oldrev: bce43c0493f65d2589776f0dafa396d5477a84c7
X-Git-Newrev: 83d7e720cd1d075312e798c4ebd2e093f03465fb
Message-Id: <20220209165757.700C43858436@sourceware.org>
Date: Wed, 9 Feb 2022 16:57:57 +0000 (GMT)
X-BeenThere: gcc-cvs@gcc.gnu.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Gcc-cvs mailing list
X-List-Received-Date: Wed, 09 Feb 2022 16:57:57 -0000

https://gcc.gnu.org/g:83d7e720cd1d075312e798c4ebd2e093f03465fb

commit r12-7142-g83d7e720cd1d075312e798c4ebd2e093f03465fb
Author: Richard Sandiford
Date:   Wed Feb 9 16:57:06 2022 +0000

    aarch64: Extend vec_concat patterns to 8-byte vectors
    
    This patch extends the previous support for 16-byte vec_concat
    so that it supports pairs of 4-byte elements.  This too isn't
    strictly a regression fix, since the 8-byte forms weren't affected
    by the same problems as the 16-byte forms, but it leaves things
    in a more consistent state.
    
    gcc/
    	* config/aarch64/iterators.md (VDCSIF): New mode iterator.
    	(VDBL): Handle SF.
    	(single_wx, single_type, single_dtype, dblq): New mode attributes.
    	* config/aarch64/aarch64-simd.md (load_pair_lanes<mode>): Extend
    	from VDC to VDCSIF.
    	(store_pair_lanes<mode>): Likewise.
    	(*aarch64_combine_internal<mode>): Likewise.
    	(*aarch64_combine_internal_be<mode>): Likewise.
    	(*aarch64_combinez<mode>): Likewise.
    	(*aarch64_combinez_be<mode>): Likewise.
    	* config/aarch64/aarch64.cc (aarch64_classify_address): Handle
    	8-byte modes for ADDR_QUERY_LDP_STP_N.
    	(aarch64_print_operand): Likewise for %y.
    
    gcc/testsuite/
    	* gcc.target/aarch64/vec-init-13.c: New test.
    	* gcc.target/aarch64/vec-init-14.c: Likewise.
    	* gcc.target/aarch64/vec-init-15.c: Likewise.
    	* gcc.target/aarch64/vec-init-16.c: Likewise.
    	* gcc.target/aarch64/vec-init-17.c: Likewise.
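[Editorial note: for readers skimming the patch, a minimal C sketch of the
8-byte cases the extended patterns cover, distilled from the new
vec-init-14.c and vec-init-17.c tests below; the expected little-endian
code (asserted by those tests) is shown in the comments.  The function
names here are illustrative; the committed tests call them s32_1 and
s32_4.]

    #include <arm_neon.h>

    /* A 4-byte element concatenated with zero: the tests expect this
       to compile to a single "fmov s0, w0".  */
    int32x2_t concat_with_zero(int32_t a0) {
      return (int32x2_t) { a0, 0 };
    }

    /* Two 4-byte elements stored as a pair: the tests expect a single
       "stp w1, w2, [x0]" via the extended store_pair_lanes pattern.  */
    void concat_to_memory(int32x2_t *res, int32_t a0, int32_t a1) {
      res[0] = (int32x2_t) { a0, a1 };
    }
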
Diff:
---
 gcc/config/aarch64/aarch64-simd.md             |  72 +++++++--------
 gcc/config/aarch64/aarch64.cc                  |  16 +++-
 gcc/config/aarch64/iterators.md                |  38 +++++++-
 gcc/testsuite/gcc.target/aarch64/vec-init-13.c | 123 +++++++++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/vec-init-14.c | 123 +++++++++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/vec-init-15.c |  15 +++
 gcc/testsuite/gcc.target/aarch64/vec-init-16.c |  12 +++
 gcc/testsuite/gcc.target/aarch64/vec-init-17.c |  73 +++++++++++++++
 8 files changed, 430 insertions(+), 42 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index ef6e772503d..18733428f3f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4243,12 +4243,12 @@
 (define_insn "load_pair_lanes<mode>"
   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
 	(vec_concat:<VDBL>
-	  (match_operand:VDC 1 "memory_operand" "Utq")
-	  (match_operand:VDC 2 "memory_operand" "m")))]
+	  (match_operand:VDCSIF 1 "memory_operand" "Utq")
+	  (match_operand:VDCSIF 2 "memory_operand" "m")))]
   "TARGET_SIMD
    && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
-  "ldr\\t%q0, %1"
-  [(set_attr "type" "neon_load1_1reg_q")]
+  "ldr\\t%<single_dtype>0, %1"
+  [(set_attr "type" "neon_load1_1reg<dblq>")]
 )
 
 ;; This STP pattern is a partial duplicate of the general vec_concat patterns
@@ -4273,12 +4273,12 @@
 (define_insn "store_pair_lanes<mode>"
   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
 	(vec_concat:<VDBL>
-	  (match_operand:VDC 1 "register_operand" "w, r")
-	  (match_operand:VDC 2 "register_operand" "w, r")))]
+	  (match_operand:VDCSIF 1 "register_operand" "w, r")
+	  (match_operand:VDCSIF 2 "register_operand" "w, r")))]
   "TARGET_SIMD"
   "@
-   stp\\t%d1, %d2, %y0
-   stp\\t%x1, %x2, %y0"
+   stp\t%<single_type>1, %<single_type>2, %y0
+   stp\t%<single_wx>1, %<single_wx>2, %y0"
   [(set_attr "type" "neon_stp, store_16")]
 )
 
@@ -4292,37 +4292,37 @@
 (define_insn "*aarch64_combine_internal<mode>"
   [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn")
 	(vec_concat:<VDBL>
-	  (match_operand:VDC 1 "register_operand" "0, 0, 0, ?w, ?r")
-	  (match_operand:VDC 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, w, ?r")))]
+	  (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")
+	  (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, w, ?r")))]
   "TARGET_SIMD
    && !BYTES_BIG_ENDIAN
    && (register_operand (operands[0], <VDBL>mode)
       || register_operand (operands[2], <MODE>mode))"
   "@
-   ins\t%0.d[1], %2.d[0]
-   ins\t%0.d[1], %2
-   ld1\t{%0.d}[1], %2
-   stp\t%d1, %d2, %y0
-   stp\t%x1, %x2, %y0"
-  [(set_attr "type" "neon_ins_q, neon_from_gp_q, neon_load1_one_lane_q, neon_stp, store_16")]
+   ins\t%0.<single_type>[1], %2.<single_type>[0]
+   ins\t%0.<single_type>[1], %<single_wx>2
+   ld1\t{%0.<single_type>}[1], %2
+   stp\t%<single_type>1, %<single_type>2, %y0
+   stp\t%<single_wx>1, %<single_wx>2, %y0"
+  [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")]
 )
 
 (define_insn "*aarch64_combine_internal_be<mode>"
   [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn")
 	(vec_concat:<VDBL>
-	  (match_operand:VDC 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, ?w, ?r")
-	  (match_operand:VDC 1 "register_operand" "0, 0, 0, ?w, ?r")))]
+	  (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, ?w, ?r")
+	  (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")))]
   "TARGET_SIMD
    && BYTES_BIG_ENDIAN
    && (register_operand (operands[0], <VDBL>mode)
      || register_operand (operands[2], <MODE>mode))"
   "@
-   ins\t%0.d[1], %2.d[0]
-   ins\t%0.d[1], %2
-   ld1\t{%0.d}[1], %2
-   stp\t%d2, %d1, %y0
-   stp\t%x2, %x1, %y0"
-  [(set_attr "type" "neon_ins_q, neon_from_gp_q, neon_load1_one_lane_q, neon_stp, store_16")]
+   ins\t%0.<single_type>[1], %2.<single_type>[0]
+   ins\t%0.<single_type>[1], %<single_wx>2
+   ld1\t{%0.<single_type>}[1], %2
+   stp\t%<single_type>2, %<single_type>1, %y0
+   stp\t%<single_wx>2, %<single_wx>1, %y0"
+  [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")]
 )
 
 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
@@ -4331,13 +4331,13 @@
 (define_insn "*aarch64_combinez<mode>"
   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
 	(vec_concat:<VDBL>
-	  (match_operand:VDC 1 "nonimmediate_operand" "w,?r,m")
-	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
+	  (match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")
+	  (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))]
   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
   "@
-   mov\\t%0.8b, %1.8b
-   fmov\t%d0, %1
-   ldr\\t%d0, %1"
+   fmov\\t%<single_type>0, %<single_type>1
+   fmov\t%<single_type>0, %<single_wx>1
+   ldr\\t%<single_type>0, %1"
   [(set_attr "type" "neon_move, neon_from_gp, neon_load1_1reg")
    (set_attr "arch" "simd,fp,simd")]
 )
@@ -4345,13 +4345,13 @@
 (define_insn "*aarch64_combinez_be<mode>"
   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
 	(vec_concat:<VDBL>
-	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
-	  (match_operand:VDC 1 "nonimmediate_operand" "w,?r,m")))]
+	  (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")
+	  (match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")))]
   "TARGET_SIMD && BYTES_BIG_ENDIAN"
   "@
-   mov\\t%0.8b, %1.8b
-   fmov\t%d0, %1
-   ldr\\t%d0, %1"
+   fmov\\t%<single_type>0, %<single_type>1
+   fmov\t%<single_type>0, %<single_wx>1
+   ldr\\t%<single_type>0, %1"
  [(set_attr "type" "neon_move, neon_from_gp, neon_load1_1reg")
   (set_attr "arch" "simd,fp,simd")]
 )
@@ -4362,8 +4362,8 @@
 (define_expand "@aarch64_vec_concat<mode>"
   [(set (match_operand:<VDBL> 0 "register_operand")
 	(vec_concat:<VDBL>
-	  (match_operand:VDC 1 "general_operand")
-	  (match_operand:VDC 2 "general_operand")))]
+	  (match_operand:VDCSIF 1 "general_operand")
+	  (match_operand:VDCSIF 2 "general_operand")))]
   "TARGET_SIMD"
 {
   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index af42d1bedfe..7bb97bd48e4 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -9922,9 +9922,15 @@ aarch64_classify_address (struct aarch64_address_info *info,
   /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
      corresponds to the actual size of the memory being loaded/stored and the
      mode of the corresponding addressing mode is half of that.  */
-  if (type == ADDR_QUERY_LDP_STP_N
-      && known_eq (GET_MODE_SIZE (mode), 16))
-    mode = DFmode;
+  if (type == ADDR_QUERY_LDP_STP_N)
+    {
+      if (known_eq (GET_MODE_SIZE (mode), 16))
+	mode = DFmode;
+      else if (known_eq (GET_MODE_SIZE (mode), 8))
+	mode = SFmode;
+      else
+	return false;
+    }
 
   bool allow_reg_index_p = (!load_store_pair_p
 			    && ((vec_flags == 0
@@ -11404,7 +11410,9 @@ aarch64_print_operand (FILE *f, rtx x, int code)
 	machine_mode mode = GET_MODE (x);
 
 	if (!MEM_P (x)
-	    || (code == 'y' && maybe_ne (GET_MODE_SIZE (mode), 16)))
+	    || (code == 'y'
+		&& maybe_ne (GET_MODE_SIZE (mode), 8)
+		&& maybe_ne (GET_MODE_SIZE (mode), 16)))
 	  {
 	    output_operand_lossage ("invalid operand for '%%%c'", code);
 	    return;
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index a0c02e4ac15..88067a3536a 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -236,6 +236,9 @@
 ;; Double vector modes for combines.
 (define_mode_iterator VDC [V8QI V4HI V4BF V4HF V2SI V2SF DI DF])
 
+;; VDC plus SI and SF.
+(define_mode_iterator VDCSIF [V8QI V4HI V4BF V4HF V2SI V2SF SI SF DI DF])
+
 ;; Polynomial modes for vector combines.
 (define_mode_iterator VDC_P [V8QI V4HI DI])
 
@@ -1436,8 +1439,8 @@
 (define_mode_attr VDBL [(V8QI "V16QI") (V4HI "V8HI")
 			(V4HF "V8HF")  (V4BF "V8BF")
 			(V2SI "V4SI")  (V2SF "V4SF")
-			(SI   "V2SI")  (DI   "V2DI")
-			(DF   "V2DF")])
+			(SI   "V2SI")  (SF   "V2SF")
+			(DI   "V2DI")  (DF   "V2DF")])
 
 ;; Register suffix for double-length mode.
 (define_mode_attr Vdtype [(V4HF "8h") (V2SF "4s")])
@@ -1557,6 +1560,30 @@
 			(V4SI "2s") (V8HF "4h")
 			(V4SF "2s")])
 
+;; Whether a mode fits in W or X registers (i.e. "w" for 32-bit modes
+;; and "x" for 64-bit modes).
+(define_mode_attr single_wx [(SI   "w") (SF   "w")
+			     (V8QI "x") (V4HI "x")
+			     (V4HF "x") (V4BF "x")
+			     (V2SI "x") (V2SF "x")
+			     (DI   "x") (DF   "x")])
+
+;; Whether a mode fits in S or D registers (i.e. "s" for 32-bit modes
+;; and "d" for 64-bit modes).
+(define_mode_attr single_type [(SI   "s") (SF   "s")
+			       (V8QI "d") (V4HI "d")
+			       (V4HF "d") (V4BF "d")
+			       (V2SI "d") (V2SF "d")
+			       (DI   "d") (DF   "d")])
+
+;; Whether a double-width mode fits in D or Q registers (i.e. "d" for
+;; 32-bit modes and "q" for 64-bit modes).
+(define_mode_attr single_dtype [(SI   "d") (SF   "d")
+				(V8QI "q") (V4HI "q")
+				(V4HF "q") (V4BF "q")
+				(V2SI "q") (V2SF "q")
+				(DI   "q") (DF   "q")])
+
 ;; Define corresponding core/FP element mode for each vector mode.
 (define_mode_attr vw [(V8QI "w") (V16QI "w")
 		      (V4HI "w") (V8HI "w")
@@ -1849,6 +1876,13 @@
 		       (V4x1DF "") (V4x2DF "_q")
 		       (V4x4BF "") (V4x8BF "_q")])
 
+;; Equivalent of the "q" attribute for the <VDBL> mode.
+(define_mode_attr dblq [(SI   "") (SF   "")
+			(V8QI "_q") (V4HI "_q")
+			(V4HF "_q") (V4BF "_q")
+			(V2SI "_q") (V2SF "_q")
+			(DI   "_q") (DF   "_q")])
+
 (define_mode_attr vp [(V8QI "v") (V16QI "v")
 		      (V4HI "v") (V8HI  "v")
 		      (V2SI "p") (V4SI  "v")
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-13.c b/gcc/testsuite/gcc.target/aarch64/vec-init-13.c
new file mode 100644
index 00000000000..d0f88cbe71a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-13.c
@@ -0,0 +1,123 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+
+/*
+** s64q_1:
+**	fmov	d0, x0
+**	ret
+*/
+int64x2_t s64q_1(int64_t a0) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (int64x2_t) { 0, a0 };
+  else
+    return (int64x2_t) { a0, 0 };
+}
+/*
+** s64q_2:
+**	ldr	d0, \[x0\]
+**	ret
+*/
+int64x2_t s64q_2(int64_t *ptr) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (int64x2_t) { 0, ptr[0] };
+  else
+    return (int64x2_t) { ptr[0], 0 };
+}
+/*
+** s64q_3:
+**	ldr	d0, \[x0, #?8\]
+**	ret
+*/
+int64x2_t s64q_3(int64_t *ptr) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (int64x2_t) { 0, ptr[1] };
+  else
+    return (int64x2_t) { ptr[1], 0 };
+}
+
+/*
+** f64q_1:
+**	fmov	d0, d0
+**	ret
+*/
+float64x2_t f64q_1(float64_t a0) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (float64x2_t) { 0, a0 };
+  else
+    return (float64x2_t) { a0, 0 };
+}
+/*
+** f64q_2:
+**	ldr	d0, \[x0\]
+**	ret
+*/
+float64x2_t f64q_2(float64_t *ptr) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (float64x2_t) { 0, ptr[0] };
+  else
+    return (float64x2_t) { ptr[0], 0 };
+}
+/*
+** f64q_3:
+**	ldr	d0, \[x0, #?8\]
+**	ret
+*/
+float64x2_t f64q_3(float64_t *ptr) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (float64x2_t) { 0, ptr[1] };
+  else
+    return (float64x2_t) { ptr[1], 0 };
+}
+
+/*
+** s32q_1:
+**	fmov	d0, d0
+**	ret
+*/
+int32x4_t s32q_1(int32x2_t a0, int32x2_t a1) {
+  return vcombine_s32 (a0, (int32x2_t) { 0, 0 });
+}
+/*
+** s32q_2:
+**	ldr	d0, \[x0\]
+**	ret
+*/
+int32x4_t s32q_2(int32x2_t *ptr) {
+  return vcombine_s32 (ptr[0], (int32x2_t) { 0, 0 });
+}
+/*
+** s32q_3:
+**	ldr	d0, \[x0, #?8\]
+**	ret
+*/
+int32x4_t s32q_3(int32x2_t *ptr) {
+  return vcombine_s32 (ptr[1], (int32x2_t) { 0, 0 });
+}
+
+/*
+** f32q_1:
+**	fmov	d0, d0
+**	ret
+*/
+float32x4_t f32q_1(float32x2_t a0, float32x2_t a1) {
+  return vcombine_f32 (a0, (float32x2_t) { 0, 0 });
+}
+/*
+** f32q_2:
+**	ldr	d0, \[x0\]
+**	ret
+*/
+float32x4_t f32q_2(float32x2_t *ptr) {
+  return vcombine_f32 (ptr[0], (float32x2_t) { 0, 0 });
+}
+/*
+** f32q_3:
+**	ldr	d0, \[x0, #?8\]
+**	ret
+*/
+float32x4_t f32q_3(float32x2_t *ptr) {
+  return vcombine_f32 (ptr[1], (float32x2_t) { 0, 0 });
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-14.c b/gcc/testsuite/gcc.target/aarch64/vec-init-14.c
new file mode 100644
index 00000000000..02875088cd9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-14.c
@@ -0,0 +1,123 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+
+void ext();
+
+/*
+** s32_1:
+**	fmov	s0, w0
+**	ins	v0\.s\[1\], w1
+**	ret
+*/
+int32x2_t s32_1(int32_t a0, int32_t a1) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (int32x2_t) { a1, a0 };
+  else
+    return (int32x2_t) { a0, a1 };
+}
+/*
+** s32_2:
+**	fmov	s0, w0
+**	ld1	{v0\.s}\[1\], \[x1\]
+**	ret
+*/
+int32x2_t s32_2(int32_t a0, int32_t *ptr) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (int32x2_t) { ptr[0], a0 };
+  else
+    return (int32x2_t) { a0, ptr[0] };
+}
+/*
+** s32_3:
+**	ldr	s0, \[x0\]
+**	ins	v0\.s\[1\], w1
+**	ret
+*/
+int32x2_t s32_3(int32_t *ptr, int32_t a1) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (int32x2_t) { a1, ptr[0] };
+  else
+    return (int32x2_t) { ptr[0], a1 };
+}
+/*
+** s32_4:
+**	stp	w1, w2, \[x0\]
+**	ret
+*/
+void s32_4(int32x2_t *res, int32_t a0, int32_t a1) {
+  res[0] = (int32x2_t) { a0, a1 };
+}
+/*
+** s32_5:
+**	stp	w1, w2, \[x0, #?4\]
+**	ret
+*/
+void s32_5(uintptr_t res, int32_t a0, int32_t a1) {
+  *(int32x2_t *)(res + 4) = (int32x2_t) { a0, a1 };
+}
+/* Currently uses d8 to hold res across the call.  */
+int32x2_t s32_6(int32_t a0, int32_t a1) {
+  int32x2_t res = { a0, a1 };
+  ext ();
+  return res;
+}
+
+/*
+** f32_1:
+**	ins	v0\.s\[1\], v1\.s\[0\]
+**	ret
+*/
+float32x2_t f32_1(float32_t a0, float32_t a1) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (float32x2_t) { a1, a0 };
+  else
+    return (float32x2_t) { a0, a1 };
+}
+/*
+** f32_2:
+**	ld1	{v0\.s}\[1\], \[x0\]
+**	ret
+*/
+float32x2_t f32_2(float32_t a0, float32_t *ptr) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (float32x2_t) { ptr[0], a0 };
+  else
+    return (float32x2_t) { a0, ptr[0] };
+}
+/*
+** f32_3:
+**	ldr	s0, \[x0\]
+**	ins	v0\.s\[1\], v1\.s\[0\]
+**	ret
+*/
+float32x2_t f32_3(float32_t a0, float32_t a1, float32_t *ptr) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (float32x2_t) { a1, ptr[0] };
+  else
+    return (float32x2_t) { ptr[0], a1 };
+}
+/*
+** f32_4:
+**	stp	s0, s1, \[x0\]
+**	ret
+*/
+void f32_4(float32x2_t *res, float32_t a0, float32_t a1) {
+  res[0] = (float32x2_t) { a0, a1 };
+}
+/*
+** f32_5:
+**	stp	s0, s1, \[x0, #?4\]
+**	ret
+*/
+void f32_5(uintptr_t res, float32_t a0, float32_t a1) {
+  *(float32x2_t *)(res + 4) = (float32x2_t) { a0, a1 };
+}
+/* Currently uses d8 to hold res across the call.  */
+float32x2_t f32_6(float32_t a0, float32_t a1) {
+  float32x2_t res = { a0, a1 };
+  ext ();
+  return res;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-15.c b/gcc/testsuite/gcc.target/aarch64/vec-init-15.c
new file mode 100644
index 00000000000..82f0a8f55ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-15.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+int32x2_t f1(int32_t *x, int c) {
+  return c ? (int32x2_t) { x[0], x[2] } : (int32x2_t) { 0, 0 };
+}
+
+int32x2_t f2(int32_t *x, int i0, int i1, int c) {
+  return c ? (int32x2_t) { x[i0], x[i1] } : (int32x2_t) { 0, 0 };
+}
+
+/* { dg-final { scan-assembler-times {\t(?:ldr\ts[0-9]+|ld1\t)} 4 } } */
+/* { dg-final { scan-assembler-not {\tldr\tw} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-16.c b/gcc/testsuite/gcc.target/aarch64/vec-init-16.c
new file mode 100644
index 00000000000..e00aec7a32c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-16.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <arm_neon.h>
+
+void f1(int32x2_t *res, int32_t *x, int c0, int c1) {
+  res[0] = (int32x2_t) { c0 ? x[0] : 0, c1 ? x[2] : 0 };
+}
+
+/* { dg-final { scan-assembler-times {\tldr\tw[0-9]+} 2 } } */
+/* { dg-final { scan-assembler {\tstp\tw[0-9]+, w[0-9]+} } } */
+/* { dg-final { scan-assembler-not {\tldr\ts} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-17.c b/gcc/testsuite/gcc.target/aarch64/vec-init-17.c
new file mode 100644
index 00000000000..86191b3ca1d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-17.c
@@ -0,0 +1,73 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+
+/*
+** s32_1:
+**	fmov	s0, w0
+**	ret
+*/
+int32x2_t s32_1(int32_t a0) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (int32x2_t) { 0, a0 };
+  else
+    return (int32x2_t) { a0, 0 };
+}
+/*
+** s32_2:
+**	ldr	s0, \[x0\]
+**	ret
+*/
+int32x2_t s32_2(int32_t *ptr) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (int32x2_t) { 0, ptr[0] };
+  else
+    return (int32x2_t) { ptr[0], 0 };
+}
+/*
+** s32_3:
+**	ldr	s0, \[x0, #?4\]
+**	ret
+*/
+int32x2_t s32_3(int32_t *ptr) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (int32x2_t) { 0, ptr[1] };
+  else
+    return (int32x2_t) { ptr[1], 0 };
+}
+
+/*
+** f32_1:
+**	fmov	s0, s0
+**	ret
+*/
+float32x2_t f32_1(float32_t a0) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (float32x2_t) { 0, a0 };
+  else
+    return (float32x2_t) { a0, 0 };
+}
+/*
+** f32_2:
+**	ldr	s0, \[x0\]
+**	ret
+*/
+float32x2_t f32_2(float32_t *ptr) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (float32x2_t) { 0, ptr[0] };
+  else
+    return (float32x2_t) { ptr[0], 0 };
+}
+/*
+** f32_3:
+**	ldr	s0, \[x0, #?4\]
+**	ret
+*/
+float32x2_t f32_3(float32_t *ptr) {
+  if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return (float32x2_t) { 0, ptr[1] };
+  else
+    return (float32x2_t) { ptr[1], 0 };
+}