From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2109) id B2CAC3858CDA; Tue, 10 Jan 2023 13:38:19 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org B2CAC3858CDA DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1673357899; bh=shF/XgfGzWnXjNySBV50rnVc4ToVGv79eslxiNjy/rM=; h=From:To:Subject:Date:From; b=OyR3fByRCjUFHJOHTjuHyFpewS710Nzm+B5yPTQkLv0tC3kawhtJmt4jZm9cCLOfX 5qXiM4UCzYXVSBOHINRl5pb+1OrvtPAP9DHeOatMJC8tdDIOgVe8kI6q8RbpXcPWw1 6A7/ysLlaZV82OilarJACOOL6kDz54/r93yLB1PQ= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Stam Markianos-Wright To: gcc-cvs@gcc.gnu.org Subject: [gcc r11-10461] Fix memory constraint on MVE v[ld/st][2/4] instructions [PR107714] X-Act-Checkin: gcc X-Git-Author: Stam Markianos-Wright X-Git-Refname: refs/heads/releases/gcc-11 X-Git-Oldrev: cedc5812df74843dcfbe9a2599e5fc00e6fee0fb X-Git-Newrev: 08842ad274f5e2630994f7c6e70b2d31768107ea Message-Id: <20230110133819.B2CAC3858CDA@sourceware.org> Date: Tue, 10 Jan 2023 13:38:19 +0000 (GMT) List-Id: https://gcc.gnu.org/g:08842ad274f5e2630994f7c6e70b2d31768107ea commit r11-10461-g08842ad274f5e2630994f7c6e70b2d31768107ea Author: Stam Markianos-Wright Date: Fri Dec 30 11:25:22 2022 +0000 Fix memory constraint on MVE v[ld/st][2/4] instructions [PR107714] In the M-Class Arm-ARM: https://developer.arm.com/documentation/ddi0553/bu/?lang=en these MVE instructions only have '!' writeback variant and at: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107714 we found that the Um constraint would also allow through a register offset writeback, resulting in an assembler error. Here I have added a new constraint and predicate for these instructions, which (uniquely, AFAICT), only support a `!` writeback increment by the data size (inside the compiler this is a POST_INC). No regressions in arm-none-eabi with MVE and MVE.FP. gcc/ChangeLog: PR target/107714 * config/arm/arm-protos.h (mve_struct_mem_operand): New protoype. * config/arm/arm.c (mve_struct_mem_operand): New function. * config/arm/constraints.md (Ug): New constraint. * config/arm/mve.md (mve_vst4q): Change constraint. (mve_vst2q): Likewise. (mve_vld4q): Likewise. (mve_vld2q): Likewise. * config/arm/predicates.md (mve_struct_operand): New predicate. gcc/testsuite/ChangeLog: PR target/107714 * gcc.target/arm/mve/intrinsics/vldst24q_reg_offset.c: New test. (cherry picked from commit 4269a6567eb991e6838f40bda5be9e3a7972530c) Diff: --- gcc/config/arm/arm-protos.h | 1 + gcc/config/arm/arm.c | 18 ++ gcc/config/arm/constraints.md | 5 + gcc/config/arm/mve.md | 8 +- gcc/config/arm/predicates.md | 4 + .../arm/mve/intrinsics/vldst24q_reg_offset.c | 300 +++++++++++++++++++++ 6 files changed, 332 insertions(+), 4 deletions(-) diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 08d152e67ac..d5dc40a7a90 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -122,6 +122,7 @@ extern int arm_coproc_mem_operand_wb (rtx, int); extern int neon_vector_mem_operand (rtx, int, bool); extern int mve_vector_mem_operand (machine_mode, rtx, bool); extern int neon_struct_mem_operand (rtx); +extern int mve_struct_mem_operand (rtx); extern rtx *neon_vcmla_lane_prepare_operands (rtx *); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 730b1fe0071..96d62b2164e 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -13532,6 +13532,24 @@ neon_vector_mem_operand (rtx op, int type, bool strict) return FALSE; } +/* Return TRUE if OP is a mem suitable for loading/storing an MVE struct + type. */ +int +mve_struct_mem_operand (rtx op) +{ + rtx ind = XEXP (op, 0); + + /* Match: (mem (reg)). */ + if (REG_P (ind)) + return arm_address_register_rtx_p (ind, 0); + + /* Allow only post-increment by the mode size. */ + if (GET_CODE (ind) == POST_INC) + return arm_address_register_rtx_p (XEXP (ind, 0), 0); + + return FALSE; +} + /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct type. */ int diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md index a5a19a7ed5b..c7b65701030 100644 --- a/gcc/config/arm/constraints.md +++ b/gcc/config/arm/constraints.md @@ -460,6 +460,11 @@ (and (match_code "mem") (match_test "TARGET_32BIT && arm_coproc_mem_operand (op, FALSE)"))) +(define_memory_constraint "Ug" + "@internal + In Thumb-2 state a valid MVE struct load/store address." + (match_operand 0 "mve_struct_operand")) + (define_memory_constraint "Uj" "@internal In ARM/Thumb-2 state a VFP load/store address that supports writeback diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index c9313744a16..1935813e474 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -92,7 +92,7 @@ ;; [vst4q]) ;; (define_insn "mve_vst4q" - [(set (match_operand:XI 0 "neon_struct_operand" "=Um") + [(set (match_operand:XI 0 "mve_struct_operand" "=Ug") (unspec:XI [(match_operand:XI 1 "s_register_operand" "w") (unspec:MVE_VLD_ST [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] VST4Q)) @@ -10312,7 +10312,7 @@ ;; [vst2q]) ;; (define_insn "mve_vst2q" - [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + [(set (match_operand:OI 0 "mve_struct_operand" "=Ug") (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") (unspec:MVE_VLD_ST [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] VST2Q)) @@ -10341,7 +10341,7 @@ ;; (define_insn "mve_vld2q" [(set (match_operand:OI 0 "s_register_operand" "=w") - (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") + (unspec:OI [(match_operand:OI 1 "mve_struct_operand" "Ug") (unspec:MVE_VLD_ST [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] VLD2Q)) ] @@ -10369,7 +10369,7 @@ ;; (define_insn "mve_vld4q" [(set (match_operand:XI 0 "s_register_operand" "=w") - (unspec:XI [(match_operand:XI 1 "neon_struct_operand" "Um") + (unspec:XI [(match_operand:XI 1 "mve_struct_operand" "Ug") (unspec:MVE_VLD_ST [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] VLD4Q)) ] diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md index c661f015fc5..4cc46142671 100644 --- a/gcc/config/arm/predicates.md +++ b/gcc/config/arm/predicates.md @@ -872,6 +872,10 @@ (and (match_code "mem") (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, true)"))) +(define_predicate "mve_struct_operand" + (and (match_code "mem") + (match_test "TARGET_HAVE_MVE && mve_struct_mem_operand (op)"))) + (define_predicate "neon_permissive_struct_operand" (and (match_code "mem") (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, false)"))) diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldst24q_reg_offset.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldst24q_reg_offset.c new file mode 100644 index 00000000000..d028b91e81a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldst24q_reg_offset.c @@ -0,0 +1,300 @@ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O1" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "arm_mve.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* +**test: +** ... +** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +*/ +void +test(const uint8_t * in, uint8_t * out, int width) +{ + uint8x16x2_t rg = vld2q(in); + uint8x16x2_t gb = vld2q(in + width); + vst2q (out, rg); + vst2q (out + width, gb); +} + +/* +**test2: +** ... +** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]! +** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]! +** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +*/ +void +test2(const uint8_t * in, uint8_t * out) +{ + uint8x16x2_t rg = vld2q(in); + uint8x16x2_t gb = vld2q(in + 32); + vst2q (out, rg); + vst2q (out + 32, gb); +} + +/* +**test3: +** ... +** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +*/ +void +test3(const uint8_t * in, uint8_t * out) +{ + uint8x16x2_t rg = vld2q(in); + uint8x16x2_t gb = vld2q(in - 32); + vst2q (out, rg); + vst2q (out - 32, gb); +} + +/* +**test4: +** ... +** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +*/ +void +test4(const uint8_t * in, uint8_t * out) +{ + uint8x16x2_t rg = vld2q(in); + uint8x16x2_t gb = vld2q(in + 64); + vst2q (out, rg); + vst2q (out + 64, gb); +} + +/* +**test5: +** ... +** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vld20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vst20.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst21.8 {q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +*/ +void +test5(const uint8_t * in, uint8_t * out) +{ + uint8x16x2_t rg = vld2q(in); + uint8x16x2_t gb = vld2q(in + 42); + vst2q (out, rg); + vst2q (out + 42, gb); +} + +/* +**test6: +** ... +** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +*/ +void +test6(const uint8_t * in, uint8_t * out, int width) +{ + uint8x16x4_t rg = vld4q(in); + uint8x16x4_t gb = vld4q(in + width); + vst4q (out, rg); + vst4q (out + width, gb); +} + +/* +**test7: +** ... +** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +*/ +void +test7(const uint8_t * in, uint8_t * out) +{ + uint8x16x4_t rg = vld4q(in); + uint8x16x4_t gb = vld4q(in + 32); + vst4q (out, rg); + vst4q (out + 32, gb); +} + +/* +**test8: +** ... +** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]! +** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\]! +** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +*/ +void +test8(const uint8_t * in, uint8_t * out) +{ + uint8x16x4_t rg = vld4q(in); + uint8x16x4_t gb = vld4q(in + 64); + vst4q (out, rg); + vst4q (out + 64, gb); +} + +/* +**test9: +** ... +** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +*/ +void +test9(const uint8_t * in, uint8_t * out) +{ + uint8x16x4_t rg = vld4q(in); + uint8x16x4_t gb = vld4q(in - 64); + vst4q (out, rg); + vst4q (out - 64, gb); +} + +/* +**test10: +** ... +** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vld40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vld43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +** vst40.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst41.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst42.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** vst43.8 {q[0-9]+, q[0-9]+, q[0-9]+, q[0-9]+}, \[(?:ip|fp|r[0-9]+)\] +** ... +*/ +void +test10(const uint8_t * in, uint8_t * out) +{ + uint8x16x4_t rg = vld4q(in); + uint8x16x4_t gb = vld4q(in + 42); + vst4q (out, rg); + vst4q (out + 42, gb); +} + +#ifdef __cplusplus +} +#endif + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file