From mboxrd@z Thu Jan  1 00:00:00 1970
From: juzhe.zhong@rivai.ai
To: gcc-patches@gcc.gnu.org
Cc: kito.cheng@gmail.com, Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Subject: [PATCH] RISC-V: Fix ICE in LRA for LMUL < 1 vector spillings
Date: Wed, 22 Mar 2023 10:49:56 +0800
Message-Id: <20230322024956.74271-1-juzhe.zhong@rivai.ai>
X-Mailer: git-send-email 2.36.1

From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>

Spilling a fractional-LMUL (LMUL < 1) or mask-mode vector cannot use the
whole-register vl<n>r/vs<n>r instructions; it needs a vsetvl plus a
vle/vse, and therefore a scratch general purpose register.  Previously
emit_vlmax_vsetvl created a fresh Pmode pseudo for that scratch, which
is not allowed while LRA is in progress and caused an ICE.  This patch
adds dedicated mov_lra patterns that carry the scratch as a clobbered
match_scratch operand, so LRA allocates it, and defers the real
expansion until after reload.

gcc/ChangeLog:

        * config/riscv/riscv-protos.h (emit_vlmax_vsetvl): Define as global.
        (emit_vlmax_op): Ditto.
        * config/riscv/riscv-v.cc (get_sew): New function.
        (emit_vlmax_vsetvl): Adapt function.
        (emit_pred_op): Ditto.
        (emit_vlmax_op): Ditto.
        (emit_nonvlmax_op): Ditto.
        (legitimize_move): Fix LRA ICE.
        (gen_no_side_effects_vsetvl_rtx): Adapt function.
        * config/riscv/vector.md (@mov<V_FRACT:mode><P:mode>_lra): New pattern.
        (@mov<VB:mode><P:mode>_lra): Ditto.
        (*mov<V_FRACT:mode><P:mode>_lra): Ditto.
        (*mov<VB:mode><P:mode>_lra): Ditto.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/base/binop_vv_constraint-4.c: Adapt testcase.
        * gcc.target/riscv/rvv/base/binop_vv_constraint-6.c: Ditto.
        * gcc.target/riscv/rvv/base/binop_vx_constraint-127.c: Ditto.
        * gcc.target/riscv/rvv/base/spill-1.c: Ditto.
        * gcc.target/riscv/rvv/base/spill-2.c: Ditto.
        * gcc.target/riscv/rvv/base/spill-3.c: Ditto.
        * gcc.target/riscv/rvv/base/spill-5.c: Ditto.
        * gcc.target/riscv/rvv/base/spill-7.c: Ditto.
        * g++.target/riscv/rvv/base/bug-18.C: New test.
        * gcc.target/riscv/rvv/base/merge_constraint-3.c: New test.
        * gcc.target/riscv/rvv/base/merge_constraint-4.c: New test.
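For context (illustrative only, not part of the patch): the failure
mode can be pictured with a small intrinsics test in the style of the
adapted spill tests below; the committed reproducer is the reduced
bug-18.C.  A fractional-LMUL value that is live across a point where
all vector registers are clobbered must be spilled to the stack, and
that spill path is what used to ICE during LRA:

    /* Needs a V-enabled target, e.g. -march=rv64gcv -O2.  */
    #include "riscv_vector.h"

    void
    force_fractional_spill (int8_t *in, int8_t *out)
    {
      /* LMUL = 1/8: too narrow for the whole-register vl1r/vs1r spill path.  */
      vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
      /* Clobber all 32 vector registers so v cannot stay in one.  */
      asm volatile ("" :::
                    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
                    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
                    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
                    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
      __riscv_vse8_v_i8mf8 (out, v, 4);   /* Forces a reload from the slot.  */
    }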
---
 gcc/config/riscv/riscv-protos.h               |   2 +
 gcc/config/riscv/riscv-v.cc                   |  67 +++++--
 gcc/config/riscv/vector.md                    |  56 ++++++
 .../g++.target/riscv/rvv/base/bug-18.C        | 140 +++++++++++++++
 .../riscv/rvv/base/binop_vv_constraint-4.c    |   1 +
 .../riscv/rvv/base/binop_vv_constraint-6.c    |   1 +
 .../riscv/rvv/base/binop_vx_constraint-127.c  |   2 +-
 .../riscv/rvv/base/merge_constraint-3.c       |  95 ++++++++++
 .../riscv/rvv/base/merge_constraint-4.c       |  28 +++
 .../gcc.target/riscv/rvv/base/spill-1.c       | 168 +++++++++---------
 .../gcc.target/riscv/rvv/base/spill-2.c       | 112 ++++++------
 .../gcc.target/riscv/rvv/base/spill-3.c       |  56 +++---
 .../gcc.target/riscv/rvv/base/spill-5.c       |  26 +--
 .../gcc.target/riscv/rvv/base/spill-7.c       | 161 +++++++++--------
 14 files changed, 636 insertions(+), 279 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/bug-18.C
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-4.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index f35aaf35b48..060dddbdc22 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -157,7 +157,9 @@ bool check_builtin_call (location_t, vec<location_t>, unsigned int,
 			 tree, unsigned int, tree *);
 bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
 bool legitimize_move (rtx, rtx, machine_mode);
+void emit_vlmax_vsetvl (machine_mode, rtx);
 void emit_vlmax_op (unsigned, rtx, rtx, machine_mode);
+void emit_vlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
 void emit_nonvlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
 enum vlmul_type get_vlmul (machine_mode);
 unsigned int get_ratio (machine_mode);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 9b83ef6ea5e..d7b77fd6123 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -98,6 +98,15 @@ private:
   expand_operand m_ops[MAX_OPERANDS];
 };
 
+static unsigned
+get_sew (machine_mode mode)
+{
+  unsigned int sew = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
+		       ? 8
+		       : GET_MODE_BITSIZE (GET_MODE_INNER (mode));
+  return sew;
+}
+
 /* Return true if X is a const_vector with all duplicate elements, which is in
    the range between MINVAL and MAXVAL.  */
 bool
@@ -109,13 +118,10 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval,
 	  && IN_RANGE (INTVAL (elt), minval, maxval));
 }
 
-static rtx
-emit_vlmax_vsetvl (machine_mode vmode)
+void
+emit_vlmax_vsetvl (machine_mode vmode, rtx vl)
 {
-  rtx vl = gen_reg_rtx (Pmode);
-  unsigned int sew = GET_MODE_CLASS (vmode) == MODE_VECTOR_BOOL
-		       ? 8
-		       : GET_MODE_BITSIZE (GET_MODE_INNER (vmode));
+  unsigned int sew = get_sew (vmode);
   enum vlmul_type vlmul = get_vlmul (vmode);
   unsigned int ratio = calculate_ratio (sew, vlmul);
 
@@ -125,8 +131,6 @@ emit_vlmax_vsetvl (machine_mode vmode)
 		      const0_rtx));
   else
     emit_insn (gen_vlmax_avl (Pmode, vl, gen_int_mode (ratio, Pmode)));
-
-  return vl;
 }
 
 /* Calculate SEW/LMUL ratio.  */
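A side note on the arithmetic above (the names below are mine, not
GCC's): the value emit_vlmax_vsetvl feeds to gen_vlmax_avl is the
architectural SEW/LMUL ratio, the quantity a VLMAX vsetvl keeps
invariant.  With LMUL counted in eighths the computation reduces to:

    #include <assert.h>

    /* Sketch of the SEW/LMUL ratio computed by calculate_ratio, with
       LMUL in eighths: mf8 = 1, mf4 = 2, mf2 = 4, m1 = 8, ..., m8 = 64.  */
    static unsigned
    sew_lmul_ratio (unsigned sew, unsigned lmul_in_eighths)
    {
      return sew * 8 / lmul_in_eighths;
    }

    int
    main (void)
    {
      assert (sew_lmul_ratio (8, 1) == 64);   /* e8,  mf8 */
      assert (sew_lmul_ratio (8, 8) == 8);    /* e8,  m1  */
      assert (sew_lmul_ratio (32, 4) == 64);  /* e32, mf2 */
      return 0;
    }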
@@ -166,7 +170,7 @@ calculate_ratio (unsigned int sew, enum vlmul_type vlmul)
 /* Emit an RVV unmask && vl mov from SRC to DEST.  */
 static void
 emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
-	      machine_mode mask_mode)
+	      machine_mode mask_mode, bool vlmax_p)
 {
   insn_expander<8> e;
   machine_mode mode = GET_MODE (dest);
@@ -186,17 +190,18 @@ emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
     e.add_input_operand (len, Pmode);
   else
     {
-      rtx vlmax = emit_vlmax_vsetvl (mode);
+      rtx vlmax = gen_reg_rtx (Pmode);
+      emit_vlmax_vsetvl (mode, vlmax);
       e.add_input_operand (vlmax, Pmode);
     }
 
   if (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
     e.add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy ());
 
-  if (len)
-    e.add_avl_type_operand (avl_type::NONVLMAX);
-  else
+  if (vlmax_p)
     e.add_avl_type_operand (avl_type::VLMAX);
+  else
+    e.add_avl_type_operand (avl_type::NONVLMAX);
 
   e.expand ((enum insn_code) icode, MEM_P (dest) || MEM_P (src));
 }
@@ -204,14 +209,21 @@ emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
 void
 emit_vlmax_op (unsigned icode, rtx dest, rtx src, machine_mode mask_mode)
 {
-  emit_pred_op (icode, NULL_RTX, dest, src, NULL_RTX, mask_mode);
+  emit_pred_op (icode, NULL_RTX, dest, src, NULL_RTX, mask_mode, true);
+}
+
+void
+emit_vlmax_op (unsigned icode, rtx dest, rtx src, rtx len,
+	       machine_mode mask_mode)
+{
+  emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode, true);
 }
 
 void
 emit_nonvlmax_op (unsigned icode, rtx dest, rtx src, rtx len,
 		  machine_mode mask_mode)
 {
-  emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode);
+  emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode, false);
 }
 
 static void
@@ -265,6 +277,20 @@ legitimize_move (rtx dest, rtx src, machine_mode mask_mode)
       expand_const_vector (dest, src, mask_mode);
       return true;
     }
+
+  /* In order to decrease the memory traffic, we don't use whole register
+   * load/store for the LMUL less than 1 and mask mode, so those cases
+   * require one extra general purpose register, but that's not allowed
+   * during the LRA process, so we have a special move pattern used for
+   * LRA, which will defer the expansion until after LRA.  */
+  if ((known_lt (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR)
+       || GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+      && lra_in_progress)
+    {
+      emit_insn (gen_mov_lra (mode, Pmode, dest, src));
+      return true;
+    }
+
   if (known_ge (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR)
       && GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
     {
@@ -274,6 +300,13 @@ legitimize_move (rtx dest, rtx src, machine_mode mask_mode)
 	  return false;
 	}
     }
+
+  if (register_operand (src, mode) && register_operand (dest, mode))
+    {
+      emit_insn (gen_rtx_SET (dest, src));
+      return true;
+    }
+
   if (!register_operand (src, mode) && !register_operand (dest, mode))
     {
       rtx tmp = gen_reg_rtx (mode);
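Note that the new lra_in_progress path covers mask modes as well as
fractional LMUL, since MODE_VECTOR_BOOL values are likewise spilled
with vsetvl + vlm/vsm rather than whole-register moves.  An
illustrative (not committed) mask variant of the spill scenario, in
the shape of the new merge_constraint tests:

    #include "riscv_vector.h"

    void
    force_mask_spill (uint8_t *in, uint8_t *out, size_t vl)
    {
      vbool16_t m = __riscv_vlm_v_b16 (in, vl);
      /* Clobber every vector register so the mask must live on the stack.  */
      asm volatile ("" :::
                    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
                    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
                    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
                    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
      __riscv_vsm_v_b16 (out, m, vl);
    }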
@@ -540,9 +573,7 @@ force_vector_length_operand (rtx vl)
 static rtx
 gen_no_side_effects_vsetvl_rtx (machine_mode vmode, rtx vl, rtx avl)
 {
-  unsigned int sew = GET_MODE_CLASS (vmode) == MODE_VECTOR_BOOL
-		       ? 8
-		       : GET_MODE_BITSIZE (GET_MODE_INNER (vmode));
+  unsigned int sew = get_sew (vmode);
   return gen_vsetvl_no_side_effects (Pmode, vl, avl,
 				     gen_int_mode (sew, Pmode),
 				     gen_int_mode (get_vlmul (vmode), Pmode),
 				     const0_rtx, const0_rtx);
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 96dc1af5a3d..ebb014aecb1 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -635,6 +635,62 @@
   [(set_attr "type" "vmov")
    (set_attr "mode" "<MODE>")])
 
+(define_expand "@mov<V_FRACT:mode><P:mode>_lra"
+  [(parallel
+    [(set (match_operand:V_FRACT 0 "reg_or_mem_operand")
+	  (match_operand:V_FRACT 1 "reg_or_mem_operand"))
+   (clobber (match_scratch:P 2))])]
+  "TARGET_VECTOR && (lra_in_progress || reload_completed)"
+{})
+
+(define_expand "@mov<VB:mode><P:mode>_lra"
+  [(parallel
+    [(set (match_operand:VB 0 "reg_or_mem_operand")
+	  (match_operand:VB 1 "reg_or_mem_operand"))
+   (clobber (match_scratch:P 2))])]
+  "TARGET_VECTOR && (lra_in_progress || reload_completed)"
+{})
+
+(define_insn_and_split "*mov<V_FRACT:mode><P:mode>_lra"
+  [(set (match_operand:V_FRACT 0 "reg_or_mem_operand" "=vr, m,vr")
+	(match_operand:V_FRACT 1 "reg_or_mem_operand" "  m,vr,vr"))
+   (clobber (match_scratch:P 2 "=&r,&r,X"))]
+  "TARGET_VECTOR && (lra_in_progress || reload_completed)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  if (REG_P (operands[0]) && REG_P (operands[1]))
+    emit_insn (gen_rtx_SET (operands[0], operands[1]));
+  else
+    {
+      riscv_vector::emit_vlmax_vsetvl (<V_FRACT:MODE>mode, operands[2]);
+      riscv_vector::emit_vlmax_op (code_for_pred_mov (<V_FRACT:MODE>mode),
+				   operands[0], operands[1], operands[2],
+				   <VM>mode);
+    }
+  DONE;
+})
+
+(define_insn_and_split "*mov<VB:mode><P:mode>_lra"
+  [(set (match_operand:VB 0 "reg_or_mem_operand" "=vr, m,vr")
+	(match_operand:VB 1 "reg_or_mem_operand" "  m,vr,vr"))
+   (clobber (match_scratch:P 2 "=&r,&r,X"))]
+  "TARGET_VECTOR && (lra_in_progress || reload_completed)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  if (REG_P (operands[0]) && REG_P (operands[1]))
+    emit_insn (gen_rtx_SET (operands[0], operands[1]));
+  else
+    {
+      riscv_vector::emit_vlmax_vsetvl (<VB:MODE>mode, operands[2]);
+      riscv_vector::emit_vlmax_op (code_for_pred_mov (<VB:MODE>mode),
+				   operands[0], operands[1], operands[2],
+				   <VB:MODE>mode);
+    }
+  DONE;
+})
+
 ;; -----------------------------------------------------------------
 ;; ---- Duplicate Operations
 ;; -----------------------------------------------------------------
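Two remarks on the patterns above.  First, the move stays a single
opaque insn all the way through register allocation; the
(clobber (match_scratch:P 2)) is what lets LRA itself allocate the
general purpose register that the old code tried to create with
gen_reg_rtx mid-allocation.  Second, the reg-to-reg alternative needs
no scratch at all, hence its "X" constraint and the plain gen_rtx_SET
in the split.  A sketch (not from the patch; register names invented)
of what the "m,vr" store alternative becomes after reload:

    /* The scratch GPR from the clobber, say t0, receives the VL that
       the VLMAX vsetvl defines; the slot address was already
       legitimized by reload:

           vsetvli  t0,zero,e8,mf8,ta,ma   # emit_vlmax_vsetvl (..., operands[2])
           vse8.v   v1,0(a3)               # predicated store via pred_mov
    */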
diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/bug-18.C b/gcc/testsuite/g++.target/riscv/rvv/base/bug-18.C
new file mode 100644
index 00000000000..d6088338dbc
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/bug-18.C
@@ -0,0 +1,140 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#include <iostream>
+#include "riscv_vector.h"
+using std::cerr;
+using std::endl;
+template < class , class b > int c(b val) {
+  return val;
+}
+auto &f32(c< float, uint32_t >);
+template < class d >
+bool check(d , d , size_t );
+int main() {
+  size_t e ;
+  int16_t f[] {};
+  size_t g ;
+  int32_t i[] {4784};
+  size_t aa = 4;
+  int16_t ab[] {2313};
+  int16_t j[] {7114 };
+  int16_t k[] {7696 };
+  uint32_t l[] {9951 };
+  int32_t m[] {2659 };
+  uint16_t n[] {7537 };
+  int32_t o[] {05733};
+  uint32_t p[] {7010090 };
+  uint32_t q[] {21060 };
+  uint32_t r[] {2273 };
+  uint32_t s[] {4094366 };
+  int16_t ac[] {11880 };
+  int16_t t[] {10988};
+  int16_t ad[] {30376};
+  int8_t u[] {};
+  int8_t ae[] {7};
+  int8_t v[] {40};
+  int8_t af[] {6};
+  int16_t w[] {4077 };
+  int16_t x[] {7932 };
+  int8_t y[] {3};
+  int8_t z[] {4};
+  uint16_t ag[] {2831};
+  int16_t ah[] {10412 };
+  int16_t ai[] {6823};
+  int32_t aj[] {8572 };
+  int32_t ak[] {9999 };
+  uint32_t al[] {50166962 };
+  uint32_t am[] {9781 };
+  int8_t an[] {9, 35};
+  float ao[] {222.65, 22.79};
+  float ap[] {126.10, 13.92};
+  int64_t aq[] {508727, 5556};
+  int16_t ar[] {2861 };
+  int16_t as[] {21420};
+  int16_t at[] {4706 };
+  uint32_t au ;
+  uint32_t av = 600295662;
+  size_t aw ;
+  int16_t ax = 13015;
+  uint32_t ay ;
+  uint16_t az = 10652;
+  int32_t ba ;
+  int8_t bb ;
+  int64_t bc = 40183771683589512;
+
+asm volatile ("ttt":::"memory");
+  vint16mf4_t bd = __riscv_vle16_v_i16mf4(j, 2);
+  vuint32mf2_t be = __riscv_vle32_v_u32mf2(l, 2);
+  vint32mf2_t bf = __riscv_vle32_v_i32mf2(m, 2);
+  vuint16mf4_t bg = __riscv_vle16_v_u16mf4(n, 2);
+  vint8mf4_t bh ;
+  vuint32m2_t bi = __riscv_vle32_v_u32m2(p, 2);
+  vuint32m2_t bj = __riscv_vle32_v_u32m2(q, 2);
+  vuint32m2_t bk = __riscv_vle32_v_u32m2(r, 2);
+  vuint32m2_t bl = __riscv_vle32_v_u32m2(s, 2);
+  vint16m1_t bm = __riscv_vle16_v_i16m1(ac, 2);
+  vint16m1_t bn = __riscv_vle16_v_i16m1(t, 2);
+  vint8mf2_t bo = __riscv_vle8_v_i8mf2(u, 1);
+  vint8mf2_t bp = __riscv_vle8_v_i8mf2(ae, 1);
+  vint8mf8_t bq = __riscv_vle8_v_i8mf8(af, 1);
+  vint16mf4_t br = __riscv_vle16_v_i16mf4(w, 2);
+  vint16mf4_t bs = __riscv_vle16_v_i16mf4(x, 2);
+  vint8mf8_t bt = __riscv_vle8_v_i8mf8(y, 1);
+  vint8mf8_t bu = __riscv_vle8_v_i8mf8(z, 1);
+  vuint16mf4_t bv = __riscv_vle16_v_u16mf4(ag, 1);
+  vint16mf4_t bw = __riscv_vle16_v_i16mf4(ah, 2);
+  vint16mf4_t bx = __riscv_vle16_v_i16mf4(ai, 2);
+  vint32mf2_t by = __riscv_vle32_v_i32mf2(aj, 2);
+  vint32mf2_t bz = __riscv_vle32_v_i32mf2(ak, 2);
+  vuint32mf2_t ca = __riscv_vle32_v_u32mf2(al, 2);
+  vuint32mf2_t cb = __riscv_vle32_v_u32mf2(am, 2);
+  vint8mf8_t cc = __riscv_vle8_v_i8mf8(an, 2);
+  vfloat32mf2_t cd = __riscv_vle32_v_f32mf2(ao, 2);
+  vfloat32mf2_t ce = __riscv_vle32_v_f32mf2(ap, 2);
+  vint64m1_t cf = __riscv_vle64_v_i64m1(aq, 2);
+  vint16mf4_t cg = __riscv_vle16_v_i16mf4(ar, 2);
+  vint16mf4_t ch = __riscv_vle16_v_i16mf4(as, 2);
+  vint16mf4_t var_62 = __riscv_vle16_v_i16mf4(at, 2);
+  vbool64_t var_20 = __riscv_vmadc_vx_u32mf2_b64(be, ay, 2);
+  int8_t var_17 = __riscv_vmv_x_s_i8mf4_i8(bh);
+  vbool16_t var_28 = __riscv_vmsltu_vv_u32m2_b16(bk, bl, 2);
+  vint8mf2_t var_14 = __riscv_vadd_vv_i8mf2(bo, bp, 1);
+  vbool64_t var_8 = __riscv_vmseq_vv_i16mf4_b64(br, bs, 2);
+  vbool64_t var_42 = __riscv_vmsbc_vx_u16mf4_b64(bv, az, 1);
+  vbool64_t var_46 = __riscv_vmsge_vx_i32mf2_b64(by, ba, 2);
+  vint16mf4_t var_4 = __riscv_vncvt_x_x_w_i16mf4(bz, 2);
+  vbool64_t var_51 = __riscv_vmsgt_vx_i8mf8_b64(cc, bb, 2);
+  vbool64_t var_56 = __riscv_vmfne_vv_f32mf2_b64(cd, ce, 2);
+  vbool64_t var_55 = __riscv_vmseq_vx_i64m1_b64(cf, bc, 2);
+  vuint32m2_t var_16 = __riscv_vslideup_vx_u32m2_mu(var_28, bi, bj, aw, 2);
+  vint8mf2_t var_12 = __riscv_vmulh_vv_i8mf2(var_14, var_14, 1);
+  vint16mf4_t var_0 = __riscv_vdiv_vv_i16mf4_mu(var_8, var_4, ch, var_62, 2);
+  vuint32m2_t var_13 = __riscv_vsub_vx_u32m2(var_16, av, 2);
+  int8_t var_9 = __riscv_vmv_x_s_i8mf2_i8(var_12);
+  vint16mf4_t var_19 = __riscv_vor_vx_i16mf4_mu(var_20, var_0, bd, ax, 2);
+  uint32_t var_10 = __riscv_vmv_x_s_u32m2_u32(var_13);
+  vint8mf8_t var_7 = __riscv_vmadd_vx_i8mf8_mu(var_42, bt, var_9, bu, 1);
+  __riscv_vse16_v_i16mf4(k, var_19, 2);
+  vuint32mf2_t var_3 =
+      __riscv_vslide1down_vx_u32mf2_mu(var_51, ca, cb, var_10, 2);
+  if (check(k, ab, aa))
+    cerr << "check 8 fails" << endl;
+  vbool64_t var_2 = __riscv_vmsne_vx_u32mf2_b64_mu(var_55, var_56, var_3, au, 2);
+  vint16mf4_t var_1 = __riscv_vssub_vv_i16mf4_mu(var_2, var_0, var_4, cg, 2);
+  vint16mf4_t var_5 = __riscv_vxor_vv_i16mf4_mu(var_46, var_1, bw, bx, 2);
+  vint32mf2_t var_18 = __riscv_vwmaccsu_vv_i32mf2(bf, var_1, bg, 2);
+  vint8mf8_t var_6 = __riscv_vncvt_x_x_w_i8mf8_mu(var_8, var_7, var_5, 1);
+  vint16m1_t var_15 = __riscv_vredand_vs_i16mf4_i16m1_tu(bm, var_5, bn, 2);
+  __riscv_vse32_v_i32mf2(o, var_18, 2);
+  vbool64_t var_11 = __riscv_vmsge_vx_i8mf8_b64(var_6, var_17, 1);
+  __riscv_vse16_v_i16m1(ad, var_15, 1);
+  if (check(o, i, g))
+    cerr << "check 1 fails" << endl;
+  __riscv_vse8_v_i8mf8_m(var_11, v, bq, 1);
+  if (check(ad, f, e))
+    cerr << "check 4 fails" << endl;
+  cerr << "check 7 fails" << endl;
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-4.c
index 1b0afed037a..552c264d895 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-4.c
@@ -24,3 +24,4 @@ void f2 (void * in, void *out, int32_t x)
     __riscv_vsm_v_b32 (out, m4, 4);
 }
 
+/* { dg-final { scan-assembler-times {vmv} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-6.c
index 384e2301a69..6a65fb576e8 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-6.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-6.c
@@ -24,4 +24,5 @@ void f2 (void * in, void *out, int32_t x)
     __riscv_vsm_v_b32 (out, m4, 4);
 }
 
+/* { dg-final { scan-assembler-times {vmv} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-127.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-127.c
index a353a7ab2d5..3933c35f4ce 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-127.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-127.c
@@ -24,4 +24,4 @@ void f2 (void * in, void *out, int32_t x)
     __riscv_vsm_v_b32 (out, m4, 4);
 }
 
-
+/* { dg-final { scan-assembler-times {vmv} 2 } } */
asm volatile("#" :: + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28", "v29"); + + vbool16_t v = __riscv_vmseq_vx_u16m1_b16_mu(m1,m2,v1,shift,vl); + asm volatile("#" :: + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28", "v29"); + + + __riscv_vsm_v_b16 (out,v,vl); +} + +void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift) +{ + vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl); + vuint16m1_t v2 = __riscv_vle16_v_u16m1 (base2, vl); + vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl); + vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl); + asm volatile("#" :: + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28"); + + vbool16_t v = __riscv_vmsltu_vv_u16m1_b16_mu(m1,m2,v1,v2,vl); + asm volatile("#" :: + : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27"); + + __riscv_vsm_v_b16 (out,v,vl); +} + +void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t shift) +{ + vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl); + vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl); + vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl); + asm volatile("#" :: + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28", "v29"); + + vbool16_t v = __riscv_vmsltu_vx_u16m1_b16_mu(m1,m2,v1,shift,vl); + asm volatile("#" :: + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28", "v29"); + + + __riscv_vsm_v_b16 (out,v,vl); +} + +/* { dg-final { scan-assembler-not {vmv} } } */ +/* { dg-final { scan-assembler-not {csrr} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-4.c new file mode 100644 index 00000000000..db245b02570 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-4.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */ + +#include "riscv_vector.h" + +void f (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t shift) +{ + vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl); + vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl); + vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl); + asm volatile("#" :: + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28", "v29"); + + vbool16_t v = __riscv_vmsltu_vx_u16m1_b16_mu(m1,m2,v1,shift,vl); + asm volatile("#" :: + : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", + "v26", "v27", "v28", "v29", "v30", "v31"); + + + __riscv_vsm_v_b16 (out,v,vl); +} + +/* { dg-final 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c
index ec38a828ee7..2f2d85807ec 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c
@@ -10,20 +10,20 @@
 ** csrr\tt0,vlenb
 ** sub\tsp,sp,t0
 ** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,3
-** slli\ta3,a2,3
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse8.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,3
-** slli\ta3,a2,3
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle8.v\tv[0-9]+,0\(a3\)
-** vse8.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
 ** csrr\tt0,vlenb
 ** add\tsp,sp,t0
 ** ...
@@ -43,20 +43,20 @@ spill_1 (int8_t *in, int8_t *out)
 ** sub\tsp,sp,t0
 ** vsetvli\ta5,zero,e8,mf4,ta,ma
 ** vle8.v\tv[0-9]+,0\(a0\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse8.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle8.v\tv[0-9]+,0\(a3\)
-** vse8.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
 ** csrr\tt0,vlenb
 ** add\tsp,sp,t0
 ** ...
@@ -76,16 +76,16 @@ spill_2 (int8_t *in, int8_t *out)
 ** sub\tsp,sp,t0
 ** vsetvli\ta5,zero,e8,mf2,ta,ma
 ** vle8.v\tv[0-9]+,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse8.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle8.v\tv[0-9]+,0\(a3\)
-** vse8.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
 ** csrr\tt0,vlenb
 ** add\tsp,sp,t0
 ** ...
@@ -107,7 +107,7 @@ spill_3 (int8_t *in, int8_t *out)
 ** vs1r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl1re8.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -131,7 +131,7 @@ spill_4 (int8_t *in, int8_t *out)
 ** vs2r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl2re8.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -155,7 +155,7 @@ spill_5 (int8_t *in, int8_t *out)
 ** vs4r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl4re8.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -179,7 +179,7 @@ spill_6 (int8_t *in, int8_t *out)
 ** vs8r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl8re8.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -200,20 +200,20 @@
 ** sub\tsp,sp,t0
 ** vsetvli\ta5,zero,e8,mf8,ta,ma
 ** vle8.v\tv[0-9]+,0\(a0\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,3
-** slli\ta3,a2,3
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse8.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,3
-** slli\ta3,a2,3
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle8.v\tv[0-9]+,0\(a3\)
-** vse8.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
 ** csrr\tt0,vlenb
 ** add\tsp,sp,t0
 ** ...
@@ -233,20 +233,20 @@ spill_8 (uint8_t *in, uint8_t *out)
 ** sub\tsp,sp,t0
 ** vsetvli\ta5,zero,e8,mf4,ta,ma
 ** vle8.v\tv[0-9]+,0\(a0\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse8.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle8.v\tv[0-9]+,0\(a3\)
-** vse8.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
 ** csrr\tt0,vlenb
 ** add\tsp,sp,t0
 ** ...
@@ -266,16 +266,16 @@ spill_9 (uint8_t *in, uint8_t *out)
 ** sub\tsp,sp,t0
 ** vsetvli\ta5,zero,e8,mf2,ta,ma
 ** vle8.v\tv[0-9]+,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse8.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle8.v\tv[0-9]+,0\(a3\)
-** vse8.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
 ** csrr\tt0,vlenb
 ** add\tsp,sp,t0
 ** ...
@@ -297,7 +297,7 @@ spill_10 (uint8_t *in, uint8_t *out)
 ** vs1r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl1re8.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -321,7 +321,7 @@ spill_11 (uint8_t *in, uint8_t *out)
 ** vs2r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl2re8.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -345,7 +345,7 @@ spill_12 (uint8_t *in, uint8_t *out)
 ** vs4r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl4re8.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -369,7 +369,7 @@ spill_13 (uint8_t *in, uint8_t *out)
 ** vs8r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl8re8.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c
index 147a727b134..4bcaf4dce79 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c
@@ -11,20 +11,20 @@
 ** sub\tsp,sp,t0
 ** vsetvli\ta5,zero,e16,mf4,ta,ma
 ** vle16.v\tv[0-9]+,0\(a0\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse16.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle16.v\tv[0-9]+,0\(a3\)
-** vse16.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle16.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
 ** csrr\tt0,vlenb
 ** add\tsp,sp,t0
 ** ...
@@ -44,16 +44,16 @@ spill_2 (int16_t *in, int16_t *out)
 ** sub\tsp,sp,t0
 ** vsetvli\ta5,zero,e16,mf2,ta,ma
 ** vle16.v\tv[0-9]+,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse16.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle16.v\tv[0-9]+,0\(a3\)
-** vse16.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle16.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
 ** csrr\tt0,vlenb
 ** add\tsp,sp,t0
 ** ...
@@ -75,7 +75,7 @@ spill_3 (int16_t *in, int16_t *out)
 ** vs1r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl1re16.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -99,7 +99,7 @@ spill_4 (int16_t *in, int16_t *out)
 ** vs2r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl2re16.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -123,7 +123,7 @@ spill_5 (int16_t *in, int16_t *out)
 ** vs4r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl4re16.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -147,7 +147,7 @@ spill_6 (int16_t *in, int16_t *out)
 ** vs8r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl8re16.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -168,20 +168,20 @@ spill_7 (int16_t *in, int16_t *out)
 ** sub\tsp,sp,t0
 ** vsetvli\ta5,zero,e16,mf4,ta,ma
 ** vle16.v\tv[0-9]+,0\(a0\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse16.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle16.v\tv[0-9]+,0\(a3\)
-** vse16.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle16.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
 ** csrr\tt0,vlenb
 ** add\tsp,sp,t0
 ** ...
@@ -201,16 +201,16 @@ spill_9 (uint16_t *in, uint16_t *out)
 ** sub\tsp,sp,t0
 ** vsetvli\ta5,zero,e16,mf2,ta,ma
 ** vle16.v\tv[0-9]+,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse16.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle16.v\tv[0-9]+,0\(a3\)
-** vse16.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle16.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
 ** csrr\tt0,vlenb
 ** add\tsp,sp,t0
 ** ...
@@ -232,7 +232,7 @@ spill_10 (uint16_t *in, uint16_t *out)
 ** vs1r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl1re16.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -256,7 +256,7 @@ spill_11 (uint16_t *in, uint16_t *out)
 ** vs2r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl2re16.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -280,7 +280,7 @@ spill_12 (uint16_t *in, uint16_t *out)
 ** vs4r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl4re16.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -304,7 +304,7 @@ spill_13 (uint16_t *in, uint16_t *out)
 ** vs8r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl8re16.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c
index 81d695a2a73..82d685e029d 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c
@@ -11,16 +11,16 @@
 ** sub\tsp,sp,t0
 ** vsetvli\ta5,zero,e32,mf2,ta,ma
 ** vle32.v\tv[0-9]+,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse32.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle32.v\tv[0-9]+,0\(a3\)
-** vse32.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
 ** csrr\tt0,vlenb
 ** add\tsp,sp,t0
 ** ...
@@ -42,7 +42,7 @@ spill_3 (int32_t *in, int32_t *out)
 ** vs1r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl1re32.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -66,7 +66,7 @@ spill_4 (int32_t *in, int32_t *out)
 ** vs2r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl2re32.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -90,7 +90,7 @@ spill_5 (int32_t *in, int32_t *out)
 ** vs4r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl4re32.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -114,7 +114,7 @@ spill_6 (int32_t *in, int32_t *out)
 ** vs8r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl8re32.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -135,16 +135,16 @@ spill_7 (int32_t *in, int32_t *out)
 ** sub\tsp,sp,t0
 ** vsetvli\ta5,zero,e32,mf2,ta,ma
 ** vle32.v\tv[0-9]+,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse32.v\tv[0-9]+,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle32.v\tv[0-9]+,0\(a3\)
-** vse32.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
 ** csrr\tt0,vlenb
 ** add\tsp,sp,t0
 ** ...
@@ -166,7 +166,7 @@ spill_10 (uint32_t *in, uint32_t *out)
 ** vs1r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl1re32.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -190,7 +190,7 @@ spill_11 (uint32_t *in, uint32_t *out)
 ** vs2r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl2re32.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -214,7 +214,7 @@ spill_12 (uint32_t *in, uint32_t *out)
 ** vs4r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl4re32.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -238,7 +238,7 @@ spill_13 (uint32_t *in, uint32_t *out)
 ** vs8r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl8re32.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c
index 8ec7a2d4b2b..5b3f75f3552 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c
@@ -11,16 +11,16 @@
 ** sub\tsp,sp,t0
 ** vsetvli\ta5,zero,e32,mf2,ta,ma
 ** vle32.v\tv[0-9]+,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse32.v\tv[0-9]+,0\(a3\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
 ** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle32.v\tv[0-9]+,0\(a3\)
-** vse32.v\tv[0-9]+,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
 ** csrr\tt0,vlenb
 ** add\tsp,sp,t0
 ** ...
@@ -42,7 +42,7 @@ spill_3 (float *in, float *out)
 ** vs1r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl1re32.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -66,7 +66,7 @@ spill_4 (float *in, float *out)
 ** vs2r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl2re32.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -90,7 +90,7 @@ spill_5 (float *in, float *out)
 ** vs4r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl4re32.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
@@ -114,7 +114,7 @@ spill_6 (float *in, float *out)
 ** vs8r.v\tv[0-9]+,0\(sp\)
 ** ...
 ** vl8re32.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
 ** ...
 ** jr\tra
 */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c
index e852a75578e..2bc54557dee 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c
@@ -7,89 +7,92 @@
 /*
 ** spill:
-** csrr\tt0,vlenb
-** slli\tt1,t0,4
-** sub\tsp,sp,t1
-** vsetvli\ta3,zero,e8,mf8,ta,ma
-** vle8.v\tv[0-9]+,0\(a0\)
-** csrr\ta5,vlenb
-** srli\ta5,a5,3
-** add\ta5,a5,sp
-** vse8.v\tv[0-9]+,0\(a5\)
-** addi\ta5,a0,1
-** vsetvli\ta4,zero,e8,mf4,ta,ma
-** vle8.v\tv[0-9]+,0\(a5\)
-** csrr\ta5,vlenb
-** srli\ta5,a5,2
-** add\ta5,a5,sp
-** vse8.v\tv[0-9]+,0\(a5\)
-** addi\ta2,a0,2
-** vsetvli\ta5,zero,e8,mf2,ta,ma
-** vle8.v\tv[0-9]+,0\(a2\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,1
-** add\ta2,a2,sp
-** vse8.v\tv[0-9]+,0\(a2\)
-** addi\ta2,a0,3
-** vl1re8.v\tv[0-9]+,0\(a2\)
-** csrr\ta2,vlenb
-** add\ta2,a2,sp
-** vs1r.v\tv[0-9]+,0\(a2\)
-** addi\ta2,a0,4
-** vl2re8.v\tv[0-9]+,0\(a2\)
-** csrr\tt3,vlenb
-** slli\ta2,t3,1
-** add\ta2,a2,sp
-** vs2r.v\tv[0-9]+,0\(a2\)
-** addi\ta2,a0,5
-** vl4re8.v\tv[0-9]+,0\(a2\)
-** mv\ta2,t3
-** slli\tt3,t3,2
-** add\tt3,t3,sp
-** vs4r.v\tv[0-9]+,0\(t3\)
-** addi\ta0,a0,6
-** vl8re8.v\tv[0-9]+,0\(a0\)
-** slli\ta0,a2,3
-** add\ta0,a0,sp
-** vs8r.v\tv[0-9]+,0\(a0\)
+** csrr\t[a-x0-9]+,vlenb
+** slli\t[a-x0-9]+,[a-x0-9]+,4
+** sub\tsp,[a-x0-9]+,[a-x0-9]+
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,1
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,2
+** vsetvli\t[a-x0-9]+,zero,e8,mf2,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,3
+** vl1re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vs1r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,4
+** vl2re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** slli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,5
+** vl4re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** mv\t[a-x0-9]+,[a-x0-9]+
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vs4r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,6
+** vl8re8.v\tv[0-9]+,0\([a-x0-9]+\)
 ** ...
-** srli\ta0,a2,3
-** add\ta0,a0,sp
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vs8r.v\tv[0-9]+,0\([a-x0-9]+\)
 ** ...
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
 ** ...
-** vle8.v\tv[0-9]+,0\(a0\)
-** vse8.v\tv[0-9]+,0\(a1\)
-** addi\ta3,a1,1
-** srli\ta0,a2,2
-** add\ta0,a0,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,1
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
 ** ...
-** vle8.v\tv[0-9]+,0\(a0\)
-** vse8.v\tv[0-9]+,0\(a3\)
-** addi\ta4,a1,2
-** srli\ta3,a2,1
-** add\ta3,a3,sp
 ** ...
-** vle8.v\tv[0-9]+,0\(a3\)
-** vse8.v\tv[0-9]+,0\(a4\)
-** addi\ta5,a1,3
-** add\ta4,a2,sp
-** vl1re8.v\tv[0-9]+,0\(a4\)
-** vs1r.v\tv[0-9]+,0\(a5\)
-** addi\ta5,a1,4
-** slli\ta4,a2,1
-** add\ta4,a4,sp
-** vl2re8.v\tv[0-9]+,0\(a4\)
-** vs2r.v\tv[0-9]+,0\(a5\)
-** addi\ta5,a1,5
-** vl4re8.v\tv[0-9]+,0\(t3\)
-** vs4r.v\tv[0-9]+,0\(a5\)
-** addi\ta1,a1,6
-** slli\ta5,a2,3
-** add\ta5,a5,sp
-** vl8re8.v\tv[0-9]+,0\(a5\)
-** vs8r.v\tv[0-9]+,0\(a1\)
-** csrr\tt0,vlenb
-** slli\tt1,t0,4
-** add\tsp,sp,t1
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,2
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,3
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vl1re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vs1r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,4
+** slli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vl2re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,5
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vl4re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vs4r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,6
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vl8re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vs8r.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** slli\t[a-x0-9]+,[a-x0-9]+,4
+** add\tsp,[a-x0-9]+,[a-x0-9]+
 ** ...
 ** jr\tra
 */
-- 
2.36.1