From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtpbgbr2.qq.com (smtpbgbr2.qq.com [54.207.22.56]) by sourceware.org (Postfix) with ESMTPS id AA4DF3858404 for ; Tue, 26 Sep 2023 14:49:37 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org AA4DF3858404 Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=rivai.ai Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=rivai.ai X-QQ-mid: bizesmtp73t1695739770tfdhz5sj Received: from rios-cad122.hadoop.rioslab.org ( [58.60.1.26]) by bizesmtp.qq.com (ESMTP) with id ; Tue, 26 Sep 2023 22:49:29 +0800 (CST) X-QQ-SSF: 01400000000000G0V000000A0000000 X-QQ-FEAT: W+onFc5Tw4OxK0sQvtpxQM2qYghX1CjXxd08Uw9aKYabP2iXq+578j+gD4Nbm Lpm5Ycgknbdp7vVjK9JUaj0fuIPyRQqmRGTW2wg9LUTsY/NpsJNTowhN6eWkuacnwh5NqjP zYy47Lkmda70fe0pmfDi4U/czeddc1IRTfehbFeFdhyVqCUTQCLGAMv395YWmsHhDQHvXG3 lHi7r/YnuUTYigEAYVumjAn8XQpb+z+/rxYfrOGvevAlkXACcMKo3R/3ynIW+DzZCk0D8JN zLo2bg7lpRxnBr/jb0pGsbShZPIIXgCTF8p74V8a/JO0jeTVgqqxIq88zXxDPzDB6/oDR2y 0dp5KAgynMGzqlolxjJqnsT9ERBBzPB0nCkysk3ue26U0hoCABX+ieGrM8ZXvoq/rC2OsWn DEOwhioFFxerFAfUo58ocg== X-QQ-GoodBg: 2 X-BIZMAIL-ID: 12047579028708163242 From: Juzhe-Zhong To: gcc-patches@gcc.gnu.org Cc: kito.cheng@gmail.com, kito.cheng@sifive.com, jeffreyalaw@gmail.com, rdapp.gcc@gmail.com, Juzhe-Zhong Subject: [PATCH V2] RISC-V: Fix mem-to-mem VLS move pattern[PR111566] Date: Tue, 26 Sep 2023 22:49:28 +0800 Message-Id: <20230926144928.630319-1-juzhe.zhong@rivai.ai> X-Mailer: git-send-email 2.36.3 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-QQ-SENDSIZE: 520 Feedback-ID: bizesmtp:rivai.ai:qybglogicsvrgz:qybglogicsvrgz7a-one-0 X-Spam-Status: No, score=-10.3 required=5.0 tests=BAYES_00,GIT_PATCH_0,KAM_DMARC_STATUS,KAM_SHORT,RCVD_IN_BARRACUDACENTRAL,RCVD_IN_DNSWL_NONE,RCVD_IN_MSPIKE_H2,SPF_HELO_PASS,SPF_PASS,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: The 
mem-to-mem insn pattern is split from reg-to-mem/mem-to-reg/reg-to-reg, which causes an ICE in RA since RA prefers that they stay together. Now, we split mem-to-mem as a pure pre-RA split pattern and only allow define_insn to match mem-to-mem VLS move in pre-RA stage (mem-to-mem moves are forbidden after RA). Tested; no difference. Committed. PR target/111566 gcc/ChangeLog: * config/riscv/vector.md (*mov_mem_to_mem): Only allow mem-to-mem move for VLS modes size <= MAX_BITS_PER_WORD gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/fortran/pr111566.f90: New test. --- gcc/config/riscv/vector.md | 60 ++++++++++--------- .../gcc.target/riscv/rvv/fortran/pr111566.f90 | 31 ++++++++++ 2 files changed, 62 insertions(+), 29 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/fortran/pr111566.f90 diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index d5300a33946..a6dbaa74a10 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -1222,48 +1222,50 @@ DONE; }) -(define_insn_and_split "*mov_mem_to_mem" +;; Some VLS modes (like V2SImode) have size <= a general purpose +;; register width, we optimize such mem-to-mem move into mem-to-mem +;; scalar move. Otherwise, we always force operands[1] into register +;; so that we will never get mem-to-mem move after RA. +(define_split [(set (match_operand:VLS_AVL_IMM 0 "memory_operand") (match_operand:VLS_AVL_IMM 1 "memory_operand"))] - "TARGET_VECTOR && can_create_pseudo_p ()" - "#" - "&& 1" + "TARGET_VECTOR && can_create_pseudo_p () + && GET_MODE_BITSIZE (mode).to_constant () <= MAX_BITS_PER_WORD" [(const_int 0)] { - if (GET_MODE_BITSIZE (mode).to_constant () <= MAX_BITS_PER_WORD) - { - /* Opitmize the following case: - - typedef int8_t v2qi __attribute__ ((vector_size (2))); - v2qi v = *(v2qi*)in; - *(v2qi*)out = v; - - We prefer scalar load/store instead of vle.v/vse.v when - the VLS modes size is smaller scalar mode. 
*/ - machine_mode mode; - unsigned size = GET_MODE_BITSIZE (mode).to_constant (); - if (FLOAT_MODE_P (mode)) - mode = mode_for_size (size, MODE_FLOAT, 0).require (); - else - mode = mode_for_size (size, MODE_INT, 0).require (); - emit_move_insn (gen_lowpart (mode, operands[0]), - gen_lowpart (mode, operands[1])); - } + /* Opitmize the following case: + + typedef int8_t v2qi __attribute__ ((vector_size (2))); + v2qi v = *(v2qi*)in; + *(v2qi*)out = v; + + We prefer scalar load/store instead of vle.v/vse.v when + the VLS modes size is smaller scalar mode. */ + machine_mode mode; + unsigned size = GET_MODE_BITSIZE (mode).to_constant (); + if (FLOAT_MODE_P (mode)) + mode = mode_for_size (size, MODE_FLOAT, 0).require (); else - { - operands[1] = force_reg (mode, operands[1]); - emit_move_insn (operands[0], operands[1]); - } + mode = mode_for_size (size, MODE_INT, 0).require (); + emit_move_insn (gen_lowpart (mode, operands[0]), + gen_lowpart (mode, operands[1])); DONE; } - [(set_attr "type" "vmov")] ) +;; We recognize mem-to-mem move in pre-RA stage so that we won't have +;; ICE (unrecognizable insn: (set (mem) (mem))). Then, the previous +;; mem-to-mem split pattern will force operands[1] into a register so +;; that mem-to-mem move will never happen after RA. +;; +;; We don't allow mem-to-mem move in post-RA stage since we +;; don't have an instruction to split mem-to-mem move after RA. 
(define_insn_and_split "*mov" [(set (match_operand:VLS_AVL_IMM 0 "reg_or_mem_operand" "=vr, m, vr") (match_operand:VLS_AVL_IMM 1 "reg_or_mem_operand" " m,vr, vr"))] "TARGET_VECTOR - && (register_operand (operands[0], mode) + && (can_create_pseudo_p () + || register_operand (operands[0], mode) || register_operand (operands[1], mode))" "@ # diff --git a/gcc/testsuite/gcc.target/riscv/rvv/fortran/pr111566.f90 b/gcc/testsuite/gcc.target/riscv/rvv/fortran/pr111566.f90 new file mode 100644 index 00000000000..2e30dc9bfaa --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/fortran/pr111566.f90 @@ -0,0 +1,31 @@ +! { dg-do compile } +! { dg-options "-march=rv64gcv -mabi=lp64d -Ofast -fallow-argument-mismatch -fmax-stack-var-size=65536 -S -std=legacy -w" } + +module a + integer,parameter :: SHR_KIND_R8 = selected_real_kind(12) +end module a +module b + use a, c => shr_kind_r8 +contains + subroutine d(cg , km, i1, i2) + real (c) ch(i2,km) + real (c) cg(4,i1:i2,km) + real dc(i2,km) + real(c) ci(i2,km) + real(c) cj(i2,km) + do k=2,ck + do i=i1,0 + cl = ci(i,k) *ci(i,1) / cj(i,k)+ch(i,1) + cm = cg(1,i,k) - min(e,cg(1,i,co)) + dc(i,k) = sign(cm, cl) + enddo + enddo + if ( cq == 0 ) then + do i=i1,i2 + if( cr <= cs ) then + cg= sign( min(ct, cg), cg) + endif + enddo + endif + end subroutine d +end module b -- 2.36.3