From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <SRS0=TEaY=FK=rivai.ai=juzhe.zhong@sourceware.org>
Received: from smtpbgbr2.qq.com (smtpbgbr2.qq.com [54.207.22.56])
	by sourceware.org (Postfix) with ESMTPS id AA4DF3858404
	for <gcc-patches@gcc.gnu.org>; Tue, 26 Sep 2023 14:49:37 +0000 (GMT)
DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org AA4DF3858404
Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=rivai.ai
Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=rivai.ai
X-QQ-mid: bizesmtp73t1695739770tfdhz5sj
Received: from rios-cad122.hadoop.rioslab.org ( [58.60.1.26])
	by bizesmtp.qq.com (ESMTP) with 
	id ; Tue, 26 Sep 2023 22:49:29 +0800 (CST)
X-QQ-SSF: 01400000000000G0V000000A0000000
X-QQ-FEAT: W+onFc5Tw4OxK0sQvtpxQM2qYghX1CjXxd08Uw9aKYabP2iXq+578j+gD4Nbm
	Lpm5Ycgknbdp7vVjK9JUaj0fuIPyRQqmRGTW2wg9LUTsY/NpsJNTowhN6eWkuacnwh5NqjP
	zYy47Lkmda70fe0pmfDi4U/czeddc1IRTfehbFeFdhyVqCUTQCLGAMv395YWmsHhDQHvXG3
	lHi7r/YnuUTYigEAYVumjAn8XQpb+z+/rxYfrOGvevAlkXACcMKo3R/3ynIW+DzZCk0D8JN
	zLo2bg7lpRxnBr/jb0pGsbShZPIIXgCTF8p74V8a/JO0jeTVgqqxIq88zXxDPzDB6/oDR2y
	0dp5KAgynMGzqlolxjJqnsT9ERBBzPB0nCkysk3ue26U0hoCABX+ieGrM8ZXvoq/rC2OsWn
	DEOwhioFFxerFAfUo58ocg==
X-QQ-GoodBg: 2
X-BIZMAIL-ID: 12047579028708163242
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
To: gcc-patches@gcc.gnu.org
Cc: kito.cheng@gmail.com,
	kito.cheng@sifive.com,
	jeffreyalaw@gmail.com,
	rdapp.gcc@gmail.com,
	Juzhe-Zhong <juzhe.zhong@rivai.ai>
Subject: [PATCH V2] RISC-V: Fix mem-to-mem VLS move pattern[PR111566]
Date: Tue, 26 Sep 2023 22:49:28 +0800
Message-Id: <20230926144928.630319-1-juzhe.zhong@rivai.ai>
X-Mailer: git-send-email 2.36.3
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-QQ-SENDSIZE: 520
Feedback-ID: bizesmtp:rivai.ai:qybglogicsvrgz:qybglogicsvrgz7a-one-0
X-Spam-Status: No, score=-10.3 required=5.0 tests=BAYES_00,GIT_PATCH_0,KAM_DMARC_STATUS,KAM_SHORT,RCVD_IN_BARRACUDACENTRAL,RCVD_IN_DNSWL_NONE,RCVD_IN_MSPIKE_H2,SPF_HELO_PASS,SPF_PASS,TXREP autolearn=ham autolearn_force=no version=3.4.6
X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org
List-Id: <gcc-patches.gcc.gnu.org>

Keeping the mem-to-mem insn pattern separate from the
reg-to-mem/mem-to-reg/reg-to-reg pattern causes an ICE in RA, since RA
prefers that they stay together.

Now we handle mem-to-mem as a pure pre-RA split pattern and only allow the
define_insn to match a mem-to-mem VLS move in the pre-RA stage (mem-to-mem
moves are forbidden after RA).

Tested with no difference in results. Committed.

	PR target/111566

gcc/ChangeLog:

	* config/riscv/vector.md (*mov<mode>_mem_to_mem): Only allow
	mem-to-mem move for VLS modes with size <= MAX_BITS_PER_WORD.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/fortran/pr111566.f90: New test.

---
 gcc/config/riscv/vector.md                    | 60 ++++++++++---------
 .../gcc.target/riscv/rvv/fortran/pr111566.f90 | 31 ++++++++++
 2 files changed, 62 insertions(+), 29 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/fortran/pr111566.f90
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index d5300a33946..a6dbaa74a10 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1222,48 +1222,50 @@
     DONE;
 })
 
-(define_insn_and_split "*mov<mode>_mem_to_mem"
+;; Some VLS modes (like V2SImode) have size <= a general purpose
+;; register width, we optimize such mem-to-mem move into mem-to-mem
+;; scalar move.  Otherwise, we always force operands[1] into register
+;; so that we will never get mem-to-mem move after RA.
+(define_split
   [(set (match_operand:VLS_AVL_IMM 0 "memory_operand")
 	(match_operand:VLS_AVL_IMM 1 "memory_operand"))]
-  "TARGET_VECTOR && can_create_pseudo_p ()"
-  "#"
-  "&& 1"
+  "TARGET_VECTOR && can_create_pseudo_p ()
+   && GET_MODE_BITSIZE (<MODE>mode).to_constant () <= MAX_BITS_PER_WORD"
   [(const_int 0)]
   {
-    if (GET_MODE_BITSIZE (<MODE>mode).to_constant () <= MAX_BITS_PER_WORD)
-      {
-        /* Opitmize the following case:
-
-	    typedef int8_t v2qi __attribute__ ((vector_size (2)));
-	    v2qi v = *(v2qi*)in;
-	    *(v2qi*)out = v;
-
-	    We prefer scalar load/store instead of vle.v/vse.v when
-	    the VLS modes size is smaller scalar mode.  */
-        machine_mode mode;
-        unsigned size = GET_MODE_BITSIZE (<MODE>mode).to_constant ();
-        if (FLOAT_MODE_P (<MODE>mode))
-	  mode = mode_for_size (size, MODE_FLOAT, 0).require ();
-        else
-	  mode = mode_for_size (size, MODE_INT, 0).require ();
-        emit_move_insn (gen_lowpart (mode, operands[0]),
-		        gen_lowpart (mode, operands[1]));
-      }
+    /* Optimize the following case:
+
+	typedef int8_t v2qi __attribute__ ((vector_size (2)));
+	v2qi v = *(v2qi*)in;
+	*(v2qi*)out = v;
+
+	We prefer scalar load/store instead of vle.v/vse.v when
+	the VLS mode size is no larger than a scalar mode.  */
+    machine_mode mode;
+    unsigned size = GET_MODE_BITSIZE (<MODE>mode).to_constant ();
+    if (FLOAT_MODE_P (<MODE>mode))
+      mode = mode_for_size (size, MODE_FLOAT, 0).require ();
     else
-      {
-	operands[1] = force_reg (<MODE>mode, operands[1]);
-	emit_move_insn (operands[0], operands[1]);
-      }
+      mode = mode_for_size (size, MODE_INT, 0).require ();
+    emit_move_insn (gen_lowpart (mode, operands[0]),
+    		    gen_lowpart (mode, operands[1]));
     DONE;
   }
-  [(set_attr "type" "vmov")]
 )
 
+;; We recognize mem-to-mem move in pre-RA stage so that we won't have
+;; ICE (unrecognizable insn: (set (mem) (mem))).  Then, the previous
+;; mem-to-mem split pattern will force operands[1] into a register so
+;; that mem-to-mem move will never happen after RA.
+;;
+;; We don't allow mem-to-mem move in post-RA stage since we
+;; don't have an instruction to split mem-to-mem move after RA.
 (define_insn_and_split "*mov<mode>"
   [(set (match_operand:VLS_AVL_IMM 0 "reg_or_mem_operand" "=vr, m, vr")
 	(match_operand:VLS_AVL_IMM 1 "reg_or_mem_operand" "  m,vr, vr"))]
   "TARGET_VECTOR
-   && (register_operand (operands[0], <MODE>mode)
+   && (can_create_pseudo_p ()
+       || register_operand (operands[0], <MODE>mode)
        || register_operand (operands[1], <MODE>mode))"
   "@
    #
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/fortran/pr111566.f90 b/gcc/testsuite/gcc.target/riscv/rvv/fortran/pr111566.f90
new file mode 100644
index 00000000000..2e30dc9bfaa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/fortran/pr111566.f90
@@ -0,0 +1,31 @@
+! { dg-do compile }
+! { dg-options "-march=rv64gcv -mabi=lp64d -Ofast -fallow-argument-mismatch -fmax-stack-var-size=65536 -S  -std=legacy -w" }
+
+module a
+  integer,parameter :: SHR_KIND_R8 = selected_real_kind(12)
+end module a
+module b
+  use a,  c => shr_kind_r8
+contains
+  subroutine d(cg , km, i1, i2)
+    real (c) ch(i2,km)
+    real (c) cg(4,i1:i2,km)
+    real  dc(i2,km)
+    real(c) ci(i2,km)
+    real(c) cj(i2,km)
+    do k=2,ck
+       do i=i1,0
+          cl = ci(i,k) *ci(i,1) /      cj(i,k)+ch(i,1)
+          cm = cg(1,i,k) - min(e,cg(1,i,co))
+          dc(i,k) = sign(cm, cl)
+       enddo
+    enddo
+    if ( cq == 0 ) then
+       do i=i1,i2
+          if( cr <=  cs ) then
+             cg= sign( min(ct,   cg),  cg)
+          endif
+       enddo
+    endif
+  end subroutine d
+end module b
-- 
2.36.3