public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] RISC-V: Fix VLMAX AVL incorrect local anticipate [VSETVL PASS]
@ 2023-08-09 10:51 Juzhe-Zhong
  2023-08-09 17:01 ` Jeff Law
  0 siblings, 1 reply; 5+ messages in thread
From: Juzhe-Zhong @ 2023-08-09 10:51 UTC (permalink / raw)
  To: gcc-patches; +Cc: kito.cheng, kito.cheng, jeffreyalaw, rdapp.gcc, Juzhe-Zhong

We found a bug in the VSETVL pass that is triggered by strided_load_run-1.c on RV32 systems.

FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test

This is because the VSETVL pass incorrectly hoists the vsetvl instruction:

...
   10156:	0d9075d7          	vsetvli	a1,zero,e64,m2,ta,ma ---> pollutes the 'a1' register, which is used by the following insns.
   1015a:	01d586b3          	add	a3,a1,t4  --------> use 'a1'
   1015e:	5e070257          	vmv.v.v	v4,v14
   10162:	b7032257          	vmacc.vv	v4,v6,v16
   10166:	26440257          	vand.vv	v4,v4,v8
   1016a:	22880227          	vs2r.v	v4,(a6)
   1016e:	00b6b7b3          	sltu	a5,a3,a1
   10172:	22888227          	vs2r.v	v4,(a7)
   10176:	9e60b157          	vmv2r.v	v2,v6
   1017a:	97ba                	add	a5,a5,a4
   1017c:	a6a62157          	vmadd.vv	v2,v12,v10
   10180:	26240157          	vand.vv	v2,v2,v8
   10184:	22830127          	vs2r.v	v2,(t1)
   10188:	873e                	mv	a4,a5
   1018a:	982a                	add	a6,a6,a0
   1018c:	98aa                	add	a7,a7,a0
   1018e:	932a                	add	t1,t1,a0
   10190:	85b6                	mv	a1,a3       -----> set 'a1'
...

gcc/ChangeLog:

        * config/riscv/riscv-vsetvl.cc (anticipatable_occurrence_p): Fix incorrect anticipate info.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c: Adapt test.
        * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-24.c: Ditto.
        * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-25.c: Ditto.
        * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-26.c: Ditto.
        * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-36.c: Ditto.
        * gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-14.c: Ditto.
        * gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-15.c: Ditto.

---
 gcc/config/riscv/riscv-vsetvl.cc              |  4 ++-
 .../gather-scatter/strided_load_run-1.c       |  1 +
 .../riscv/rvv/vsetvl/vlmax_back_prop-24.c     |  2 +-
 .../riscv/rvv/vsetvl/vlmax_back_prop-25.c     | 31 +++++++++----------
 .../riscv/rvv/vsetvl/vlmax_back_prop-26.c     | 30 +++++++++---------
 .../riscv/rvv/vsetvl/vlmax_back_prop-36.c     |  2 +-
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-14.c  | 10 +++---
 .../riscv/rvv/vsetvl/vlmax_switch_vtype-15.c  | 14 ++++-----
 8 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 628bf116db0..08c487d82c0 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -333,7 +333,9 @@ anticipatable_occurrence_p (const bb_info *bb, const vector_insn_info dem)
   if (dem.has_avl_reg ())
     {
       /* rs1 (avl) are not modified in the basic block prior to the VSETVL.  */
-      if (!vlmax_avl_p (dem.get_avl ()))
+      rtx avl
+	= has_vl_op (insn->rtl ()) ? get_vl (insn->rtl ()) : dem.get_avl ();
+      if (!vlmax_avl_p (avl))
 	{
 	  set_info *set = dem.get_avl_source ();
 	  /* If it's undefined, it's not anticipatable conservatively.  */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c
index 4b03c25a907..7ffa93bf13f 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c
@@ -1,4 +1,5 @@
 /* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-mcmodel=medany" } */
 
 #include "strided_load-1.c"
 #include <assert.h>
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-24.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-24.c
index bc98e5f8269..fe41d15cb28 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-24.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-24.c
@@ -30,7 +30,7 @@ void f (int32_t * restrict in, int32_t * restrict out, int n, int cond)
   *(vint32mf2_t*)(out + 7000) = v;
  
   for (int i = 0; i < n; i++) {
-    vbool64_t v;
+    vbool64_t v = *(vbool64_t*)(in + i + 9000);
     *(vbool64_t*)(out + i + 700) = v;
   }
 }
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-25.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-25.c
index 0a10827daf5..c566f8a4751 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-25.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-25.c
@@ -10,7 +10,7 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vint8mf8_t*)(out + 100) = v;
     for (int i = 0; i < n; i++)
       {
-        vint16mf4_t v2;
+        vint16mf4_t v2 = __riscv_vmv_v_x_i16mf4 (0, __riscv_vsetvlmax_e16mf4 ());
         *(vint16mf4_t*)(out + i + 100) = v2;
       }
   } else if (cond == 1) {
@@ -18,7 +18,7 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vint8mf8_t*)(out + 200) = v;
     for (int i = 0; i < n; i++)
       {
-        vint32mf2_t v2;
+        vint32mf2_t v2 = __riscv_vmv_v_x_i32mf2 (0, __riscv_vsetvlmax_e32mf2 ());
         *(vint32mf2_t*)(out + i + 200) = v2;
       }
   } else if (cond == 2) {
@@ -26,7 +26,7 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vint8mf8_t*)(out + 300) = v;
     for (int i = 0; i < n; i++)
       {
-        vint8mf8_t v2;
+        vint8mf8_t v2 = __riscv_vmv_v_x_i8mf8 (0, __riscv_vsetvlmax_e8mf8 ());
         *(vint8mf8_t*)(out + i + 300) = v2;
       }
   } else if (cond == 3) {
@@ -34,7 +34,7 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vint8mf8_t*)(out + 400) = v;
     for (int i = 0; i < n; i++)
       {
-        vint64m1_t v2;
+        vint64m1_t v2 = __riscv_vmv_v_x_i64m1 (0, __riscv_vsetvlmax_e64m1 ());
         *(vint64m1_t*)(out + i + 400) = v2;
       }
   } else if (cond == 4) {
@@ -42,7 +42,7 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vint8mf8_t*)(out + 500) = v;
     for (int i = 0; i < n; i++)
       {
-        vfloat32mf2_t v2;
+        vfloat32mf2_t v2 = __riscv_vfmv_v_f_f32mf2 (0, __riscv_vsetvlmax_e32mf2 ());
         *(vfloat32mf2_t*)(out + i + 500) = v2;
       }
   } else if (cond == 5) {
@@ -50,7 +50,7 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vuint8mf8_t*)(out + 600) = v;
     for (int i = 0; i < n; i++)
       {
-        vuint16mf4_t v2;
+        vuint16mf4_t v2 = __riscv_vmv_v_x_u16mf4 (0, __riscv_vsetvlmax_e16mf4 ());
         *(vuint16mf4_t*)(out + i + 600) = v2;
       }
   } else if (cond == 6) {
@@ -58,7 +58,7 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vuint8mf8_t*)(out + 700) = v;
     for (int i = 0; i < n; i++)
       {
-        vuint32mf2_t v2;
+        vuint32mf2_t v2 = __riscv_vmv_v_x_u32mf2 (0, __riscv_vsetvlmax_e32mf2 ());
         *(vuint32mf2_t*)(out + i + 700) = v2;
       }
   } else if (cond == 7) {
@@ -66,7 +66,7 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vuint8mf8_t*)(out + 800) = v;
     for (int i = 0; i < n; i++)
       {
-        vuint8mf8_t v2;
+        vuint8mf8_t v2 = __riscv_vmv_v_x_u8mf8 (0, __riscv_vsetvlmax_e8mf8 ());
         *(vuint8mf8_t*)(out + i + 800) = v2;
       }
   } else if (cond == 8) {
@@ -74,7 +74,7 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vuint8mf8_t*)(out + 900) = v;
     for (int i = 0; i < n; i++)
       {
-        vuint64m1_t v2;
+        vuint64m1_t v2 = __riscv_vmv_v_x_u64m1 (0, __riscv_vsetvlmax_e64m1 ());
         *(vuint64m1_t*)(out + i + 900) = v2;
       }
   } else {
@@ -82,15 +82,14 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vuint8mf8_t*)(out + 1000) = v;
     for (int i = 0; i < n; i++)
       {
-        vfloat32mf2_t v2;
+        vfloat32mf2_t v2 = *(vfloat32mf2_t*)(in + i + 9000);
         *(vfloat32mf2_t*)(out + i + 1000) = v2;
       }
   }
 }
 
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 4 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 10 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 20 { target { no-opts "-O0"  no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 10 { target { no-opts "-O0"  no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-26.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-26.c
index a6540751398..d0e75258188 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-26.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-26.c
@@ -10,7 +10,7 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vint8mf8_t*)(out + 100) = v;
     for (int i = 0; i < n; i++)
       {
-        vint16mf4_t v2;
+        vint16mf4_t v2 = __riscv_vmv_v_x_i16mf4 (0, __riscv_vsetvlmax_e16mf4 ());
         *(vint16mf4_t*)(out + i + 100) = v2;
       }
   } else if (cond == 1) {
@@ -18,7 +18,7 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vint8mf8_t*)(out + 200) = v;
     for (int i = 0; i < n; i++)
       {
-        vint32mf2_t v2;
+        vint32mf2_t v2 = __riscv_vmv_v_x_i32mf2 (0, __riscv_vsetvlmax_e32mf2 ());
         *(vint32mf2_t*)(out + i + 200) = v2;
       }
   } else if (cond == 2) {
@@ -26,7 +26,7 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vint8mf8_t*)(out + 300) = v;
     for (int i = 0; i < n; i++)
       {
-        vint8mf8_t v2;
+        vint8mf8_t v2 = __riscv_vmv_v_x_i8mf8 (0, __riscv_vsetvlmax_e8mf8 ());
         *(vint8mf8_t*)(out + i + 300) = v2;
       }
   } else if (cond == 3) {
@@ -34,7 +34,7 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vint8mf8_t*)(out + 400) = v;
     for (int i = 0; i < n; i++)
       {
-        vint64m1_t v2;
+        vint64m1_t v2 = __riscv_vmv_v_x_i64m1 (0, __riscv_vsetvlmax_e64m1 ());
         *(vint64m1_t*)(out + i + 400) = v2;
       }
   } else if (cond == 4) {
@@ -42,7 +42,7 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vint8mf8_t*)(out + 500) = v;
     for (int i = 0; i < n; i++)
       {
-        vfloat32mf2_t v2;
+        vfloat32mf2_t v2 = __riscv_vfmv_v_f_f32mf2 (0, __riscv_vsetvlmax_e32mf2 ());
         *(vfloat32mf2_t*)(out + i + 500) = v2;
       }
   } else if (cond == 5) {
@@ -50,7 +50,7 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vuint8mf8_t*)(out + 600) = v;
     for (int i = 0; i < n; i++)
       {
-        vuint16mf4_t v2;
+        vuint16mf4_t v2 = __riscv_vmv_v_x_u16mf4 (0, __riscv_vsetvlmax_e16mf4 ());
         *(vuint16mf4_t*)(out + i + 600) = v2;
       }
   } else if (cond == 6) {
@@ -58,7 +58,7 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vuint8mf8_t*)(out + 700) = v;
     for (int i = 0; i < n; i++)
       {
-        vuint32mf2_t v2;
+        vuint32mf2_t v2 = __riscv_vmv_v_x_u32mf2 (0, __riscv_vsetvlmax_e32mf2 ());
         *(vuint32mf2_t*)(out + i + 700) = v2;
       }
   } else if (cond == 7) {
@@ -66,7 +66,7 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vuint8mf8_t*)(out + 800) = v;
     for (int i = 0; i < n; i++)
       {
-        vuint8mf8_t v2;
+        vuint8mf8_t v2 = __riscv_vmv_v_x_u8mf8 (0, __riscv_vsetvlmax_e8mf8 ());
         *(vuint8mf8_t*)(out + i + 800) = v2;
       }
   } else if (cond == 8) {
@@ -74,16 +74,14 @@ void f (void * restrict in, void * restrict out, int n, int cond)
     *(vuint8mf8_t*)(out + 900) = v;
     for (int i = 0; i < n; i++)
       {
-        vuint64m1_t v2;
+        vuint64m1_t v2 = *(vuint64m1_t*)(in + i + 9000);
         *(vuint64m1_t*)(out + i + 900) = v2;
       }
   }
 }
 
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 8 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 17 { target { no-opts "-O0"  no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*mf4,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 9 { target { no-opts "-O0"  no-opts "-O1"  no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-36.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-36.c
index a6009b74101..9be774c958b 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-36.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-36.c
@@ -37,7 +37,7 @@ void f (int32_t * restrict in, int32_t * restrict out, int32_t * restrict in2, i
    }
   for (int i = 0; i < n; i++) 
     {
-      vint8mf8_t v1;
+      vint8mf8_t v1 = *(vint8mf8_t*)(in2 + i + 20);
       *(vint8mf8_t*)(out + i + 10) = v1;
     }
 }
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-14.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-14.c
index f416a231f0e..1fc97f8b6f2 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-14.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-14.c
@@ -6,7 +6,7 @@
 void f (void * restrict in, void * restrict out, int32_t * a, int32_t * b, int n, int cond)
 {
   for (int i = 0; i < n; i++) {
-    vint16mf4_t v;
+    vint16mf4_t v = __riscv_vmv_v_x_i16mf4 (0, __riscv_vsetvlmax_e16mf4 ());
     *(vint16mf4_t*)(out + i + 700) = v;
   }
   for (int i = 0; i < n; i++) {
@@ -19,15 +19,15 @@ void f (void * restrict in, void * restrict out, int32_t * a, int32_t * b, int n
     a[i] = a[i] - b[i];
   }
   for (int i = 0; i < n; i++) {
-    vint32mf2_t v;
+    vint32mf2_t v = __riscv_vmv_v_x_i32mf2 (0, __riscv_vsetvlmax_e32mf2 ());
     *(vint32mf2_t*)(out + i + 7000) = v;
   }
   for (int i = 0; i < n; i++) {
-    vint64m1_t v;
+    vint64m1_t v = __riscv_vmv_v_x_i64m1 (0, __riscv_vsetvlmax_e64m1 ());
     *(vint64m1_t*)(out + i + 8000) = v;
   }
   for (int i = 0; i < n; i++) {
-    vint8mf8_t v;
+    vint8mf8_t v = __riscv_vmv_v_x_i8mf8 (0, __riscv_vsetvlmax_e8mf8 ());
     *(vint8mf8_t*)(out + i + 9000) = v;
   }
 }
@@ -36,4 +36,4 @@ void f (void * restrict in, void * restrict out, int32_t * a, int32_t * b, int n
 /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-funroll-loops" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-flto" no-opts "-g" } } } } */
 /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0"  no-opts "-funroll-loops" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-flto" no-opts "-g" } } } } */
 /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0"  no-opts "-funroll-loops" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-flto" no-opts "-g" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 4 { target { no-opts "-O0" no-opts "-funroll-loops" no-opts "-Os" no-opts "-Oz" no-opts "-flto" no-opts "-g" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 4 { target { no-opts "-O0" "-O1" no-opts "-funroll-loops" no-opts "-Os" no-opts "-Oz" no-opts "-flto" no-opts "-g" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-15.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-15.c
index a39b48ccb99..f3b37661fbe 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-15.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-15.c
@@ -6,7 +6,7 @@
 void f (void * restrict in, void * restrict out, int32_t * a, int32_t * b, int n, int cond)
 {
   for (int i = 0; i < n; i++) {
-    vint16mf4_t v;
+    vint16mf4_t v = __riscv_vmv_v_x_i16mf4 (0, __riscv_vsetvlmax_e16mf4 ());
     *(vint16mf4_t*)(out + i + 700) = v;
   }
   for (int i = 0; i < n; i++) {
@@ -19,27 +19,27 @@ void f (void * restrict in, void * restrict out, int32_t * a, int32_t * b, int n
     a[i] = a[i] - b[i];
   }
   for (int i = 0; i < n; i++) {
-    vint32mf2_t v;
+    vint32mf2_t v = __riscv_vmv_v_x_i32mf2 (0, __riscv_vsetvlmax_e32mf2 ());
     *(vint32mf2_t*)(out + i + 7000) = v;
   }
   for (int i = 0; i < n; i++) {
-    vint16mf2_t v;
+    vint16mf2_t v = __riscv_vmv_v_x_i16mf2 (0, __riscv_vsetvlmax_e16mf2 ());
     *(vint16mf2_t*)(out + i + 777) = v;
   }
   for (int i = 0; i < n; i++) {
-    vint64m1_t v;
+    vint64m1_t v = __riscv_vmv_v_x_i64m1 (0, __riscv_vsetvlmax_e64m1 ());
     *(vint64m1_t*)(out + i + 8000) = v;
   }
   for (int i = 0; i < n; i++) {
-    vfloat32mf2_t v;
+    vfloat32mf2_t v = __riscv_vfmv_v_f_f32mf2 (0, __riscv_vsetvlmax_e32mf2 ());
     *(vfloat32mf2_t*)(out + i + 7777) = v;
   }
   for (int i = 0; i < n; i++) {
-    vuint16mf2_t v;
+    vuint16mf2_t v = __riscv_vmv_v_x_u16mf2 (0, __riscv_vsetvlmax_e16mf2 ());
     *(vuint16mf2_t*)(out + i + 888) = v;
   }
   for (int i = 0; i < n; i++) {
-    vint8mf8_t v;
+    vint8mf8_t v = __riscv_vmv_v_x_i8mf8 (0, __riscv_vsetvlmax_e8mf8 ());
     *(vint8mf8_t*)(out + i + 9000) = v;
   }
 }
-- 
2.36.3


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] RISC-V: Fix VLMAX AVL incorrect local anticipate [VSETVL PASS]
  2023-08-09 10:51 [PATCH] RISC-V: Fix VLMAX AVL incorrect local anticipate [VSETVL PASS] Juzhe-Zhong
@ 2023-08-09 17:01 ` Jeff Law
  2023-08-09 23:43   ` Li, Pan2
  2023-08-10  0:50   ` juzhe.zhong
  0 siblings, 2 replies; 5+ messages in thread
From: Jeff Law @ 2023-08-09 17:01 UTC (permalink / raw)
  To: Juzhe-Zhong, gcc-patches; +Cc: kito.cheng, kito.cheng, rdapp.gcc



On 8/9/23 04:51, Juzhe-Zhong wrote:
> Realize we have a bug in VSETVL PASS which is triggered by strided_load_run-1.c in RV32 system.
> 
> FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
> 
> This is because VSETVL PASS incorrect hoist vsetvl instruction:
> 
> ...
>     10156:	0d9075d7          	vsetvli	a1,zero,e64,m2,ta,ma ---> pollute 'a1' register which will be used by following insns.
>     1015a:	01d586b3          	add	a3,a1,t4  --------> use 'a1'
>     1015e:	5e070257          	vmv.v.v	v4,v14
>     10162:	b7032257          	vmacc.vv	v4,v6,v16
>     10166:	26440257          	vand.vv	v4,v4,v8
>     1016a:	22880227          	vs2r.v	v4,(a6)
>     1016e:	00b6b7b3          	sltu	a5,a3,a1
>     10172:	22888227          	vs2r.v	v4,(a7)
>     10176:	9e60b157          	vmv2r.v	v2,v6
>     1017a:	97ba                	add	a5,a5,a4
>     1017c:	a6a62157          	vmadd.vv	v2,v12,v10
>     10180:	26240157          	vand.vv	v2,v2,v8
>     10184:	22830127          	vs2r.v	v2,(t1)
>     10188:	873e                	mv	a4,a5
>     1018a:	982a                	add	a6,a6,a0
>     1018c:	98aa                	add	a7,a7,a0
>     1018e:	932a                	add	t1,t1,a0
>     10190:	85b6                	mv	a1,a3       -----> set 'a1'
> ...
> 
> gcc/ChangeLog:
> 
>          * config/riscv/riscv-vsetvl.cc (anticipatable_occurrence_p): Fix incorrect anticipate info.
> 
> gcc/testsuite/ChangeLog:
> 
>          * gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c: Adapt test.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-24.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-25.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-26.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-36.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-14.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-15.c: Ditto.
OK.

Do we need to backport this to gcc-13?

jeff

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH] RISC-V: Fix VLMAX AVL incorrect local anticipate [VSETVL PASS]
  2023-08-09 17:01 ` Jeff Law
@ 2023-08-09 23:43   ` Li, Pan2
  2023-08-10  0:50   ` juzhe.zhong
  1 sibling, 0 replies; 5+ messages in thread
From: Li, Pan2 @ 2023-08-09 23:43 UTC (permalink / raw)
  To: Jeff Law, Juzhe-Zhong, gcc-patches; +Cc: kito.cheng, kito.cheng, rdapp.gcc

Committed to trunk, thanks Jeff.

Pan

-----Original Message-----
From: Gcc-patches <gcc-patches-bounces+pan2.li=intel.com@gcc.gnu.org> On Behalf Of Jeff Law via Gcc-patches
Sent: Thursday, August 10, 2023 1:01 AM
To: Juzhe-Zhong <juzhe.zhong@rivai.ai>; gcc-patches@gcc.gnu.org
Cc: kito.cheng@gmail.com; kito.cheng@sifive.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH] RISC-V: Fix VLMAX AVL incorrect local anticipate [VSETVL PASS]



On 8/9/23 04:51, Juzhe-Zhong wrote:
> Realize we have a bug in VSETVL PASS which is triggered by strided_load_run-1.c in RV32 system.
> 
> FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
> 
> This is because VSETVL PASS incorrect hoist vsetvl instruction:
> 
> ...
>     10156:	0d9075d7          	vsetvli	a1,zero,e64,m2,ta,ma ---> pollute 'a1' register which will be used by following insns.
>     1015a:	01d586b3          	add	a3,a1,t4  --------> use 'a1'
>     1015e:	5e070257          	vmv.v.v	v4,v14
>     10162:	b7032257          	vmacc.vv	v4,v6,v16
>     10166:	26440257          	vand.vv	v4,v4,v8
>     1016a:	22880227          	vs2r.v	v4,(a6)
>     1016e:	00b6b7b3          	sltu	a5,a3,a1
>     10172:	22888227          	vs2r.v	v4,(a7)
>     10176:	9e60b157          	vmv2r.v	v2,v6
>     1017a:	97ba                	add	a5,a5,a4
>     1017c:	a6a62157          	vmadd.vv	v2,v12,v10
>     10180:	26240157          	vand.vv	v2,v2,v8
>     10184:	22830127          	vs2r.v	v2,(t1)
>     10188:	873e                	mv	a4,a5
>     1018a:	982a                	add	a6,a6,a0
>     1018c:	98aa                	add	a7,a7,a0
>     1018e:	932a                	add	t1,t1,a0
>     10190:	85b6                	mv	a1,a3       -----> set 'a1'
> ...
> 
> gcc/ChangeLog:
> 
>          * config/riscv/riscv-vsetvl.cc (anticipatable_occurrence_p): Fix incorrect anticipate info.
> 
> gcc/testsuite/ChangeLog:
> 
>          * gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c: Adapt test.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-24.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-25.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-26.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-36.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-14.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-15.c: Ditto.
OK.

Do we need to backport this to gcc-13?

jeff

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: Re: [PATCH] RISC-V: Fix VLMAX AVL incorrect local anticipate [VSETVL PASS]
  2023-08-09 17:01 ` Jeff Law
  2023-08-09 23:43   ` Li, Pan2
@ 2023-08-10  0:50   ` juzhe.zhong
  2023-08-10  4:33     ` Li, Pan2
  1 sibling, 1 reply; 5+ messages in thread
From: juzhe.zhong @ 2023-08-10  0:50 UTC (permalink / raw)
  To: jeffreyalaw, gcc-patches; +Cc: kito.cheng, Kito.cheng, Robin Dapp

[-- Attachment #1: Type: text/plain, Size: 2580 bytes --]

Yes, I think so. Will backport to GCC 13 soon.



juzhe.zhong@rivai.ai
 
From: Jeff Law
Date: 2023-08-10 01:01
To: Juzhe-Zhong; gcc-patches
CC: kito.cheng; kito.cheng; rdapp.gcc
Subject: Re: [PATCH] RISC-V: Fix VLMAX AVL incorrect local anticipate [VSETVL PASS]
 
 
On 8/9/23 04:51, Juzhe-Zhong wrote:
> Realize we have a bug in VSETVL PASS which is triggered by strided_load_run-1.c in RV32 system.
> 
> FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
> 
> This is because VSETVL PASS incorrect hoist vsetvl instruction:
> 
> ...
>     10156: 0d9075d7          vsetvli a1,zero,e64,m2,ta,ma ---> pollute 'a1' register which will be used by following insns.
>     1015a: 01d586b3          add a3,a1,t4  --------> use 'a1'
>     1015e: 5e070257          vmv.v.v v4,v14
>     10162: b7032257          vmacc.vv v4,v6,v16
>     10166: 26440257          vand.vv v4,v4,v8
>     1016a: 22880227          vs2r.v v4,(a6)
>     1016e: 00b6b7b3          sltu a5,a3,a1
>     10172: 22888227          vs2r.v v4,(a7)
>     10176: 9e60b157          vmv2r.v v2,v6
>     1017a: 97ba                add a5,a5,a4
>     1017c: a6a62157          vmadd.vv v2,v12,v10
>     10180: 26240157          vand.vv v2,v2,v8
>     10184: 22830127          vs2r.v v2,(t1)
>     10188: 873e                mv a4,a5
>     1018a: 982a                add a6,a6,a0
>     1018c: 98aa                add a7,a7,a0
>     1018e: 932a                add t1,t1,a0
>     10190: 85b6                mv a1,a3       -----> set 'a1'
> ...
> 
> gcc/ChangeLog:
> 
>          * config/riscv/riscv-vsetvl.cc (anticipatable_occurrence_p): Fix incorrect anticipate info.
> 
> gcc/testsuite/ChangeLog:
> 
>          * gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c: Adapt test.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-24.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-25.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-26.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-36.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-14.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-15.c: Ditto.
OK.
 
Do we need to backport this to gcc-13?
 
jeff
 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: Re: [PATCH] RISC-V: Fix VLMAX AVL incorrect local anticipate [VSETVL PASS]
  2023-08-10  0:50   ` juzhe.zhong
@ 2023-08-10  4:33     ` Li, Pan2
  0 siblings, 0 replies; 5+ messages in thread
From: Li, Pan2 @ 2023-08-10  4:33 UTC (permalink / raw)
  To: juzhe.zhong, jeffreyalaw, gcc-patches; +Cc: kito.cheng, Kito.cheng, Robin Dapp

Thanks Jeff. Ported to gcc-13 with minor changes to test cases.

Pan

-----Original Message-----
From: Gcc-patches <gcc-patches-bounces+pan2.li=intel.com@gcc.gnu.org> On Behalf Of juzhe.zhong@rivai.ai
Sent: Thursday, August 10, 2023 8:50 AM
To: jeffreyalaw <jeffreyalaw@gmail.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: kito.cheng <kito.cheng@gmail.com>; Kito.cheng <kito.cheng@sifive.com>; Robin Dapp <rdapp.gcc@gmail.com>
Subject: Re: Re: [PATCH] RISC-V: Fix VLMAX AVL incorrect local anticipate [VSETVL PASS]

Yes. I think so. Will backport GCC 13 soon.



juzhe.zhong@rivai.ai
 
From: Jeff Law
Date: 2023-08-10 01:01
To: Juzhe-Zhong; gcc-patches
CC: kito.cheng; kito.cheng; rdapp.gcc
Subject: Re: [PATCH] RISC-V: Fix VLMAX AVL incorrect local anticipate [VSETVL PASS]
 
 
On 8/9/23 04:51, Juzhe-Zhong wrote:
> Realize we have a bug in VSETVL PASS which is triggered by strided_load_run-1.c in RV32 system.
> 
> FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
> FAIL: gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c execution test
> 
> This is because VSETVL PASS incorrect hoist vsetvl instruction:
> 
> ...
>     10156: 0d9075d7          vsetvli a1,zero,e64,m2,ta,ma ---> pollute 'a1' register which will be used by following insns.
>     1015a: 01d586b3          add a3,a1,t4  --------> use 'a1'
>     1015e: 5e070257          vmv.v.v v4,v14
>     10162: b7032257          vmacc.vv v4,v6,v16
>     10166: 26440257          vand.vv v4,v4,v8
>     1016a: 22880227          vs2r.v v4,(a6)
>     1016e: 00b6b7b3          sltu a5,a3,a1
>     10172: 22888227          vs2r.v v4,(a7)
>     10176: 9e60b157          vmv2r.v v2,v6
>     1017a: 97ba                add a5,a5,a4
>     1017c: a6a62157          vmadd.vv v2,v12,v10
>     10180: 26240157          vand.vv v2,v2,v8
>     10184: 22830127          vs2r.v v2,(t1)
>     10188: 873e                mv a4,a5
>     1018a: 982a                add a6,a6,a0
>     1018c: 98aa                add a7,a7,a0
>     1018e: 932a                add t1,t1,a0
>     10190: 85b6                mv a1,a3       -----> set 'a1'
> ...
> 
> gcc/ChangeLog:
> 
>          * config/riscv/riscv-vsetvl.cc (anticipatable_occurrence_p): Fix incorrect anticipate info.
> 
> gcc/testsuite/ChangeLog:
> 
>          * gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c: Adapt test.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-24.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-25.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-26.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_back_prop-36.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-14.c: Ditto.
>          * gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-15.c: Ditto.
OK.
 
Do we need to backport this to gcc-13?
 
jeff
 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-08-10  4:35 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-09 10:51 [PATCH] RISC-V: Fix VLMAX AVL incorrect local anticipate [VSETVL PASS] Juzhe-Zhong
2023-08-09 17:01 ` Jeff Law
2023-08-09 23:43   ` Li, Pan2
2023-08-10  0:50   ` juzhe.zhong
2023-08-10  4:33     ` Li, Pan2

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).