public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] RISC-V: Fix VSETVL PASS regression
@ 2023-11-27 13:24 Juzhe-Zhong
  0 siblings, 0 replies; 3+ messages in thread
From: Juzhe-Zhong @ 2023-11-27 13:24 UTC (permalink / raw)
  To: gcc-patches; +Cc: kito.cheng, kito.cheng, jeffreyalaw, rdapp.gcc, Juzhe-Zhong

This patch is regression fix patch, not an optimization patch.
Since trunk GCC generates redundant vsetvl than GCC-13.

This is the case:

bb 2:
  def a2 (vsetvl a2, zero)
bb 3:
  use a2
bb 4:
  use a2 (vle)

before this patch:

bb 2:
vsetvl a2 zero
bb 3:
vsetvl zero, zero ----> should be eliminated.
bb 4:
vle.v

The root cause is we didn't set bb 3 as transparent since the incorrect codes.
bb 3 didn't modify "a2" just use it, the VSETVL status from bb 2 can be available to bb 3 and bb 4:

bb 2 -> bb 3 -> bb4.

Another regression fix is anticipation calculation:

bb 4:
use a5 (sub)
use a5 (vle)

The vle VSETVL status should be considered as anticipated as long as both sub and vle a5 def are coming from same def.

Tested on zvl128b no regression.

I am going to test on zvl256/zvl512/zvl1024

	PR target/112713

gcc/ChangeLog:

	* config/riscv/riscv-vsetvl.cc (pre_vsetvl::compute_lcm_local_properties): Fix regression.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/vsetvl/pr112713-1.c: New test.
	* gcc.target/riscv/rvv/vsetvl/pr112713-2.c: New test.

---
 gcc/config/riscv/riscv-vsetvl.cc              | 29 ++++++++----
 .../gcc.target/riscv/rvv/vsetvl/pr112713-1.c  | 24 ++++++++++
 .../gcc.target/riscv/rvv/vsetvl/pr112713-2.c  | 47 +++++++++++++++++++
 3 files changed, 91 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 74367ec8d8e..b3e07d4c3aa 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1433,9 +1433,23 @@ private:
 
   inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info)
   {
-    return info.has_vl ()
-	   && (find_access (i->uses (), REGNO (info.get_vl ()))
-	       || find_access (i->defs (), REGNO (info.get_vl ())));
+    if (info.has_vl ())
+      {
+	if (find_access (i->defs (), REGNO (info.get_vl ())))
+	  return true;
+	if (find_access (i->uses (), REGNO (info.get_vl ())))
+	  {
+	    resource_info resource = full_register (REGNO (info.get_vl ()));
+	    def_lookup dl1 = crtl->ssa->find_def (resource, i);
+	    def_lookup dl2 = crtl->ssa->find_def (resource, info.get_insn ());
+	    if (dl1.matching_set () || dl2.matching_set ())
+	      return true;
+	    /* If their VLs are coming from same def, we still want to fuse
+	       their VSETVL demand info to gain better performance.  */
+	    return dl1.prev_def (i) != dl2.prev_def (i);
+	  }
+      }
+    return false;
   }
   inline bool modify_avl_p (insn_info *i, const vsetvl_info &info)
   {
@@ -1702,7 +1716,7 @@ public:
 	for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn;
 	     i = i->prev_nondebug_insn ())
 	  {
-	    // no def amd use of vl
+	    // no def and use of vl
 	    if (!ignore_vl && modify_or_use_vl_p (i, info))
 	      return false;
 
@@ -2635,11 +2649,8 @@ pre_vsetvl::compute_lcm_local_properties ()
 
 	      for (const insn_info *insn : bb->real_nondebug_insns ())
 		{
-		  if ((info.has_nonvlmax_reg_avl ()
-		       && find_access (insn->defs (), REGNO (info.get_avl ())))
-		      || (info.has_vl ()
-			  && find_access (insn->uses (),
-					  REGNO (info.get_vl ()))))
+		  if (info.has_nonvlmax_reg_avl ()
+		      && find_access (insn->defs (), REGNO (info.get_avl ())))
 		    {
 		      bitmap_clear_bit (m_transp[bb_index], i);
 		      break;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c
new file mode 100644
index 00000000000..76402ab6167
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t
+foo (char const *buf, size_t len)
+{
+	size_t sum = 0;
+	size_t vl = __riscv_vsetvlmax_e8m8();
+	size_t step = vl * 4;
+	const char *it = buf, *end = buf + len;
+	for(; it + step <= end; ) {
+		it += vl;
+		vint8m8_t v3 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+		vbool1_t m3 = __riscv_vmsgt_vx_i8m8_b1(v3, -65, vl);
+		sum += __riscv_vcpop_m_b1(m3, vl);
+	}
+	return sum;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c
new file mode 100644
index 00000000000..04539d998cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+static size_t
+utf8_count_rvv(char const *buf, size_t len)
+{
+	size_t sum = 0;
+	for (size_t vl; len > 0; len -= vl, buf += vl) {
+		vl = __riscv_vsetvl_e8m8(len);
+		vint8m8_t v = __riscv_vle8_v_i8m8((void*)buf, vl);
+		vbool1_t mask = __riscv_vmsgt_vx_i8m8_b1(v, -65, vl);
+		sum += __riscv_vcpop_m_b1(mask, vl);
+	}
+	return sum;
+}
+
+size_t
+utf8_count_rvv_4x_tail(char const *buf, size_t len)
+{
+	size_t sum = 0;
+	size_t vl = __riscv_vsetvlmax_e8m8();
+	size_t step = vl * 4;
+	const char *it = buf, *end = buf + len;
+	for(; it + step <= end; ) {
+		vint8m8_t v0 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+		vint8m8_t v1 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+		vint8m8_t v2 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+		vint8m8_t v3 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+		vbool1_t m0 = __riscv_vmsgt_vx_i8m8_b1(v0, -65, vl);
+		vbool1_t m1 = __riscv_vmsgt_vx_i8m8_b1(v1, -65, vl);
+		vbool1_t m2 = __riscv_vmsgt_vx_i8m8_b1(v2, -65, vl);
+		vbool1_t m3 = __riscv_vmsgt_vx_i8m8_b1(v3, -65, vl);
+		sum += __riscv_vcpop_m_b1(m0, vl);
+		sum += __riscv_vcpop_m_b1(m1, vl);
+		sum += __riscv_vcpop_m_b1(m2, vl);
+		sum += __riscv_vcpop_m_b1(m3, vl);
+	}
+	return sum + utf8_count_rvv(it, end - it);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 2 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */
-- 
2.36.3


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] RISC-V: Fix VSETVL PASS regression
  2023-12-01  0:51 Juzhe-Zhong
@ 2023-12-01  2:57 ` juzhe.zhong
  0 siblings, 0 replies; 3+ messages in thread
From: juzhe.zhong @ 2023-12-01  2:57 UTC (permalink / raw)
  To: 钟居哲, gcc-patches
  Cc: kito.cheng, Kito.cheng, jeffreyalaw, Robin Dapp

[-- Attachment #1: Type: text/plain, Size: 8220 bytes --]

All regressions (zve64d/zvl128b/zvl256b/zvl512b/zvl1024b) passed.


juzhe.zhong@rivai.ai
 
From: Juzhe-Zhong
Date: 2023-12-01 08:51
To: gcc-patches
CC: kito.cheng; kito.cheng; jeffreyalaw; rdapp.gcc; Juzhe-Zhong
Subject: [PATCH] RISC-V: Fix VSETVL PASS regression
This patch fix 2 regression (one is bug regression, the other is performance regression).
Those 2 regressions are both we are comparing ratio for same AVL in wrong place.
 
1. BUG regression:
avl_single-84.c:
 
f0:
        li      a5,999424
        add     a1,a1,a5
        li      a4,299008
        add     a5,a0,a5
        addi    a3,a4,992
        addi    a5,a5,576
        addi    a1,a1,576
        vsetvli a4,zero,e8,m2,ta,ma
        add     a0,a0,a3
        vlm.v   v1,0(a5)
        vsm.v   v1,0(a1)
        vl1re64.v       v1,0(a0)
        beq     a2,zero,.L10
        li      a5,0
        vsetvli zero,zero,e64,m1,tu,ma   --->  This is totally incorrect since the ratio above is 4, wheras it is demanding ratio = 64 here.
.L3:
        fcvt.d.lu       fa5,a5
        addi    a5,a5,1
        fadd.d  fa5,fa5,fa0
        vfmv.s.f        v1,fa5
        bne     a5,a2,.L3
        vfmv.f.s        fa0,v1
        ret
.L10:
        vsetvli zero,zero,e64,m1,ta,ma
        vfmv.f.s        fa0,v1
        ret
 
2. Performance regression:
 
before this patch:
 
        vsetvli a5,a4,e8,m1,ta,ma
        vsetvli zero,a5,e32,m1,tu,ma
        vmv.s.x v2,zero
        vmv.s.x v1,zero
        vsetvli zero,a5,e32,m4,tu,ma
        vle32.v v4,0(a1)
        vfmul.vv        v4,v4,v4
        vfredosum.vs    v1,v4,v2
        vfmv.f.s        fa5,v1
        fsw     fa5,0(a0)
        sub     a4,a4,a5
        bne     a4,zero,.L2
        ret
 
After this patch:
 
vsetvli a5,a4,e32,m4,tu,ma
vle32.v v4,0(a1)
vmv.s.x v2,zero
vmv.s.x v1,zero
vfmul.vv v4,v4,v4
vfredosum.vs v1,v4,v2
vfmv.f.s fa5,v1
fsw fa5,0(a0)
sub a4,a4,a5
bne a4,zero,.L2
ret
 
Tested rv64gcv_zvfh_zfh passed no regression.
 
zvl256b/zvl512b/zvl1024b/zve64d is runing.
 
PR target/112776
 
gcc/ChangeLog:
 
* config/riscv/riscv-vsetvl.cc (pre_vsetvl::pre_global_vsetvl_info): Fix ratio.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/vsetvl/avl_single-84.c: Adapt test.
* gcc.target/riscv/rvv/vsetvl/pr111037-3.c: Ditto.
* gcc.target/riscv/rvv/vsetvl/pr112776.c: New test.
 
---
gcc/config/riscv/riscv-vsetvl.cc              | 13 ++++---
.../riscv/rvv/vsetvl/avl_single-84.c          |  6 ++--
.../gcc.target/riscv/rvv/vsetvl/pr111037-3.c  |  2 +-
.../gcc.target/riscv/rvv/vsetvl/pr112776.c    | 36 +++++++++++++++++++
4 files changed, 46 insertions(+), 11 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
 
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index b3e07d4c3aa..1da95daeeb0 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1497,9 +1497,6 @@ private:
   {
     gcc_assert (prev.valid_p () && next.valid_p ());
-    if (prev.get_ratio () != next.get_ratio ())
-      return false;
-
     if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
       return false;
@@ -2188,7 +2185,7 @@ private:
     return true;
   }
-  bool preds_has_same_avl_p (const vsetvl_info &curr_info)
+  bool preds_all_same_avl_and_ratio_p (const vsetvl_info &curr_info)
   {
     gcc_assert (
       !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
@@ -2200,7 +2197,8 @@ private:
       {
const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
if (!prev_info.valid_p ()
-     || !m_dem.avl_available_p (prev_info, curr_info))
+     || !m_dem.avl_available_p (prev_info, curr_info)
+     || prev_info.get_ratio () != curr_info.get_ratio ())
  return false;
       }
@@ -3171,7 +3169,7 @@ pre_vsetvl::pre_global_vsetvl_info ()
  curr_info = block_info.local_infos[0];
}
       if (curr_info.valid_p () && !curr_info.vl_used_by_non_rvv_insn_p ()
-   && preds_has_same_avl_p (curr_info))
+   && preds_all_same_avl_and_ratio_p (curr_info))
curr_info.set_change_vtype_only ();
       vsetvl_info prev_info = vsetvl_info ();
@@ -3179,7 +3177,8 @@ pre_vsetvl::pre_global_vsetvl_info ()
       for (auto &curr_info : block_info.local_infos)
{
  if (prev_info.valid_p () && curr_info.valid_p ()
-       && m_dem.avl_available_p (prev_info, curr_info))
+       && m_dem.avl_available_p (prev_info, curr_info)
+       && prev_info.get_ratio () == curr_info.get_ratio ())
    curr_info.set_change_vtype_only ();
  prev_info = curr_info;
}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
index a584dd97dc0..5cd0f285029 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
@@ -17,6 +17,6 @@ double f0 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned c
}
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
-
+/* { dg-final { scan-assembler-not {vsetvli\s+zero,\s*zero} { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
index 0f40642c8b6..13344ecdd3b 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
@@ -13,4 +13,4 @@ void foo(_Float16 y, int16_t z, int64_t *i64p)
}
/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
new file mode 100644
index 00000000000..853690178ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (float *r, const float *x)
+{
+  int i, k;
+
+  vfloat32m4_t x_vec;
+  vfloat32m4_t x_forward_vec;
+  vfloat32m4_t temp_vec;
+  vfloat32m1_t dst_vec;
+  vfloat32m1_t src_vec;
+
+  float result = 0.0f;
+  float shift_prev = 0.0f;
+
+  size_t n = 64;
+  for (size_t vl; n > 0; n -= vl)
+    {
+      vl = __riscv_vsetvl_e32m4 (n);
+      x_vec = __riscv_vle32_v_f32m4 (&x[0], vl);
+      x_forward_vec = __riscv_vle32_v_f32m4 (&x[0], vl);
+      temp_vec = __riscv_vfmul_vv_f32m4 (x_vec, x_forward_vec, vl);
+      src_vec = __riscv_vfmv_s_tu (src_vec, 0.0f, vl);
+      dst_vec = __riscv_vfmv_s_tu (dst_vec, 0.0f, vl);
+      dst_vec = __riscv_vfredosum_tu (dst_vec, temp_vec, src_vec, vl);
+      r[0] = __riscv_vfmv_f_s_f32m1_f32 (dst_vec);
+    }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-times {vsetvli\t[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m4,\s*tu,\s*m[au]} 1 } } */
-- 
2.36.3
 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH] RISC-V: Fix VSETVL PASS regression
@ 2023-12-01  0:51 Juzhe-Zhong
  2023-12-01  2:57 ` juzhe.zhong
  0 siblings, 1 reply; 3+ messages in thread
From: Juzhe-Zhong @ 2023-12-01  0:51 UTC (permalink / raw)
  To: gcc-patches; +Cc: kito.cheng, kito.cheng, jeffreyalaw, rdapp.gcc, Juzhe-Zhong

This patch fix 2 regression (one is bug regression, the other is performance regression).
Those 2 regressions are both we are comparing ratio for same AVL in wrong place.

1. BUG regression:
avl_single-84.c:

f0:
        li      a5,999424
        add     a1,a1,a5
        li      a4,299008
        add     a5,a0,a5
        addi    a3,a4,992
        addi    a5,a5,576
        addi    a1,a1,576
        vsetvli a4,zero,e8,m2,ta,ma
        add     a0,a0,a3
        vlm.v   v1,0(a5)
        vsm.v   v1,0(a1)
        vl1re64.v       v1,0(a0)
        beq     a2,zero,.L10
        li      a5,0
        vsetvli zero,zero,e64,m1,tu,ma   --->  This is totally incorrect since the ratio above is 4, wheras it is demanding ratio = 64 here.
.L3:
        fcvt.d.lu       fa5,a5
        addi    a5,a5,1
        fadd.d  fa5,fa5,fa0
        vfmv.s.f        v1,fa5
        bne     a5,a2,.L3
        vfmv.f.s        fa0,v1
        ret
.L10:
        vsetvli zero,zero,e64,m1,ta,ma
        vfmv.f.s        fa0,v1
        ret

2. Performance regression:

before this patch:

        vsetvli a5,a4,e8,m1,ta,ma
        vsetvli zero,a5,e32,m1,tu,ma
        vmv.s.x v2,zero
        vmv.s.x v1,zero
        vsetvli zero,a5,e32,m4,tu,ma
        vle32.v v4,0(a1)
        vfmul.vv        v4,v4,v4
        vfredosum.vs    v1,v4,v2
        vfmv.f.s        fa5,v1
        fsw     fa5,0(a0)
        sub     a4,a4,a5
        bne     a4,zero,.L2
        ret

After this patch:

	vsetvli	a5,a4,e32,m4,tu,ma
	vle32.v	v4,0(a1)
	vmv.s.x	v2,zero
	vmv.s.x	v1,zero
	vfmul.vv	v4,v4,v4
	vfredosum.vs	v1,v4,v2
	vfmv.f.s	fa5,v1
	fsw	fa5,0(a0)
	sub	a4,a4,a5
	bne	a4,zero,.L2
	ret

Tested rv64gcv_zvfh_zfh passed no regression.

zvl256b/zvl512b/zvl1024b/zve64d is runing.

	PR target/112776

gcc/ChangeLog:

	* config/riscv/riscv-vsetvl.cc (pre_vsetvl::pre_global_vsetvl_info): Fix ratio.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/vsetvl/avl_single-84.c: Adapt test.
	* gcc.target/riscv/rvv/vsetvl/pr111037-3.c: Ditto.
	* gcc.target/riscv/rvv/vsetvl/pr112776.c: New test.

---
 gcc/config/riscv/riscv-vsetvl.cc              | 13 ++++---
 .../riscv/rvv/vsetvl/avl_single-84.c          |  6 ++--
 .../gcc.target/riscv/rvv/vsetvl/pr111037-3.c  |  2 +-
 .../gcc.target/riscv/rvv/vsetvl/pr112776.c    | 36 +++++++++++++++++++
 4 files changed, 46 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index b3e07d4c3aa..1da95daeeb0 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1497,9 +1497,6 @@ private:
   {
     gcc_assert (prev.valid_p () && next.valid_p ());
 
-    if (prev.get_ratio () != next.get_ratio ())
-      return false;
-
     if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
       return false;
 
@@ -2188,7 +2185,7 @@ private:
     return true;
   }
 
-  bool preds_has_same_avl_p (const vsetvl_info &curr_info)
+  bool preds_all_same_avl_and_ratio_p (const vsetvl_info &curr_info)
   {
     gcc_assert (
       !bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
@@ -2200,7 +2197,8 @@ private:
       {
 	const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
 	if (!prev_info.valid_p ()
-	    || !m_dem.avl_available_p (prev_info, curr_info))
+	    || !m_dem.avl_available_p (prev_info, curr_info)
+	    || prev_info.get_ratio () != curr_info.get_ratio ())
 	  return false;
       }
 
@@ -3171,7 +3169,7 @@ pre_vsetvl::pre_global_vsetvl_info ()
 	  curr_info = block_info.local_infos[0];
 	}
       if (curr_info.valid_p () && !curr_info.vl_used_by_non_rvv_insn_p ()
-	  && preds_has_same_avl_p (curr_info))
+	  && preds_all_same_avl_and_ratio_p (curr_info))
 	curr_info.set_change_vtype_only ();
 
       vsetvl_info prev_info = vsetvl_info ();
@@ -3179,7 +3177,8 @@ pre_vsetvl::pre_global_vsetvl_info ()
       for (auto &curr_info : block_info.local_infos)
 	{
 	  if (prev_info.valid_p () && curr_info.valid_p ()
-	      && m_dem.avl_available_p (prev_info, curr_info))
+	      && m_dem.avl_available_p (prev_info, curr_info)
+	      && prev_info.get_ratio () == curr_info.get_ratio ())
 	    curr_info.set_change_vtype_only ();
 	  prev_info = curr_info;
 	}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
index a584dd97dc0..5cd0f285029 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
@@ -17,6 +17,6 @@ double f0 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned c
 }
 
 /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
-
+/* { dg-final { scan-assembler-not {vsetvli\s+zero,\s*zero} { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
index 0f40642c8b6..13344ecdd3b 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
@@ -13,4 +13,4 @@ void foo(_Float16 y, int16_t z, int64_t *i64p)
 }
 
 /* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
new file mode 100644
index 00000000000..853690178ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (float *r, const float *x)
+{
+  int i, k;
+
+  vfloat32m4_t x_vec;
+  vfloat32m4_t x_forward_vec;
+  vfloat32m4_t temp_vec;
+  vfloat32m1_t dst_vec;
+  vfloat32m1_t src_vec;
+
+  float result = 0.0f;
+  float shift_prev = 0.0f;
+
+  size_t n = 64;
+  for (size_t vl; n > 0; n -= vl)
+    {
+      vl = __riscv_vsetvl_e32m4 (n);
+      x_vec = __riscv_vle32_v_f32m4 (&x[0], vl);
+      x_forward_vec = __riscv_vle32_v_f32m4 (&x[0], vl);
+      temp_vec = __riscv_vfmul_vv_f32m4 (x_vec, x_forward_vec, vl);
+      src_vec = __riscv_vfmv_s_tu (src_vec, 0.0f, vl);
+      dst_vec = __riscv_vfmv_s_tu (dst_vec, 0.0f, vl);
+      dst_vec = __riscv_vfredosum_tu (dst_vec, temp_vec, src_vec, vl);
+      r[0] = __riscv_vfmv_f_s_f32m1_f32 (dst_vec);
+    }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-times {vsetvli\t[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m4,\s*tu,\s*m[au]} 1 } } */
-- 
2.36.3


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-12-01  2:57 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-27 13:24 [PATCH] RISC-V: Fix VSETVL PASS regression Juzhe-Zhong
2023-12-01  0:51 Juzhe-Zhong
2023-12-01  2:57 ` juzhe.zhong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).