* [PATCH] RISC-V: Fix VSETVL PASS regression
@ 2023-12-01 0:51 Juzhe-Zhong
2023-12-01 2:57 ` juzhe.zhong
0 siblings, 1 reply; 3+ messages in thread
From: Juzhe-Zhong @ 2023-12-01 0:51 UTC (permalink / raw)
To: gcc-patches; +Cc: kito.cheng, kito.cheng, jeffreyalaw, rdapp.gcc, Juzhe-Zhong
This patch fixes 2 regressions (one is a correctness bug, the other is a performance regression).
Both regressions have the same cause: we were comparing the ratio for the same AVL in the wrong place.
1. BUG regression:
avl_single-84.c:
f0:
li a5,999424
add a1,a1,a5
li a4,299008
add a5,a0,a5
addi a3,a4,992
addi a5,a5,576
addi a1,a1,576
vsetvli a4,zero,e8,m2,ta,ma
add a0,a0,a3
vlm.v v1,0(a5)
vsm.v v1,0(a1)
vl1re64.v v1,0(a0)
beq a2,zero,.L10
li a5,0
vsetvli zero,zero,e64,m1,tu,ma ---> This is totally incorrect since the ratio above is 4, whereas it is demanding ratio = 64 here.
.L3:
fcvt.d.lu fa5,a5
addi a5,a5,1
fadd.d fa5,fa5,fa0
vfmv.s.f v1,fa5
bne a5,a2,.L3
vfmv.f.s fa0,v1
ret
.L10:
vsetvli zero,zero,e64,m1,ta,ma
vfmv.f.s fa0,v1
ret
2. Performance regression:
before this patch:
vsetvli a5,a4,e8,m1,ta,ma
vsetvli zero,a5,e32,m1,tu,ma
vmv.s.x v2,zero
vmv.s.x v1,zero
vsetvli zero,a5,e32,m4,tu,ma
vle32.v v4,0(a1)
vfmul.vv v4,v4,v4
vfredosum.vs v1,v4,v2
vfmv.f.s fa5,v1
fsw fa5,0(a0)
sub a4,a4,a5
bne a4,zero,.L2
ret
After this patch:
vsetvli a5,a4,e32,m4,tu,ma
vle32.v v4,0(a1)
vmv.s.x v2,zero
vmv.s.x v1,zero
vfmul.vv v4,v4,v4
vfredosum.vs v1,v4,v2
vfmv.f.s fa5,v1
fsw fa5,0(a0)
sub a4,a4,a5
bne a4,zero,.L2
ret
Tested on rv64gcv_zvfh_zfh with no regressions.
Testing on zvl256b/zvl512b/zvl1024b/zve64d is still running.
PR target/112776
gcc/ChangeLog:
* config/riscv/riscv-vsetvl.cc (pre_vsetvl::pre_global_vsetvl_info): Fix ratio.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/vsetvl/avl_single-84.c: Adapt test.
* gcc.target/riscv/rvv/vsetvl/pr111037-3.c: Ditto.
* gcc.target/riscv/rvv/vsetvl/pr112776.c: New test.
---
gcc/config/riscv/riscv-vsetvl.cc | 13 ++++---
.../riscv/rvv/vsetvl/avl_single-84.c | 6 ++--
.../gcc.target/riscv/rvv/vsetvl/pr111037-3.c | 2 +-
.../gcc.target/riscv/rvv/vsetvl/pr112776.c | 36 +++++++++++++++++++
4 files changed, 46 insertions(+), 11 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index b3e07d4c3aa..1da95daeeb0 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1497,9 +1497,6 @@ private:
{
gcc_assert (prev.valid_p () && next.valid_p ());
- if (prev.get_ratio () != next.get_ratio ())
- return false;
-
if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
return false;
@@ -2188,7 +2185,7 @@ private:
return true;
}
- bool preds_has_same_avl_p (const vsetvl_info &curr_info)
+ bool preds_all_same_avl_and_ratio_p (const vsetvl_info &curr_info)
{
gcc_assert (
!bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
@@ -2200,7 +2197,8 @@ private:
{
const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
if (!prev_info.valid_p ()
- || !m_dem.avl_available_p (prev_info, curr_info))
+ || !m_dem.avl_available_p (prev_info, curr_info)
+ || prev_info.get_ratio () != curr_info.get_ratio ())
return false;
}
@@ -3171,7 +3169,7 @@ pre_vsetvl::pre_global_vsetvl_info ()
curr_info = block_info.local_infos[0];
}
if (curr_info.valid_p () && !curr_info.vl_used_by_non_rvv_insn_p ()
- && preds_has_same_avl_p (curr_info))
+ && preds_all_same_avl_and_ratio_p (curr_info))
curr_info.set_change_vtype_only ();
vsetvl_info prev_info = vsetvl_info ();
@@ -3179,7 +3177,8 @@ pre_vsetvl::pre_global_vsetvl_info ()
for (auto &curr_info : block_info.local_infos)
{
if (prev_info.valid_p () && curr_info.valid_p ()
- && m_dem.avl_available_p (prev_info, curr_info))
+ && m_dem.avl_available_p (prev_info, curr_info)
+ && prev_info.get_ratio () == curr_info.get_ratio ())
curr_info.set_change_vtype_only ();
prev_info = curr_info;
}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
index a584dd97dc0..5cd0f285029 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
@@ -17,6 +17,6 @@ double f0 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned c
}
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
-
+/* { dg-final { scan-assembler-not {vsetvli\s+zero,\s*zero} { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
index 0f40642c8b6..13344ecdd3b 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
@@ -13,4 +13,4 @@ void foo(_Float16 y, int16_t z, int64_t *i64p)
}
/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
new file mode 100644
index 00000000000..853690178ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (float *r, const float *x)
+{
+ int i, k;
+
+ vfloat32m4_t x_vec;
+ vfloat32m4_t x_forward_vec;
+ vfloat32m4_t temp_vec;
+ vfloat32m1_t dst_vec;
+ vfloat32m1_t src_vec;
+
+ float result = 0.0f;
+ float shift_prev = 0.0f;
+
+ size_t n = 64;
+ for (size_t vl; n > 0; n -= vl)
+ {
+ vl = __riscv_vsetvl_e32m4 (n);
+ x_vec = __riscv_vle32_v_f32m4 (&x[0], vl);
+ x_forward_vec = __riscv_vle32_v_f32m4 (&x[0], vl);
+ temp_vec = __riscv_vfmul_vv_f32m4 (x_vec, x_forward_vec, vl);
+ src_vec = __riscv_vfmv_s_tu (src_vec, 0.0f, vl);
+ dst_vec = __riscv_vfmv_s_tu (dst_vec, 0.0f, vl);
+ dst_vec = __riscv_vfredosum_tu (dst_vec, temp_vec, src_vec, vl);
+ r[0] = __riscv_vfmv_f_s_f32m1_f32 (dst_vec);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-times {vsetvli\t[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m4,\s*tu,\s*m[au]} 1 } } */
--
2.36.3
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] RISC-V: Fix VSETVL PASS regression
2023-12-01 0:51 [PATCH] RISC-V: Fix VSETVL PASS regression Juzhe-Zhong
@ 2023-12-01 2:57 ` juzhe.zhong
0 siblings, 0 replies; 3+ messages in thread
From: juzhe.zhong @ 2023-12-01 2:57 UTC (permalink / raw)
To: 钟居哲, gcc-patches
Cc: kito.cheng, Kito.cheng, jeffreyalaw, Robin Dapp
[-- Attachment #1: Type: text/plain, Size: 8220 bytes --]
All regression tests (zve64d/zvl128b/zvl256b/zvl512b/zvl1024b) passed.
juzhe.zhong@rivai.ai
From: Juzhe-Zhong
Date: 2023-12-01 08:51
To: gcc-patches
CC: kito.cheng; kito.cheng; jeffreyalaw; rdapp.gcc; Juzhe-Zhong
Subject: [PATCH] RISC-V: Fix VSETVL PASS regression
This patch fixes 2 regressions (one is a correctness bug, the other is a performance regression).
Both regressions have the same cause: we were comparing the ratio for the same AVL in the wrong place.
1. BUG regression:
avl_single-84.c:
f0:
li a5,999424
add a1,a1,a5
li a4,299008
add a5,a0,a5
addi a3,a4,992
addi a5,a5,576
addi a1,a1,576
vsetvli a4,zero,e8,m2,ta,ma
add a0,a0,a3
vlm.v v1,0(a5)
vsm.v v1,0(a1)
vl1re64.v v1,0(a0)
beq a2,zero,.L10
li a5,0
vsetvli zero,zero,e64,m1,tu,ma ---> This is totally incorrect since the ratio above is 4, whereas it is demanding ratio = 64 here.
.L3:
fcvt.d.lu fa5,a5
addi a5,a5,1
fadd.d fa5,fa5,fa0
vfmv.s.f v1,fa5
bne a5,a2,.L3
vfmv.f.s fa0,v1
ret
.L10:
vsetvli zero,zero,e64,m1,ta,ma
vfmv.f.s fa0,v1
ret
2. Performance regression:
before this patch:
vsetvli a5,a4,e8,m1,ta,ma
vsetvli zero,a5,e32,m1,tu,ma
vmv.s.x v2,zero
vmv.s.x v1,zero
vsetvli zero,a5,e32,m4,tu,ma
vle32.v v4,0(a1)
vfmul.vv v4,v4,v4
vfredosum.vs v1,v4,v2
vfmv.f.s fa5,v1
fsw fa5,0(a0)
sub a4,a4,a5
bne a4,zero,.L2
ret
After this patch:
vsetvli a5,a4,e32,m4,tu,ma
vle32.v v4,0(a1)
vmv.s.x v2,zero
vmv.s.x v1,zero
vfmul.vv v4,v4,v4
vfredosum.vs v1,v4,v2
vfmv.f.s fa5,v1
fsw fa5,0(a0)
sub a4,a4,a5
bne a4,zero,.L2
ret
Tested on rv64gcv_zvfh_zfh with no regressions.
Testing on zvl256b/zvl512b/zvl1024b/zve64d is still running.
PR target/112776
gcc/ChangeLog:
* config/riscv/riscv-vsetvl.cc (pre_vsetvl::pre_global_vsetvl_info): Fix ratio.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/vsetvl/avl_single-84.c: Adapt test.
* gcc.target/riscv/rvv/vsetvl/pr111037-3.c: Ditto.
* gcc.target/riscv/rvv/vsetvl/pr112776.c: New test.
---
gcc/config/riscv/riscv-vsetvl.cc | 13 ++++---
.../riscv/rvv/vsetvl/avl_single-84.c | 6 ++--
.../gcc.target/riscv/rvv/vsetvl/pr111037-3.c | 2 +-
.../gcc.target/riscv/rvv/vsetvl/pr112776.c | 36 +++++++++++++++++++
4 files changed, 46 insertions(+), 11 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index b3e07d4c3aa..1da95daeeb0 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1497,9 +1497,6 @@ private:
{
gcc_assert (prev.valid_p () && next.valid_p ());
- if (prev.get_ratio () != next.get_ratio ())
- return false;
-
if (next.has_vl () && next.vl_used_by_non_rvv_insn_p ())
return false;
@@ -2188,7 +2185,7 @@ private:
return true;
}
- bool preds_has_same_avl_p (const vsetvl_info &curr_info)
+ bool preds_all_same_avl_and_ratio_p (const vsetvl_info &curr_info)
{
gcc_assert (
!bitmap_empty_p (m_vsetvl_def_in[curr_info.get_bb ()->index ()]));
@@ -2200,7 +2197,8 @@ private:
{
const vsetvl_info &prev_info = *m_vsetvl_def_exprs[expr_index];
if (!prev_info.valid_p ()
- || !m_dem.avl_available_p (prev_info, curr_info))
+ || !m_dem.avl_available_p (prev_info, curr_info)
+ || prev_info.get_ratio () != curr_info.get_ratio ())
return false;
}
@@ -3171,7 +3169,7 @@ pre_vsetvl::pre_global_vsetvl_info ()
curr_info = block_info.local_infos[0];
}
if (curr_info.valid_p () && !curr_info.vl_used_by_non_rvv_insn_p ()
- && preds_has_same_avl_p (curr_info))
+ && preds_all_same_avl_and_ratio_p (curr_info))
curr_info.set_change_vtype_only ();
vsetvl_info prev_info = vsetvl_info ();
@@ -3179,7 +3177,8 @@ pre_vsetvl::pre_global_vsetvl_info ()
for (auto &curr_info : block_info.local_infos)
{
if (prev_info.valid_p () && curr_info.valid_p ()
- && m_dem.avl_available_p (prev_info, curr_info))
+ && m_dem.avl_available_p (prev_info, curr_info)
+ && prev_info.get_ratio () == curr_info.get_ratio ())
curr_info.set_change_vtype_only ();
prev_info = curr_info;
}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
index a584dd97dc0..5cd0f285029 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-84.c
@@ -17,6 +17,6 @@ double f0 (int8_t * restrict in, int8_t * restrict out, int n, int m, unsigned c
}
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
-
+/* { dg-final { scan-assembler-not {vsetvli\s+zero,\s*zero} { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
index 0f40642c8b6..13344ecdd3b 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr111037-3.c
@@ -13,4 +13,4 @@ void foo(_Float16 y, int16_t z, int64_t *i64p)
}
/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*zero,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
new file mode 100644
index 00000000000..853690178ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112776.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (float *r, const float *x)
+{
+ int i, k;
+
+ vfloat32m4_t x_vec;
+ vfloat32m4_t x_forward_vec;
+ vfloat32m4_t temp_vec;
+ vfloat32m1_t dst_vec;
+ vfloat32m1_t src_vec;
+
+ float result = 0.0f;
+ float shift_prev = 0.0f;
+
+ size_t n = 64;
+ for (size_t vl; n > 0; n -= vl)
+ {
+ vl = __riscv_vsetvl_e32m4 (n);
+ x_vec = __riscv_vle32_v_f32m4 (&x[0], vl);
+ x_forward_vec = __riscv_vle32_v_f32m4 (&x[0], vl);
+ temp_vec = __riscv_vfmul_vv_f32m4 (x_vec, x_forward_vec, vl);
+ src_vec = __riscv_vfmv_s_tu (src_vec, 0.0f, vl);
+ dst_vec = __riscv_vfmv_s_tu (dst_vec, 0.0f, vl);
+ dst_vec = __riscv_vfredosum_tu (dst_vec, temp_vec, src_vec, vl);
+ r[0] = __riscv_vfmv_f_s_f32m1_f32 (dst_vec);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-times {vsetvli\t[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m4,\s*tu,\s*m[au]} 1 } } */
--
2.36.3
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH] RISC-V: Fix VSETVL PASS regression
@ 2023-11-27 13:24 Juzhe-Zhong
0 siblings, 0 replies; 3+ messages in thread
From: Juzhe-Zhong @ 2023-11-27 13:24 UTC (permalink / raw)
To: gcc-patches; +Cc: kito.cheng, kito.cheng, jeffreyalaw, rdapp.gcc, Juzhe-Zhong
This patch is a regression fix, not an optimization patch:
trunk GCC generates more redundant vsetvl instructions than GCC-13.
This is the case:
bb 2:
def a2 (vsetvl a2, zero)
bb 3:
use a2
bb 4:
use a2 (vle)
before this patch:
bb 2:
vsetvl a2 zero
bb 3:
vsetvl zero, zero ----> should be eliminated.
bb 4:
vle.v
The root cause is that incorrect code prevented us from marking bb 3 as transparent.
bb 3 does not modify "a2", it only uses it, so the VSETVL status from bb 2 is available to bb 3 and bb 4:
bb 2 -> bb 3 -> bb4.
Another regression fix is anticipation calculation:
bb 4:
use a5 (sub)
use a5 (vle)
The VSETVL status of the vle should be considered anticipated as long as the a5 used by both the sub and the vle comes from the same def.
Tested on zvl128b with no regressions.
I am going to test on zvl256/zvl512/zvl1024.
PR target/112713
gcc/ChangeLog:
* config/riscv/riscv-vsetvl.cc (pre_vsetvl::compute_lcm_local_properties): Fix regression.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/vsetvl/pr112713-1.c: New test.
* gcc.target/riscv/rvv/vsetvl/pr112713-2.c: New test.
---
gcc/config/riscv/riscv-vsetvl.cc | 29 ++++++++----
.../gcc.target/riscv/rvv/vsetvl/pr112713-1.c | 24 ++++++++++
.../gcc.target/riscv/rvv/vsetvl/pr112713-2.c | 47 +++++++++++++++++++
3 files changed, 91 insertions(+), 9 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 74367ec8d8e..b3e07d4c3aa 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1433,9 +1433,23 @@ private:
inline bool modify_or_use_vl_p (insn_info *i, const vsetvl_info &info)
{
- return info.has_vl ()
- && (find_access (i->uses (), REGNO (info.get_vl ()))
- || find_access (i->defs (), REGNO (info.get_vl ())));
+ if (info.has_vl ())
+ {
+ if (find_access (i->defs (), REGNO (info.get_vl ())))
+ return true;
+ if (find_access (i->uses (), REGNO (info.get_vl ())))
+ {
+ resource_info resource = full_register (REGNO (info.get_vl ()));
+ def_lookup dl1 = crtl->ssa->find_def (resource, i);
+ def_lookup dl2 = crtl->ssa->find_def (resource, info.get_insn ());
+ if (dl1.matching_set () || dl2.matching_set ())
+ return true;
+ /* If their VLs are coming from same def, we still want to fuse
+ their VSETVL demand info to gain better performance. */
+ return dl1.prev_def (i) != dl2.prev_def (i);
+ }
+ }
+ return false;
}
inline bool modify_avl_p (insn_info *i, const vsetvl_info &info)
{
@@ -1702,7 +1716,7 @@ public:
for (insn_info *i = next_insn->prev_nondebug_insn (); i != prev_insn;
i = i->prev_nondebug_insn ())
{
- // no def amd use of vl
+ // no def and use of vl
if (!ignore_vl && modify_or_use_vl_p (i, info))
return false;
@@ -2635,11 +2649,8 @@ pre_vsetvl::compute_lcm_local_properties ()
for (const insn_info *insn : bb->real_nondebug_insns ())
{
- if ((info.has_nonvlmax_reg_avl ()
- && find_access (insn->defs (), REGNO (info.get_avl ())))
- || (info.has_vl ()
- && find_access (insn->uses (),
- REGNO (info.get_vl ()))))
+ if (info.has_nonvlmax_reg_avl ()
+ && find_access (insn->defs (), REGNO (info.get_avl ())))
{
bitmap_clear_bit (m_transp[bb_index], i);
break;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c
new file mode 100644
index 00000000000..76402ab6167
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t
+foo (char const *buf, size_t len)
+{
+ size_t sum = 0;
+ size_t vl = __riscv_vsetvlmax_e8m8();
+ size_t step = vl * 4;
+ const char *it = buf, *end = buf + len;
+ for(; it + step <= end; ) {
+ it += vl;
+ vint8m8_t v3 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+ vbool1_t m3 = __riscv_vmsgt_vx_i8m8_b1(v3, -65, vl);
+ sum += __riscv_vcpop_m_b1(m3, vl);
+ }
+ return sum;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c
new file mode 100644
index 00000000000..04539d998cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112713-2.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+static size_t
+utf8_count_rvv(char const *buf, size_t len)
+{
+ size_t sum = 0;
+ for (size_t vl; len > 0; len -= vl, buf += vl) {
+ vl = __riscv_vsetvl_e8m8(len);
+ vint8m8_t v = __riscv_vle8_v_i8m8((void*)buf, vl);
+ vbool1_t mask = __riscv_vmsgt_vx_i8m8_b1(v, -65, vl);
+ sum += __riscv_vcpop_m_b1(mask, vl);
+ }
+ return sum;
+}
+
+size_t
+utf8_count_rvv_4x_tail(char const *buf, size_t len)
+{
+ size_t sum = 0;
+ size_t vl = __riscv_vsetvlmax_e8m8();
+ size_t step = vl * 4;
+ const char *it = buf, *end = buf + len;
+ for(; it + step <= end; ) {
+ vint8m8_t v0 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+ vint8m8_t v1 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+ vint8m8_t v2 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+ vint8m8_t v3 = __riscv_vle8_v_i8m8((void*)it, vl); it += vl;
+ vbool1_t m0 = __riscv_vmsgt_vx_i8m8_b1(v0, -65, vl);
+ vbool1_t m1 = __riscv_vmsgt_vx_i8m8_b1(v1, -65, vl);
+ vbool1_t m2 = __riscv_vmsgt_vx_i8m8_b1(v2, -65, vl);
+ vbool1_t m3 = __riscv_vmsgt_vx_i8m8_b1(v3, -65, vl);
+ sum += __riscv_vcpop_m_b1(m0, vl);
+ sum += __riscv_vcpop_m_b1(m1, vl);
+ sum += __riscv_vcpop_m_b1(m2, vl);
+ sum += __riscv_vcpop_m_b1(m3, vl);
+ }
+ return sum + utf8_count_rvv(it, end - it);
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 2 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */
--
2.36.3
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2023-12-01 2:57 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-01 0:51 [PATCH] RISC-V: Fix VSETVL PASS regression Juzhe-Zhong
2023-12-01 2:57 ` juzhe.zhong
-- strict thread matches above, loose matches on Subject: below --
2023-11-27 13:24 Juzhe-Zhong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).