* [PATCH V3] RISC-V: Optimize vsetvli of LCM INSERTED edge for user vsetvli [PR 109743]
@ 2023-05-08 23:17 juzhe.zhong
0 siblings, 0 replies; only message in thread
From: juzhe.zhong @ 2023-05-08 23:17 UTC (permalink / raw)
To: gcc-patches; +Cc: kito.cheng, Juzhe-Zhong
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
Rebase to trunk and send V3 patch for:
https://gcc.gnu.org/pipermail/gcc-patches/2023-May/617821.html
This patch is fixing: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109743.
This issue happens is because we are currently very conservative in optimization of user vsetvli.
Consider this following case:
bb 1:
vsetvli a5,a4... (demand AVL = a4).
bb 2:
RVV insn use a5 (demand AVL = a5).
LCM will hoist vsetvl of bb 2 into bb 1.
We don't do AVL propagation for this situation since it's complicated that
we should analyze the code sequence between vsetvli in bb 1 and RVV insn in bb 2.
They are not necessary the consecutive blocks.
This patch is doing the optimizations after LCM, we will check and eliminate the vsetvli
in LCM inserted edge if such vsetvli is redundant. Such approach is much simplier and safe.
code:
void
foo2 (int32_t *a, int32_t *b, int n)
{
if (n <= 0)
return;
int i = n;
size_t vl = __riscv_vsetvl_e32m1 (i);
for (; i >= 0; i--)
{
vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
__riscv_vse32_v_i32m1 (b, v, vl);
if (i >= vl)
continue;
if (i == 0)
return;
vl = __riscv_vsetvl_e32m1 (i);
}
}
Before this patch:
foo2:
.LFB2:
.cfi_startproc
ble a2,zero,.L1
mv a4,a2
li a3,-1
vsetvli a5,a2,e32,m1,ta,mu
vsetvli zero,a5,e32,m1,ta,ma <- can be eliminated.
.L5:
vle32.v v1,0(a0)
vse32.v v1,0(a1)
bgeu a4,a5,.L3
.L10:
beq a2,zero,.L1
vsetvli a5,a4,e32,m1,ta,mu
addi a4,a4,-1
vsetvli zero,a5,e32,m1,ta,ma <- can be eliminated.
vle32.v v1,0(a0)
vse32.v v1,0(a1)
addiw a2,a2,-1
bltu a4,a5,.L10
.L3:
addiw a2,a2,-1
addi a4,a4,-1
bne a2,a3,.L5
.L1:
ret
After this patch:
f:
ble a2,zero,.L1
mv a4,a2
li a3,-1
vsetvli a5,a2,e32,m1,ta,ma
.L5:
vle32.v v1,0(a0)
vse32.v v1,0(a1)
bgeu a4,a5,.L3
.L10:
beq a2,zero,.L1
vsetvli a5,a4,e32,m1,ta,ma
addi a4,a4,-1
vle32.v v1,0(a0)
vse32.v v1,0(a1)
addiw a2,a2,-1
bltu a4,a5,.L10
.L3:
addiw a2,a2,-1
addi a4,a4,-1
bne a2,a3,.L5
.L1:
ret
PR target/109743
gcc/ChangeLog:
* config/riscv/riscv-vsetvl.cc (pass_vsetvl::local_eliminate_vsetvl_insn): Enhance local optimizations for LCM.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/vsetvl/pr109743-1.c: New test.
* gcc.target/riscv/rvv/vsetvl/pr109743-2.c: New test.
* gcc.target/riscv/rvv/vsetvl/pr109743-3.c: New test.
* gcc.target/riscv/rvv/vsetvl/pr109743-4.c: New test.
---
gcc/config/riscv/riscv-vsetvl.cc | 47 ++++++++++++++++++-
.../gcc.target/riscv/rvv/vsetvl/pr109743-1.c | 26 ++++++++++
.../gcc.target/riscv/rvv/vsetvl/pr109743-2.c | 27 +++++++++++
.../gcc.target/riscv/rvv/vsetvl/pr109743-3.c | 28 +++++++++++
.../gcc.target/riscv/rvv/vsetvl/pr109743-4.c | 28 +++++++++++
5 files changed, 155 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index d4d6f336ef9..72aa2bfcf6f 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -4026,7 +4026,8 @@ pass_vsetvl::local_eliminate_vsetvl_insn (const vector_insn_info &dem) const
{
if (i->is_call () || i->is_asm ()
|| find_access (i->defs (), VL_REGNUM)
- || find_access (i->defs (), VTYPE_REGNUM))
+ || find_access (i->defs (), VTYPE_REGNUM)
+ || find_access (i->defs (), REGNO (vl)))
return;
if (has_vtype_op (i->rtl ()))
@@ -4051,6 +4052,50 @@ pass_vsetvl::local_eliminate_vsetvl_insn (const vector_insn_info &dem) const
return;
}
}
+
+ /* Here we optimize the VSETVL is hoisted by LCM:
+
+ Before LCM:
+ bb 1:
+ vsetvli a5,a2,e32,m1,ta,mu
+ bb 2:
+ vsetvli zero,a5,e32,m1,ta,mu
+ ...
+
+ After LCM:
+ bb 1:
+ vsetvli a5,a2,e32,m1,ta,mu
+ LCM INSERTED: vsetvli zero,a5,e32,m1,ta,mu --> eliminate
+ bb 2:
+ ...
+ Such instruction can not be accessed in RTL_SSA when we don't re-init
+ the new RTL_SSA framework but it is definetely at the END of the block. */
+ rtx_insn *end_vsetvl = BB_END (bb->cfg_bb ());
+ if (!vsetvl_discard_result_insn_p (end_vsetvl))
+ {
+ if (JUMP_P (end_vsetvl)
+ && vsetvl_discard_result_insn_p (PREV_INSN (end_vsetvl)))
+ end_vsetvl = PREV_INSN (end_vsetvl);
+ else
+ return;
+ }
+
+ if (single_succ_p (bb->cfg_bb ()))
+ {
+ edge e = single_succ_edge (bb->cfg_bb ());
+ auto require = get_block_info (e->dest).local_dem;
+ const auto reaching_out = get_block_info (bb->cfg_bb ()).reaching_out;
+ if (require.get_avl_source ()
+ && require.skip_avl_compatible_p (reaching_out)
+ && reaching_out.get_insn () == insn
+ && get_vl (insn->rtl ()) == get_avl (end_vsetvl))
+ {
+ require.set_avl_info (reaching_out.get_avl_info ());
+ require = reaching_out.merge (require, LOCAL_MERGE);
+ change_vsetvl_insn (insn, require);
+ eliminate_insn (end_vsetvl);
+ }
+ }
}
}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
new file mode 100644
index 00000000000..f30275c8280
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int32_t * a, int32_t * b, int n)
+{
+ if (n <= 0)
+ return;
+ int i = n;
+ size_t vl = __riscv_vsetvl_e32m1 (i);
+ for (; i >= 0; i--)
+ {
+ vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
+ __riscv_vse32_v_i32m1 (b, v, vl);
+
+ if (i >= vl)
+ continue;
+ if (i == 0)
+ return;
+ vl = __riscv_vsetvl_e32m1 (i);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
new file mode 100644
index 00000000000..5f6647bb916
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int32_t * a, int32_t * b, int n)
+{
+ if (n <= 0)
+ return;
+ int i = n;
+ size_t vl = __riscv_vsetvl_e8mf4 (i);
+ for (; i >= 0; i--)
+ {
+ vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
+ __riscv_vse32_v_i32m1 (b, v, vl);
+
+ if (i >= vl)
+ continue;
+ if (i == 0)
+ return;
+ vl = __riscv_vsetvl_e32m1 (i);
+ }
+}
+
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
new file mode 100644
index 00000000000..5dbc871ed12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int32_t * a, int32_t * b, int n)
+{
+ if (n <= 0)
+ return;
+ int i = n;
+ size_t vl = __riscv_vsetvl_e8mf2 (i);
+ for (; i >= 0; i--)
+ {
+ vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
+ __riscv_vse32_v_i32m1 (b, v, vl);
+
+ if (i >= vl)
+ continue;
+ if (i == 0)
+ return;
+ vl = __riscv_vsetvl_e32m1 (i);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c
new file mode 100644
index 00000000000..edd12855f58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void
+f (int32_t *a, int32_t *b, int n)
+{
+ if (n <= 0)
+ return;
+ int i = n;
+ size_t vl = __riscv_vsetvl_e8mf4 (i);
+ for (; i >= 0; i--)
+ {
+ vint32m1_t v = __riscv_vle32_v_i32m1 (a + i, vl);
+ v = __riscv_vle32_v_i32m1_tu (v, a + i + 100, vl);
+ __riscv_vse32_v_i32m1 (b + i, v, vl);
+
+ if (i >= vl)
+ continue;
+ if (i == 0)
+ return;
+ vl = __riscv_vsetvl_e8mf4 (i);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
--
2.36.3
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-05-08 23:17 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-08 23:17 [PATCH V3] RISC-V: Optimize vsetvli of LCM INSERTED edge for user vsetvli [PR 109743] juzhe.zhong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).