public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] RISC-V: Optimize vsetvli of LCM INSERTED edge for user vsetvli [PR 109743]
@ 2023-05-06 11:14 juzhe.zhong
  2023-05-08  1:34 ` juzhe.zhong
  0 siblings, 1 reply; 4+ messages in thread
From: juzhe.zhong @ 2023-05-06 11:14 UTC (permalink / raw)
  To: gcc-patches; +Cc: kito.cheng, Juzhe-Zhong

From: Juzhe-Zhong <juzhe.zhong@rivai.ai>

This patch fixes https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109743.

This issue happens because we are currently very conservative when optimizing user vsetvli.

Consider the following case:

bb 1:
  vsetvli a5,a4... (demand AVL = a4).
bb 2:
  RVV insn use a5 (demand AVL = a5).

LCM will hoist the vsetvl of bb 2 into bb 1.
We don't do AVL propagation in this situation because it is complicated: we
would have to analyze the code sequence between the vsetvli in bb 1 and the RVV insn in bb 2,
and they are not necessarily consecutive blocks.

This patch does the optimization after LCM: we check the vsetvli on each LCM inserted edge
and eliminate it if it is redundant. This approach is much simpler and safer.

code:
/* Reproducer for PR 109743: loads from A and stores to B with RVV
   intrinsics, where the vector length VL comes from explicit (user)
   __riscv_vsetvl_e32m1 calls rather than from the compiler.  */
void
foo2 (int32_t *a, int32_t *b, int n)
{
  /* Nothing to do for a non-positive count.  */
  if (n <= 0)
      return;
  int i = n;
  /* First user vsetvl, before the loop, with I as its argument.  */
  size_t vl = __riscv_vsetvl_e32m1 (i);

  for (; i >= 0; i--)
  {
    /* Load from A and store to B using the current VL.  */
    vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
    __riscv_vse32_v_i32m1 (b, v, vl);

    /* Keep the current VL while I is still at least VL.  Note that I (int)
       is compared against VL (size_t), i.e. as an unsigned value.  */
    if (i >= vl)
      continue;

    if (i == 0)
      return;

    /* Second user vsetvl, inside the loop: recompute VL from the current I.  */
    vl = __riscv_vsetvl_e32m1 (i);
  }
}

Before this patch:
foo2:
.LFB2:
        .cfi_startproc
        ble     a2,zero,.L1
        mv      a4,a2
        li      a3,-1
        vsetvli a5,a2,e32,m1,ta,mu
        vsetvli zero,a5,e32,m1,ta,ma  <- can be eliminated.
.L5:
        vle32.v v1,0(a0)
        vse32.v v1,0(a1)
        bgeu    a4,a5,.L3
.L10:
        beq     a2,zero,.L1
        vsetvli a5,a4,e32,m1,ta,mu
        addi    a4,a4,-1
        vsetvli zero,a5,e32,m1,ta,ma  <- can be eliminated.
        vle32.v v1,0(a0)
        vse32.v v1,0(a1)
        addiw   a2,a2,-1
        bltu    a4,a5,.L10
.L3:
        addiw   a2,a2,-1
        addi    a4,a4,-1
        bne     a2,a3,.L5
.L1:
        ret

After this patch:
f:
        ble     a2,zero,.L1
        mv      a4,a2
        li      a3,-1
        vsetvli a5,a2,e32,m1,ta,ma
.L5:
        vle32.v v1,0(a0)
        vse32.v v1,0(a1)
        bgeu    a4,a5,.L3
.L10:
        beq     a2,zero,.L1
        vsetvli a5,a4,e32,m1,ta,ma
        addi    a4,a4,-1
        vle32.v v1,0(a0)
        vse32.v v1,0(a1)
        addiw   a2,a2,-1
        bltu    a4,a5,.L10
.L3:
        addiw   a2,a2,-1
        addi    a4,a4,-1
        bne     a2,a3,.L5
.L1:
        ret

        PR target/109743

gcc/ChangeLog:

        * config/riscv/riscv-vsetvl.cc (pass_vsetvl::commit_vsetvls): Add optimization for LCM inserted edge.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/vsetvl/pr109743-1.c: New test.
        * gcc.target/riscv/rvv/vsetvl/pr109743-2.c: New test.
        * gcc.target/riscv/rvv/vsetvl/pr109743-3.c: New test.
        * gcc.target/riscv/rvv/vsetvl/pr109743-4.c: New test.

---
 gcc/config/riscv/riscv-vsetvl.cc              | 42 +++++++++++++++++++
 .../gcc.target/riscv/rvv/vsetvl/pr109743-1.c  | 26 ++++++++++++
 .../gcc.target/riscv/rvv/vsetvl/pr109743-2.c  | 27 ++++++++++++
 .../gcc.target/riscv/rvv/vsetvl/pr109743-3.c  | 28 +++++++++++++
 .../gcc.target/riscv/rvv/vsetvl/pr109743-4.c  | 28 +++++++++++++
 5 files changed, 151 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index f55907a410e..fcee7fdf323 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -3834,6 +3834,48 @@ pass_vsetvl::commit_vsetvls (void)
 	      const vector_insn_info *require
 		= m_vector_manager->vector_exprs[i];
 	      gcc_assert (require->valid_or_dirty_p ());
+
+	      /* Here we optimize a VSETVL that is hoisted by LCM:
+
+		 Before LCM:
+		   bb 1:
+		     vsetvli a5,a2,e32,m1,ta,mu
+		   bb 2:
+		     vsetvli zero,a5,e32,m1,ta,mu
+		     ...
+
+		 After LCM:
+		   bb 1:
+		     vsetvli a5,a2,e32,m1,ta,mu
+		     LCM INSERTED: vsetvli zero,a5,e32,m1,ta,mu --> eliminate
+		   bb 2:
+		     ...
+	       */
+	      const basic_block pred_cfg_bb = eg->src;
+	      const auto block_info
+		= m_vector_manager->vector_block_infos[pred_cfg_bb->index];
+	      const insn_info *pred_insn = block_info.reaching_out.get_insn ();
+	      if (pred_insn && vsetvl_insn_p (pred_insn->rtl ())
+		  && require->get_avl_source ()
+		  && require->get_avl_source ()->insn ()
+		  && require->skip_avl_compatible_p (block_info.reaching_out))
+		{
+		  vector_insn_info new_info = *require;
+		  new_info.set_avl_info (
+		    block_info.reaching_out.get_avl_info ());
+		  new_info
+		    = block_info.reaching_out.merge (new_info, LOCAL_MERGE);
+		  change_vsetvl_insn (pred_insn, new_info);
+		  bitmap_clear_bit (m_vector_manager->vector_insert[ed], i);
+		  if (dump_file)
+		    fprintf (
+		      dump_file,
+		      "\nLCM INSERTED edge %d from bb %d to bb %d for VSETVL "
+		      "expr[%ld] is removed\n",
+		      ed, eg->src->index, eg->dest->index, i);
+		  continue;
+		}
+
 	      rtl_profile_for_edge (eg);
 	      start_sequence ();
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
new file mode 100644
index 00000000000..f30275c8280
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int32_t * a, int32_t * b, int n)
+{
+    if (n <= 0)
+      return;
+    int i = n;
+    size_t vl = __riscv_vsetvl_e32m1 (i);
+    for (; i >= 0; i--)
+      {
+        vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
+        __riscv_vse32_v_i32m1 (b, v, vl);
+
+        if (i >= vl)
+          continue;
+        if (i == 0)
+          return;
+        vl = __riscv_vsetvl_e32m1 (i);
+      }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
new file mode 100644
index 00000000000..5f6647bb916
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int32_t * a, int32_t * b, int n)
+{
+    if (n <= 0)
+      return;
+    int i = n;
+    size_t vl = __riscv_vsetvl_e8mf4 (i);
+    for (; i >= 0; i--)
+      {
+        vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
+        __riscv_vse32_v_i32m1 (b, v, vl);
+
+        if (i >= vl)
+          continue;
+        if (i == 0)
+          return;
+        vl = __riscv_vsetvl_e32m1 (i);
+      }
+}
+
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
new file mode 100644
index 00000000000..5dbc871ed12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int32_t * a, int32_t * b, int n)
+{
+    if (n <= 0)
+      return;
+    int i = n;
+    size_t vl = __riscv_vsetvl_e8mf2 (i);
+    for (; i >= 0; i--)
+      {
+        vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
+        __riscv_vse32_v_i32m1 (b, v, vl);
+
+        if (i >= vl)
+          continue;
+        if (i == 0)
+          return;
+        vl = __riscv_vsetvl_e32m1 (i);
+      }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c
new file mode 100644
index 00000000000..edd12855f58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void
+f (int32_t *a, int32_t *b, int n)
+{
+  if (n <= 0)
+    return;
+  int i = n;
+  size_t vl = __riscv_vsetvl_e8mf4 (i);
+  for (; i >= 0; i--)
+    {
+      vint32m1_t v = __riscv_vle32_v_i32m1 (a + i, vl);
+      v = __riscv_vle32_v_i32m1_tu (v, a + i + 100, vl);
+      __riscv_vse32_v_i32m1 (b + i, v, vl);
+
+      if (i >= vl)
+	continue;
+      if (i == 0)
+	return;
+      vl = __riscv_vsetvl_e8mf4 (i);
+    }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
-- 
2.36.3


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] RISC-V: Optimize vsetvli of LCM INSERTED edge for user vsetvli [PR 109743]
  2023-05-06 11:14 [PATCH] RISC-V: Optimize vsetvli of LCM INSERTED edge for user vsetvli [PR 109743] juzhe.zhong
@ 2023-05-08  1:34 ` juzhe.zhong
  2023-05-08  9:53   ` Kito Cheng
  0 siblings, 1 reply; 4+ messages in thread
From: juzhe.zhong @ 2023-05-08  1:34 UTC (permalink / raw)
  To: 钟居哲, gcc-patches; +Cc: kito.cheng

[-- Attachment #1: Type: text/plain, Size: 11095 bytes --]

Gentle ping this patch.

Is this Ok for trunk? Thanks.


juzhe.zhong@rivai.ai
 
From: juzhe.zhong
Date: 2023-05-06 19:14
To: gcc-patches
CC: kito.cheng; Juzhe-Zhong
Subject: [PATCH] RISC-V: Optimize vsetvli of LCM INSERTED edge for user vsetvli [PR 109743]
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
 
This patch is fixing: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109743.
 
This issue happens is because we are currently very conservative in optimization of user vsetvli.
 
Consider this following case:
 
bb 1:
  vsetvli a5,a4... (demand AVL = a4).
bb 2:
  RVV insn use a5 (demand AVL = a5).
 
LCM will hoist vsetvl of bb 2 into bb 1.
We don't do AVL propagation for this situation since it's complicated that
we should analyze the code sequence between vsetvli in bb 1 and RVV insn in bb 2.
They are not necessary the consecutive blocks.
 
This patch is doing the optimizations after LCM, we will check and eliminate the vsetvli
in LCM inserted edge if such vsetvli is redundant. Such approach is much simplier and safe.
 
code:
void
foo2 (int32_t *a, int32_t *b, int n)
{
  if (n <= 0)
      return;
  int i = n;
  size_t vl = __riscv_vsetvl_e32m1 (i);
 
  for (; i >= 0; i--)
  {
    vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
    __riscv_vse32_v_i32m1 (b, v, vl);
 
    if (i >= vl)
      continue;
 
    if (i == 0)
      return;
 
    vl = __riscv_vsetvl_e32m1 (i);
  }
}
 
Before this patch:
foo2:
.LFB2:
        .cfi_startproc
        ble     a2,zero,.L1
        mv      a4,a2
        li      a3,-1
        vsetvli a5,a2,e32,m1,ta,mu
        vsetvli zero,a5,e32,m1,ta,ma  <- can be eliminated.
.L5:
        vle32.v v1,0(a0)
        vse32.v v1,0(a1)
        bgeu    a4,a5,.L3
.L10:
        beq     a2,zero,.L1
        vsetvli a5,a4,e32,m1,ta,mu
        addi    a4,a4,-1
        vsetvli zero,a5,e32,m1,ta,ma  <- can be eliminated.
        vle32.v v1,0(a0)
        vse32.v v1,0(a1)
        addiw   a2,a2,-1
        bltu    a4,a5,.L10
.L3:
        addiw   a2,a2,-1
        addi    a4,a4,-1
        bne     a2,a3,.L5
.L1:
        ret
 
After this patch:
f:
        ble     a2,zero,.L1
        mv      a4,a2
        li      a3,-1
        vsetvli a5,a2,e32,m1,ta,ma
.L5:
        vle32.v v1,0(a0)
        vse32.v v1,0(a1)
        bgeu    a4,a5,.L3
.L10:
        beq     a2,zero,.L1
        vsetvli a5,a4,e32,m1,ta,ma
        addi    a4,a4,-1
        vle32.v v1,0(a0)
        vse32.v v1,0(a1)
        addiw   a2,a2,-1
        bltu    a4,a5,.L10
.L3:
        addiw   a2,a2,-1
        addi    a4,a4,-1
        bne     a2,a3,.L5
.L1:
        ret
 
        PR target/109743
 
gcc/ChangeLog:
 
        * config/riscv/riscv-vsetvl.cc (pass_vsetvl::commit_vsetvls): Add optimization for LCM inserted edge.
 
gcc/testsuite/ChangeLog:
 
        * gcc.target/riscv/rvv/vsetvl/pr109743-1.c: New test.
        * gcc.target/riscv/rvv/vsetvl/pr109743-2.c: New test.
        * gcc.target/riscv/rvv/vsetvl/pr109743-3.c: New test.
        * gcc.target/riscv/rvv/vsetvl/pr109743-4.c: New test.
 
---
gcc/config/riscv/riscv-vsetvl.cc              | 42 +++++++++++++++++++
.../gcc.target/riscv/rvv/vsetvl/pr109743-1.c  | 26 ++++++++++++
.../gcc.target/riscv/rvv/vsetvl/pr109743-2.c  | 27 ++++++++++++
.../gcc.target/riscv/rvv/vsetvl/pr109743-3.c  | 28 +++++++++++++
.../gcc.target/riscv/rvv/vsetvl/pr109743-4.c  | 28 +++++++++++++
5 files changed, 151 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c
 
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index f55907a410e..fcee7fdf323 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -3834,6 +3834,48 @@ pass_vsetvl::commit_vsetvls (void)
      const vector_insn_info *require
= m_vector_manager->vector_exprs[i];
      gcc_assert (require->valid_or_dirty_p ());
+
+       /* Here we optimize the VSETVL is hoisted by LCM:
+
+ Before LCM:
+    bb 1:
+      vsetvli a5,a2,e32,m1,ta,mu
+    bb 2:
+      vsetvli zero,a5,e32,m1,ta,mu
+      ...
+
+ After LCM:
+    bb 1:
+      vsetvli a5,a2,e32,m1,ta,mu
+      LCM INSERTED: vsetvli zero,a5,e32,m1,ta,mu --> eliminate
+    bb 2:
+      ...
+        */
+       const basic_block pred_cfg_bb = eg->src;
+       const auto block_info
+ = m_vector_manager->vector_block_infos[pred_cfg_bb->index];
+       const insn_info *pred_insn = block_info.reaching_out.get_insn ();
+       if (pred_insn && vsetvl_insn_p (pred_insn->rtl ())
+   && require->get_avl_source ()
+   && require->get_avl_source ()->insn ()
+   && require->skip_avl_compatible_p (block_info.reaching_out))
+ {
+   vector_insn_info new_info = *require;
+   new_info.set_avl_info (
+     block_info.reaching_out.get_avl_info ());
+   new_info
+     = block_info.reaching_out.merge (new_info, LOCAL_MERGE);
+   change_vsetvl_insn (pred_insn, new_info);
+   bitmap_clear_bit (m_vector_manager->vector_insert[ed], i);
+   if (dump_file)
+     fprintf (
+       dump_file,
+       "\nLCM INSERTED edge %d from bb %d to bb %d for VSETVL "
+       "expr[%ld] is removed\n",
+       ed, eg->src->index, eg->dest->index, i);
+   continue;
+ }
+
      rtl_profile_for_edge (eg);
      start_sequence ();
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
new file mode 100644
index 00000000000..f30275c8280
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int32_t * a, int32_t * b, int n)
+{
+    if (n <= 0)
+      return;
+    int i = n;
+    size_t vl = __riscv_vsetvl_e32m1 (i);
+    for (; i >= 0; i--)
+      {
+        vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
+        __riscv_vse32_v_i32m1 (b, v, vl);
+
+        if (i >= vl)
+          continue;
+        if (i == 0)
+          return;
+        vl = __riscv_vsetvl_e32m1 (i);
+      }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
new file mode 100644
index 00000000000..5f6647bb916
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int32_t * a, int32_t * b, int n)
+{
+    if (n <= 0)
+      return;
+    int i = n;
+    size_t vl = __riscv_vsetvl_e8mf4 (i);
+    for (; i >= 0; i--)
+      {
+        vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
+        __riscv_vse32_v_i32m1 (b, v, vl);
+
+        if (i >= vl)
+          continue;
+        if (i == 0)
+          return;
+        vl = __riscv_vsetvl_e32m1 (i);
+      }
+}
+
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
new file mode 100644
index 00000000000..5dbc871ed12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int32_t * a, int32_t * b, int n)
+{
+    if (n <= 0)
+      return;
+    int i = n;
+    size_t vl = __riscv_vsetvl_e8mf2 (i);
+    for (; i >= 0; i--)
+      {
+        vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
+        __riscv_vse32_v_i32m1 (b, v, vl);
+
+        if (i >= vl)
+          continue;
+        if (i == 0)
+          return;
+        vl = __riscv_vsetvl_e32m1 (i);
+      }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c
new file mode 100644
index 00000000000..edd12855f58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void
+f (int32_t *a, int32_t *b, int n)
+{
+  if (n <= 0)
+    return;
+  int i = n;
+  size_t vl = __riscv_vsetvl_e8mf4 (i);
+  for (; i >= 0; i--)
+    {
+      vint32m1_t v = __riscv_vle32_v_i32m1 (a + i, vl);
+      v = __riscv_vle32_v_i32m1_tu (v, a + i + 100, vl);
+      __riscv_vse32_v_i32m1 (b + i, v, vl);
+
+      if (i >= vl)
+ continue;
+      if (i == 0)
+ return;
+      vl = __riscv_vsetvl_e8mf4 (i);
+    }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
-- 
2.36.3
 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] RISC-V: Optimize vsetvli of LCM INSERTED edge for user vsetvli [PR 109743]
  2023-05-08  1:34 ` juzhe.zhong
@ 2023-05-08  9:53   ` Kito Cheng
  2023-05-08 14:42     ` 钟居哲
  0 siblings, 1 reply; 4+ messages in thread
From: Kito Cheng @ 2023-05-08  9:53 UTC (permalink / raw)
  To: juzhe.zhong; +Cc: gcc-patches

I am wondering if it is possible to do this in
local_eliminate_vsetvl_insn? I feel this is a sort of local elimination,
so putting them together would be better than handling that in many
different places.

On Mon, May 8, 2023 at 9:35 AM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> Gentle ping this patch.
>
> Is this Ok for trunk? Thanks.
>
>
> juzhe.zhong@rivai.ai
>
> From: juzhe.zhong
> Date: 2023-05-06 19:14
> To: gcc-patches
> CC: kito.cheng; Juzhe-Zhong
> Subject: [PATCH] RISC-V: Optimize vsetvli of LCM INSERTED edge for user vsetvli [PR 109743]
> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
>
> This patch is fixing: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109743.
>
> This issue happens is because we are currently very conservative in optimization of user vsetvli.
>
> Consider this following case:
>
> bb 1:
>   vsetvli a5,a4... (demand AVL = a4).
> bb 2:
>   RVV insn use a5 (demand AVL = a5).
>
> LCM will hoist vsetvl of bb 2 into bb 1.
> We don't do AVL propagation for this situation since it's complicated that
> we should analyze the code sequence between vsetvli in bb 1 and RVV insn in bb 2.
> They are not necessary the consecutive blocks.
>
> This patch is doing the optimizations after LCM, we will check and eliminate the vsetvli
> in LCM inserted edge if such vsetvli is redundant. Such approach is much simplier and safe.
>
> code:
> void
> foo2 (int32_t *a, int32_t *b, int n)
> {
>   if (n <= 0)
>       return;
>   int i = n;
>   size_t vl = __riscv_vsetvl_e32m1 (i);
>
>   for (; i >= 0; i--)
>   {
>     vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
>     __riscv_vse32_v_i32m1 (b, v, vl);
>
>     if (i >= vl)
>       continue;
>
>     if (i == 0)
>       return;
>
>     vl = __riscv_vsetvl_e32m1 (i);
>   }
> }
>
> Before this patch:
> foo2:
> .LFB2:
>         .cfi_startproc
>         ble     a2,zero,.L1
>         mv      a4,a2
>         li      a3,-1
>         vsetvli a5,a2,e32,m1,ta,mu
>         vsetvli zero,a5,e32,m1,ta,ma  <- can be eliminated.
> .L5:
>         vle32.v v1,0(a0)
>         vse32.v v1,0(a1)
>         bgeu    a4,a5,.L3
> .L10:
>         beq     a2,zero,.L1
>         vsetvli a5,a4,e32,m1,ta,mu
>         addi    a4,a4,-1
>         vsetvli zero,a5,e32,m1,ta,ma  <- can be eliminated.
>         vle32.v v1,0(a0)
>         vse32.v v1,0(a1)
>         addiw   a2,a2,-1
>         bltu    a4,a5,.L10
> .L3:
>         addiw   a2,a2,-1
>         addi    a4,a4,-1
>         bne     a2,a3,.L5
> .L1:
>         ret
>
> After this patch:
> f:
>         ble     a2,zero,.L1
>         mv      a4,a2
>         li      a3,-1
>         vsetvli a5,a2,e32,m1,ta,ma
> .L5:
>         vle32.v v1,0(a0)
>         vse32.v v1,0(a1)
>         bgeu    a4,a5,.L3
> .L10:
>         beq     a2,zero,.L1
>         vsetvli a5,a4,e32,m1,ta,ma
>         addi    a4,a4,-1
>         vle32.v v1,0(a0)
>         vse32.v v1,0(a1)
>         addiw   a2,a2,-1
>         bltu    a4,a5,.L10
> .L3:
>         addiw   a2,a2,-1
>         addi    a4,a4,-1
>         bne     a2,a3,.L5
> .L1:
>         ret
>
>         PR target/109743
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv-vsetvl.cc (pass_vsetvl::commit_vsetvls): Add optimization for LCM inserted edge.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/vsetvl/pr109743-1.c: New test.
>         * gcc.target/riscv/rvv/vsetvl/pr109743-2.c: New test.
>         * gcc.target/riscv/rvv/vsetvl/pr109743-3.c: New test.
>         * gcc.target/riscv/rvv/vsetvl/pr109743-4.c: New test.
>
> ---
> gcc/config/riscv/riscv-vsetvl.cc              | 42 +++++++++++++++++++
> .../gcc.target/riscv/rvv/vsetvl/pr109743-1.c  | 26 ++++++++++++
> .../gcc.target/riscv/rvv/vsetvl/pr109743-2.c  | 27 ++++++++++++
> .../gcc.target/riscv/rvv/vsetvl/pr109743-3.c  | 28 +++++++++++++
> .../gcc.target/riscv/rvv/vsetvl/pr109743-4.c  | 28 +++++++++++++
> 5 files changed, 151 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c
>
> diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
> index f55907a410e..fcee7fdf323 100644
> --- a/gcc/config/riscv/riscv-vsetvl.cc
> +++ b/gcc/config/riscv/riscv-vsetvl.cc
> @@ -3834,6 +3834,48 @@ pass_vsetvl::commit_vsetvls (void)
>       const vector_insn_info *require
> = m_vector_manager->vector_exprs[i];
>       gcc_assert (require->valid_or_dirty_p ());
> +
> +       /* Here we optimize the VSETVL is hoisted by LCM:
> +
> + Before LCM:
> +    bb 1:
> +      vsetvli a5,a2,e32,m1,ta,mu
> +    bb 2:
> +      vsetvli zero,a5,e32,m1,ta,mu
> +      ...
> +
> + After LCM:
> +    bb 1:
> +      vsetvli a5,a2,e32,m1,ta,mu
> +      LCM INSERTED: vsetvli zero,a5,e32,m1,ta,mu --> eliminate
> +    bb 2:
> +      ...
> +        */
> +       const basic_block pred_cfg_bb = eg->src;
> +       const auto block_info
> + = m_vector_manager->vector_block_infos[pred_cfg_bb->index];
> +       const insn_info *pred_insn = block_info.reaching_out.get_insn ();
> +       if (pred_insn && vsetvl_insn_p (pred_insn->rtl ())
> +   && require->get_avl_source ()
> +   && require->get_avl_source ()->insn ()
> +   && require->skip_avl_compatible_p (block_info.reaching_out))
> + {
> +   vector_insn_info new_info = *require;
> +   new_info.set_avl_info (
> +     block_info.reaching_out.get_avl_info ());
> +   new_info
> +     = block_info.reaching_out.merge (new_info, LOCAL_MERGE);
> +   change_vsetvl_insn (pred_insn, new_info);
> +   bitmap_clear_bit (m_vector_manager->vector_insert[ed], i);
> +   if (dump_file)
> +     fprintf (
> +       dump_file,
> +       "\nLCM INSERTED edge %d from bb %d to bb %d for VSETVL "
> +       "expr[%ld] is removed\n",
> +       ed, eg->src->index, eg->dest->index, i);
> +   continue;
> + }
> +
>       rtl_profile_for_edge (eg);
>       start_sequence ();
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> new file mode 100644
> index 00000000000..f30275c8280
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void f (int32_t * a, int32_t * b, int n)
> +{
> +    if (n <= 0)
> +      return;
> +    int i = n;
> +    size_t vl = __riscv_vsetvl_e32m1 (i);
> +    for (; i >= 0; i--)
> +      {
> +        vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
> +        __riscv_vse32_v_i32m1 (b, v, vl);
> +
> +        if (i >= vl)
> +          continue;
> +        if (i == 0)
> +          return;
> +        vl = __riscv_vsetvl_e32m1 (i);
> +      }
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
> new file mode 100644
> index 00000000000..5f6647bb916
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void f (int32_t * a, int32_t * b, int n)
> +{
> +    if (n <= 0)
> +      return;
> +    int i = n;
> +    size_t vl = __riscv_vsetvl_e8mf4 (i);
> +    for (; i >= 0; i--)
> +      {
> +        vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
> +        __riscv_vse32_v_i32m1 (b, v, vl);
> +
> +        if (i >= vl)
> +          continue;
> +        if (i == 0)
> +          return;
> +        vl = __riscv_vsetvl_e32m1 (i);
> +      }
> +}
> +
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> new file mode 100644
> index 00000000000..5dbc871ed12
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void f (int32_t * a, int32_t * b, int n)
> +{
> +    if (n <= 0)
> +      return;
> +    int i = n;
> +    size_t vl = __riscv_vsetvl_e8mf2 (i);
> +    for (; i >= 0; i--)
> +      {
> +        vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
> +        __riscv_vse32_v_i32m1 (b, v, vl);
> +
> +        if (i >= vl)
> +          continue;
> +        if (i == 0)
> +          return;
> +        vl = __riscv_vsetvl_e32m1 (i);
> +      }
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c
> new file mode 100644
> index 00000000000..edd12855f58
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +f (int32_t *a, int32_t *b, int n)
> +{
> +  if (n <= 0)
> +    return;
> +  int i = n;
> +  size_t vl = __riscv_vsetvl_e8mf4 (i);
> +  for (; i >= 0; i--)
> +    {
> +      vint32m1_t v = __riscv_vle32_v_i32m1 (a + i, vl);
> +      v = __riscv_vle32_v_i32m1_tu (v, a + i + 100, vl);
> +      __riscv_vse32_v_i32m1 (b + i, v, vl);
> +
> +      if (i >= vl)
> + continue;
> +      if (i == 0)
> + return;
> +      vl = __riscv_vsetvl_e8mf4 (i);
> +    }
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> --
> 2.36.3
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: Re: [PATCH] RISC-V: Optimize vsetvli of LCM INSERTED edge for user vsetvli [PR 109743]
  2023-05-08  9:53   ` Kito Cheng
@ 2023-05-08 14:42     ` 钟居哲
  0 siblings, 0 replies; 4+ messages in thread
From: 钟居哲 @ 2023-05-08 14:42 UTC (permalink / raw)
  To: kito.cheng; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 12320 bytes --]

OK. I have addressed the comment; here is the V2 patch:
https://gcc.gnu.org/pipermail/gcc-patches/2023-May/617821.html 

Thanks.


juzhe.zhong@rivai.ai
 
From: Kito Cheng
Date: 2023-05-08 17:53
To: juzhe.zhong@rivai.ai
CC: gcc-patches
Subject: Re: [PATCH] RISC-V: Optimize vsetvli of LCM INSERTED edge for user vsetvli [PR 109743]
I am wondering if it is possible to do this in
local_eliminate_vsetvl_insn? I feel this is a sort of local elimination,
so putting them together would be better than handling it in many
different places.
 
On Mon, May 8, 2023 at 9:35 AM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> Gentle ping on this patch.
>
> Is this Ok for trunk? Thanks.
>
>
> juzhe.zhong@rivai.ai
>
> From: juzhe.zhong
> Date: 2023-05-06 19:14
> To: gcc-patches
> CC: kito.cheng; Juzhe-Zhong
> Subject: [PATCH] RISC-V: Optimize vsetvli of LCM INSERTED edge for user vsetvli [PR 109743]
> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
>
> This patch is fixing: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109743.
>
> This issue happens because we are currently very conservative in optimizing user vsetvli.
>
> Consider the following case:
>
> bb 1:
>   vsetvli a5,a4... (demand AVL = a4).
> bb 2:
>   RVV insn use a5 (demand AVL = a5).
>
> LCM will hoist vsetvl of bb 2 into bb 1.
> We don't do AVL propagation for this situation since it is complicated:
> we would have to analyze the code sequence between vsetvli in bb 1 and RVV insn in bb 2.
> They are not necessarily consecutive blocks.
>
> This patch does the optimization after LCM: we check and eliminate the vsetvli
> on the LCM-inserted edge if such a vsetvli is redundant. This approach is much simpler and safer.
>
> code:
> void
> foo2 (int32_t *a, int32_t *b, int n)
> {
>   if (n <= 0)
>       return;
>   int i = n;
>   size_t vl = __riscv_vsetvl_e32m1 (i);
>
>   for (; i >= 0; i--)
>   {
>     vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
>     __riscv_vse32_v_i32m1 (b, v, vl);
>
>     if (i >= vl)
>       continue;
>
>     if (i == 0)
>       return;
>
>     vl = __riscv_vsetvl_e32m1 (i);
>   }
> }
>
> Before this patch:
> foo2:
> .LFB2:
>         .cfi_startproc
>         ble     a2,zero,.L1
>         mv      a4,a2
>         li      a3,-1
>         vsetvli a5,a2,e32,m1,ta,mu
>         vsetvli zero,a5,e32,m1,ta,ma  <- can be eliminated.
> .L5:
>         vle32.v v1,0(a0)
>         vse32.v v1,0(a1)
>         bgeu    a4,a5,.L3
> .L10:
>         beq     a2,zero,.L1
>         vsetvli a5,a4,e32,m1,ta,mu
>         addi    a4,a4,-1
>         vsetvli zero,a5,e32,m1,ta,ma  <- can be eliminated.
>         vle32.v v1,0(a0)
>         vse32.v v1,0(a1)
>         addiw   a2,a2,-1
>         bltu    a4,a5,.L10
> .L3:
>         addiw   a2,a2,-1
>         addi    a4,a4,-1
>         bne     a2,a3,.L5
> .L1:
>         ret
>
> After this patch:
> f:
>         ble     a2,zero,.L1
>         mv      a4,a2
>         li      a3,-1
>         vsetvli a5,a2,e32,m1,ta,ma
> .L5:
>         vle32.v v1,0(a0)
>         vse32.v v1,0(a1)
>         bgeu    a4,a5,.L3
> .L10:
>         beq     a2,zero,.L1
>         vsetvli a5,a4,e32,m1,ta,ma
>         addi    a4,a4,-1
>         vle32.v v1,0(a0)
>         vse32.v v1,0(a1)
>         addiw   a2,a2,-1
>         bltu    a4,a5,.L10
> .L3:
>         addiw   a2,a2,-1
>         addi    a4,a4,-1
>         bne     a2,a3,.L5
> .L1:
>         ret
>
>         PR target/109743
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv-vsetvl.cc (pass_vsetvl::commit_vsetvls): Add optimization for LCM inserted edge.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/vsetvl/pr109743-1.c: New test.
>         * gcc.target/riscv/rvv/vsetvl/pr109743-2.c: New test.
>         * gcc.target/riscv/rvv/vsetvl/pr109743-3.c: New test.
>         * gcc.target/riscv/rvv/vsetvl/pr109743-4.c: New test.
>
> ---
> gcc/config/riscv/riscv-vsetvl.cc              | 42 +++++++++++++++++++
> .../gcc.target/riscv/rvv/vsetvl/pr109743-1.c  | 26 ++++++++++++
> .../gcc.target/riscv/rvv/vsetvl/pr109743-2.c  | 27 ++++++++++++
> .../gcc.target/riscv/rvv/vsetvl/pr109743-3.c  | 28 +++++++++++++
> .../gcc.target/riscv/rvv/vsetvl/pr109743-4.c  | 28 +++++++++++++
> 5 files changed, 151 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c
>
> diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
> index f55907a410e..fcee7fdf323 100644
> --- a/gcc/config/riscv/riscv-vsetvl.cc
> +++ b/gcc/config/riscv/riscv-vsetvl.cc
> @@ -3834,6 +3834,48 @@ pass_vsetvl::commit_vsetvls (void)
>       const vector_insn_info *require
> = m_vector_manager->vector_exprs[i];
>       gcc_assert (require->valid_or_dirty_p ());
> +
> +       /* Here we optimize the VSETVL that is hoisted by LCM:
> +
> + Before LCM:
> +    bb 1:
> +      vsetvli a5,a2,e32,m1,ta,mu
> +    bb 2:
> +      vsetvli zero,a5,e32,m1,ta,mu
> +      ...
> +
> + After LCM:
> +    bb 1:
> +      vsetvli a5,a2,e32,m1,ta,mu
> +      LCM INSERTED: vsetvli zero,a5,e32,m1,ta,mu --> eliminate
> +    bb 2:
> +      ...
> +        */
> +       const basic_block pred_cfg_bb = eg->src;
> +       const auto block_info
> + = m_vector_manager->vector_block_infos[pred_cfg_bb->index];
> +       const insn_info *pred_insn = block_info.reaching_out.get_insn ();
> +       if (pred_insn && vsetvl_insn_p (pred_insn->rtl ())
> +   && require->get_avl_source ()
> +   && require->get_avl_source ()->insn ()
> +   && require->skip_avl_compatible_p (block_info.reaching_out))
> + {
> +   vector_insn_info new_info = *require;
> +   new_info.set_avl_info (
> +     block_info.reaching_out.get_avl_info ());
> +   new_info
> +     = block_info.reaching_out.merge (new_info, LOCAL_MERGE);
> +   change_vsetvl_insn (pred_insn, new_info);
> +   bitmap_clear_bit (m_vector_manager->vector_insert[ed], i);
> +   if (dump_file)
> +     fprintf (
> +       dump_file,
> +       "\nLCM INSERTED edge %d from bb %d to bb %d for VSETVL "
> +       "expr[%ld] is removed\n",
> +       ed, eg->src->index, eg->dest->index, i);
> +   continue;
> + }
> +
>       rtl_profile_for_edge (eg);
>       start_sequence ();
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> new file mode 100644
> index 00000000000..f30275c8280
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void f (int32_t * a, int32_t * b, int n)
> +{
> +    if (n <= 0)
> +      return;
> +    int i = n;
> +    size_t vl = __riscv_vsetvl_e32m1 (i);
> +    for (; i >= 0; i--)
> +      {
> +        vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
> +        __riscv_vse32_v_i32m1 (b, v, vl);
> +
> +        if (i >= vl)
> +          continue;
> +        if (i == 0)
> +          return;
> +        vl = __riscv_vsetvl_e32m1 (i);
> +      }
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
> new file mode 100644
> index 00000000000..5f6647bb916
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void f (int32_t * a, int32_t * b, int n)
> +{
> +    if (n <= 0)
> +      return;
> +    int i = n;
> +    size_t vl = __riscv_vsetvl_e8mf4 (i);
> +    for (; i >= 0; i--)
> +      {
> +        vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
> +        __riscv_vse32_v_i32m1 (b, v, vl);
> +
> +        if (i >= vl)
> +          continue;
> +        if (i == 0)
> +          return;
> +        vl = __riscv_vsetvl_e32m1 (i);
> +      }
> +}
> +
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> new file mode 100644
> index 00000000000..5dbc871ed12
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void f (int32_t * a, int32_t * b, int n)
> +{
> +    if (n <= 0)
> +      return;
> +    int i = n;
> +    size_t vl = __riscv_vsetvl_e8mf2 (i);
> +    for (; i >= 0; i--)
> +      {
> +        vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
> +        __riscv_vse32_v_i32m1 (b, v, vl);
> +
> +        if (i >= vl)
> +          continue;
> +        if (i == 0)
> +          return;
> +        vl = __riscv_vsetvl_e32m1 (i);
> +      }
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c
> new file mode 100644
> index 00000000000..edd12855f58
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void
> +f (int32_t *a, int32_t *b, int n)
> +{
> +  if (n <= 0)
> +    return;
> +  int i = n;
> +  size_t vl = __riscv_vsetvl_e8mf4 (i);
> +  for (; i >= 0; i--)
> +    {
> +      vint32m1_t v = __riscv_vle32_v_i32m1 (a + i, vl);
> +      v = __riscv_vle32_v_i32m1_tu (v, a + i + 100, vl);
> +      __riscv_vse32_v_i32m1 (b + i, v, vl);
> +
> +      if (i >= vl)
> + continue;
> +      if (i == 0)
> + return;
> +      vl = __riscv_vsetvl_e8mf4 (i);
> +    }
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> --
> 2.36.3
>
 

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-05-08 14:42 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-06 11:14 [PATCH] RISC-V: Optimize vsetvli of LCM INSERTED edge for user vsetvli [PR 109743] juzhe.zhong
2023-05-08  1:34 ` juzhe.zhong
2023-05-08  9:53   ` Kito Cheng
2023-05-08 14:42     ` 钟居哲

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).