public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: "juzhe.zhong@rivai.ai" <juzhe.zhong@rivai.ai>
To: 钟居哲 <juzhe.zhong@rivai.ai>, gcc-patches <gcc-patches@gcc.gnu.org>
Cc: kito.cheng <kito.cheng@gmail.com>
Subject: Re: [PATCH] RISC-V: Optimize vsetvli of LCM INSERTED edge for user vsetvli [PR 109743]
Date: Mon, 8 May 2023 09:34:54 +0800	[thread overview]
Message-ID: <AE59571EF8CD112F+2023050809345420752344@rivai.ai> (raw)
In-Reply-To: <20230506111449.2128575-1-juzhe.zhong@rivai.ai>

[-- Attachment #1: Type: text/plain, Size: 11095 bytes --]

Gentle ping on this patch.

Is this Ok for trunk? Thanks.


juzhe.zhong@rivai.ai
 
From: juzhe.zhong
Date: 2023-05-06 19:14
To: gcc-patches
CC: kito.cheng; Juzhe-Zhong
Subject: [PATCH] RISC-V: Optimize vsetvli of LCM INSERTED edge for user vsetvli [PR 109743]
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
 
This patch fixes: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109743.
 
This issue happens because we are currently very conservative when optimizing user vsetvli.
 
Consider the following case:
 
bb 1:
  vsetvli a5,a4... (demand AVL = a4).
bb 2:
  RVV insn use a5 (demand AVL = a5).
 
LCM will hoist vsetvl of bb 2 into bb 1.
We don't do AVL propagation in this situation, since it would require
analyzing the code sequence between the vsetvli in bb 1 and the RVV insn in bb 2,
and the two are not necessarily consecutive blocks.
 
This patch performs the optimization after LCM: we check each vsetvli on an LCM inserted
edge and eliminate it if it is redundant. This approach is much simpler and safer.
 
code:
void
foo2 (int32_t *a, int32_t *b, int n)
{
  if (n <= 0)
      return;
  int i = n;
  size_t vl = __riscv_vsetvl_e32m1 (i);
 
  for (; i >= 0; i--)
  {
    vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
    __riscv_vse32_v_i32m1 (b, v, vl);
 
    if (i >= vl)
      continue;
 
    if (i == 0)
      return;
 
    vl = __riscv_vsetvl_e32m1 (i);
  }
}
 
Before this patch:
foo2:
.LFB2:
        .cfi_startproc
        ble     a2,zero,.L1
        mv      a4,a2
        li      a3,-1
        vsetvli a5,a2,e32,m1,ta,mu
        vsetvli zero,a5,e32,m1,ta,ma  <- can be eliminated.
.L5:
        vle32.v v1,0(a0)
        vse32.v v1,0(a1)
        bgeu    a4,a5,.L3
.L10:
        beq     a2,zero,.L1
        vsetvli a5,a4,e32,m1,ta,mu
        addi    a4,a4,-1
        vsetvli zero,a5,e32,m1,ta,ma  <- can be eliminated.
        vle32.v v1,0(a0)
        vse32.v v1,0(a1)
        addiw   a2,a2,-1
        bltu    a4,a5,.L10
.L3:
        addiw   a2,a2,-1
        addi    a4,a4,-1
        bne     a2,a3,.L5
.L1:
        ret
 
After this patch:
f:
        ble     a2,zero,.L1
        mv      a4,a2
        li      a3,-1
        vsetvli a5,a2,e32,m1,ta,ma
.L5:
        vle32.v v1,0(a0)
        vse32.v v1,0(a1)
        bgeu    a4,a5,.L3
.L10:
        beq     a2,zero,.L1
        vsetvli a5,a4,e32,m1,ta,ma
        addi    a4,a4,-1
        vle32.v v1,0(a0)
        vse32.v v1,0(a1)
        addiw   a2,a2,-1
        bltu    a4,a5,.L10
.L3:
        addiw   a2,a2,-1
        addi    a4,a4,-1
        bne     a2,a3,.L5
.L1:
        ret
 
        PR target/109743
 
gcc/ChangeLog:
 
        * config/riscv/riscv-vsetvl.cc (pass_vsetvl::commit_vsetvls): Add optimization for LCM inserted edge.
 
gcc/testsuite/ChangeLog:
 
        * gcc.target/riscv/rvv/vsetvl/pr109743-1.c: New test.
        * gcc.target/riscv/rvv/vsetvl/pr109743-2.c: New test.
        * gcc.target/riscv/rvv/vsetvl/pr109743-3.c: New test.
        * gcc.target/riscv/rvv/vsetvl/pr109743-4.c: New test.
 
---
gcc/config/riscv/riscv-vsetvl.cc              | 42 +++++++++++++++++++
.../gcc.target/riscv/rvv/vsetvl/pr109743-1.c  | 26 ++++++++++++
.../gcc.target/riscv/rvv/vsetvl/pr109743-2.c  | 27 ++++++++++++
.../gcc.target/riscv/rvv/vsetvl/pr109743-3.c  | 28 +++++++++++++
.../gcc.target/riscv/rvv/vsetvl/pr109743-4.c  | 28 +++++++++++++
5 files changed, 151 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c
 
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index f55907a410e..fcee7fdf323 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -3834,6 +3834,48 @@ pass_vsetvl::commit_vsetvls (void)
      const vector_insn_info *require
= m_vector_manager->vector_exprs[i];
      gcc_assert (require->valid_or_dirty_p ());
+
+       /* Here we optimize the VSETVL that is hoisted by LCM:
+
+ Before LCM:
+    bb 1:
+      vsetvli a5,a2,e32,m1,ta,mu
+    bb 2:
+      vsetvli zero,a5,e32,m1,ta,mu
+      ...
+
+ After LCM:
+    bb 1:
+      vsetvli a5,a2,e32,m1,ta,mu
+      LCM INSERTED: vsetvli zero,a5,e32,m1,ta,mu --> eliminate
+    bb 2:
+      ...
+        */
+       const basic_block pred_cfg_bb = eg->src;
+       const auto block_info
+ = m_vector_manager->vector_block_infos[pred_cfg_bb->index];
+       const insn_info *pred_insn = block_info.reaching_out.get_insn ();
+       if (pred_insn && vsetvl_insn_p (pred_insn->rtl ())
+   && require->get_avl_source ()
+   && require->get_avl_source ()->insn ()
+   && require->skip_avl_compatible_p (block_info.reaching_out))
+ {
+   vector_insn_info new_info = *require;
+   new_info.set_avl_info (
+     block_info.reaching_out.get_avl_info ());
+   new_info
+     = block_info.reaching_out.merge (new_info, LOCAL_MERGE);
+   change_vsetvl_insn (pred_insn, new_info);
+   bitmap_clear_bit (m_vector_manager->vector_insert[ed], i);
+   if (dump_file)
+     fprintf (
+       dump_file,
+       "\nLCM INSERTED edge %d from bb %d to bb %d for VSETVL "
+       "expr[%ld] is removed\n",
+       ed, eg->src->index, eg->dest->index, i);
+   continue;
+ }
+
      rtl_profile_for_edge (eg);
      start_sequence ();
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
new file mode 100644
index 00000000000..f30275c8280
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int32_t * a, int32_t * b, int n)
+{
+    if (n <= 0)
+      return;
+    int i = n;
+    size_t vl = __riscv_vsetvl_e32m1 (i);
+    for (; i >= 0; i--)
+      {
+        vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
+        __riscv_vse32_v_i32m1 (b, v, vl);
+
+        if (i >= vl)
+          continue;
+        if (i == 0)
+          return;
+        vl = __riscv_vsetvl_e32m1 (i);
+      }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
new file mode 100644
index 00000000000..5f6647bb916
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-2.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int32_t * a, int32_t * b, int n)
+{
+    if (n <= 0)
+      return;
+    int i = n;
+    size_t vl = __riscv_vsetvl_e8mf4 (i);
+    for (; i >= 0; i--)
+      {
+        vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
+        __riscv_vse32_v_i32m1 (b, v, vl);
+
+        if (i >= vl)
+          continue;
+        if (i == 0)
+          return;
+        vl = __riscv_vsetvl_e32m1 (i);
+      }
+}
+
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
new file mode 100644
index 00000000000..5dbc871ed12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void f (int32_t * a, int32_t * b, int n)
+{
+    if (n <= 0)
+      return;
+    int i = n;
+    size_t vl = __riscv_vsetvl_e8mf2 (i);
+    for (; i >= 0; i--)
+      {
+        vint32m1_t v = __riscv_vle32_v_i32m1 (a, vl);
+        __riscv_vse32_v_i32m1 (b, v, vl);
+
+        if (i >= vl)
+          continue;
+        if (i == 0)
+          return;
+        vl = __riscv_vsetvl_e32m1 (i);
+      }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c
new file mode 100644
index 00000000000..edd12855f58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-4.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void
+f (int32_t *a, int32_t *b, int n)
+{
+  if (n <= 0)
+    return;
+  int i = n;
+  size_t vl = __riscv_vsetvl_e8mf4 (i);
+  for (; i >= 0; i--)
+    {
+      vint32m1_t v = __riscv_vle32_v_i32m1 (a + i, vl);
+      v = __riscv_vle32_v_i32m1_tu (v, a + i + 100, vl);
+      __riscv_vse32_v_i32m1 (b + i, v, vl);
+
+      if (i >= vl)
+ continue;
+      if (i == 0)
+ return;
+      vl = __riscv_vsetvl_e8mf4 (i);
+    }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
-- 
2.36.3
 

  reply	other threads:[~2023-05-08  1:35 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-06 11:14 juzhe.zhong
2023-05-08  1:34 ` juzhe.zhong [this message]
2023-05-08  9:53   ` Kito Cheng
2023-05-08 14:42     ` 钟居哲

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=AE59571EF8CD112F+2023050809345420752344@rivai.ai \
    --to=juzhe.zhong@rivai.ai \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=kito.cheng@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).