public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH V2] RISC-V: Fix incorrect demand info merge in local vsetvli optimization [PR109748]
@ 2023-05-05 14:12 juzhe.zhong
  2023-05-06  2:29 ` Kito Cheng
  0 siblings, 1 reply; 2+ messages in thread
From: juzhe.zhong @ 2023-05-05 14:12 UTC (permalink / raw)
  To: gcc-patches; +Cc: kito.cheng, Juzhe-Zhong

From: Juzhe-Zhong <juzhe.zhong@rivai.ai>

This patch is fixing my recent optimization patch:
https://github.com/gcc-mirror/gcc/commit/d51f2456ee51bd59a79b4725ca0e488c25260bbf

In that patch, the new_info = parse_insn (i) is not correct.
Since consider the following case:
       
vsetvli a5,a4, e8,m1
..
vsetvli zero,a5, e32, m4
vle8.v
vmacc.vv
...

Since we have backward demand fusion in Phase 1, so the real demand of "vle8.v" is e32, m4.
However, if we use parse_insn (vle8.v) = e8, m1 which is not correct.

So this patch we change new_info = new_info.parse_insn (i)
into:

vector_insn_info new_info = m_vector_manager->vector_insn_infos[i->uid ()];

So that, we can correctly optimize codes into:

vsetvli a5,a4, e32, m4
..
.. (vsetvli zero,a5, e32, m4 is removed)
vle8.v
vmacc.vv

Since m_vector_manager->vector_insn_infos is the member variable of pass_vsetvl class.
We remove static void function "local_eliminate_vsetvl_insn", and make it as the member function
of pass_vsetvl class.

        PR target/109748

gcc/ChangeLog:

        * config/riscv/riscv-vsetvl.cc (local_eliminate_vsetvl_insn): Remove it.
        (pass_vsetvl::local_eliminate_vsetvl_insn): New function.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/vsetvl/pr109748.c: New test.

---
 gcc/config/riscv/riscv-vsetvl.cc              | 102 ++++++++++--------
 .../gcc.target/riscv/rvv/vsetvl/pr109748.c    |  36 +++++++
 2 files changed, 93 insertions(+), 45 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109748.c

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 39b4d21210b..e1efd7b1c40 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1056,51 +1056,6 @@ change_vsetvl_insn (const insn_info *insn, const vector_insn_info &info)
   change_insn (rinsn, new_pat);
 }
 
-static void
-local_eliminate_vsetvl_insn (const vector_insn_info &dem)
-{
-  const insn_info *insn = dem.get_insn ();
-  if (!insn || insn->is_artificial ())
-    return;
-  rtx_insn *rinsn = insn->rtl ();
-  const bb_info *bb = insn->bb ();
-  if (vsetvl_insn_p (rinsn))
-    {
-      rtx vl = get_vl (rinsn);
-      for (insn_info *i = insn->next_nondebug_insn ();
-	   real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
-	{
-	  if (i->is_call () || i->is_asm ()
-	      || find_access (i->defs (), VL_REGNUM)
-	      || find_access (i->defs (), VTYPE_REGNUM))
-	    return;
-
-	  if (has_vtype_op (i->rtl ()))
-	    {
-	      if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl ())))
-		return;
-	      rtx avl = get_avl (i->rtl ());
-	      if (avl != vl)
-		return;
-	      set_info *def = find_access (i->uses (), REGNO (avl))->def ();
-	      if (def->insn () != insn)
-		return;
-
-	      vector_insn_info new_info;
-	      new_info.parse_insn (i);
-	      if (!new_info.skip_avl_compatible_p (dem))
-		return;
-
-	      new_info.set_avl_info (dem.get_avl_info ());
-	      new_info = dem.merge (new_info, LOCAL_MERGE);
-	      change_vsetvl_insn (insn, new_info);
-	      eliminate_insn (PREV_INSN (i->rtl ()));
-	      return;
-	    }
-	}
-    }
-}
-
 static bool
 source_equal_p (insn_info *insn1, insn_info *insn2)
 {
@@ -2672,6 +2627,7 @@ private:
   void pre_vsetvl (void);
 
   /* Phase 5.  */
+  void local_eliminate_vsetvl_insn (const vector_insn_info &) const;
   void cleanup_insns (void) const;
 
   /* Phase 6.  */
@@ -3993,6 +3949,62 @@ pass_vsetvl::pre_vsetvl (void)
     commit_edge_insertions ();
 }
 
+/* Local user vsetvl optimizaiton:
+
+     Case 1:
+       vsetvl a5,a4,e8,mf8
+       ...
+       vsetvl zero,a5,e8,mf8 --> Eliminate directly.
+
+     Case 2:
+       vsetvl a5,a4,e8,mf8    --> vsetvl a5,a4,e32,mf2
+       ...
+       vsetvl zero,a5,e32,mf2 --> Eliminate directly.  */
+void
+pass_vsetvl::local_eliminate_vsetvl_insn (const vector_insn_info &dem) const
+{
+  const insn_info *insn = dem.get_insn ();
+  if (!insn || insn->is_artificial ())
+    return;
+  rtx_insn *rinsn = insn->rtl ();
+  const bb_info *bb = insn->bb ();
+  if (vsetvl_insn_p (rinsn))
+    {
+      rtx vl = get_vl (rinsn);
+      for (insn_info *i = insn->next_nondebug_insn ();
+	   real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
+	{
+	  if (i->is_call () || i->is_asm ()
+	      || find_access (i->defs (), VL_REGNUM)
+	      || find_access (i->defs (), VTYPE_REGNUM))
+	    return;
+
+	  if (has_vtype_op (i->rtl ()))
+	    {
+	      if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl ())))
+		return;
+	      rtx avl = get_avl (i->rtl ());
+	      if (avl != vl)
+		return;
+	      set_info *def = find_access (i->uses (), REGNO (avl))->def ();
+	      if (def->insn () != insn)
+		return;
+
+	      vector_insn_info new_info
+		= m_vector_manager->vector_insn_infos[i->uid ()];
+	      if (!new_info.skip_avl_compatible_p (dem))
+		return;
+
+	      new_info.set_avl_info (dem.get_avl_info ());
+	      new_info = dem.merge (new_info, LOCAL_MERGE);
+	      change_vsetvl_insn (insn, new_info);
+	      eliminate_insn (PREV_INSN (i->rtl ()));
+	      return;
+	    }
+	}
+    }
+}
+
 /* Before VSETVL PASS, RVV instructions pattern is depending on AVL operand
    implicitly. Since we will emit VSETVL instruction and make RVV instructions
    depending on VL/VTYPE global status registers, we remove the such AVL operand
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109748.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109748.c
new file mode 100644
index 00000000000..81c42c5a82a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109748.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+int byte_mac_vec(unsigned char *a, unsigned char *b, int len) {
+  size_t vlmax = __riscv_vsetvlmax_e8m1();
+  vint32m4_t vec_s = __riscv_vmv_v_x_i32m4(0, vlmax);
+  vint32m1_t vec_zero = __riscv_vmv_v_x_i32m1(0, vlmax);
+  int k = len;
+
+  for (size_t vl; k > 0; k -= vl, a += vl, b += vl) {
+      vl = __riscv_vsetvl_e8m1(k);
+
+      vuint8m1_t a8s = __riscv_vle8_v_u8m1(a, vl);
+      vuint8m1_t b8s = __riscv_vle8_v_u8m1(b, vl);
+      vuint32m4_t a8s_extended = __riscv_vzext_vf4_u32m4(a8s, vl);
+      vuint32m4_t b8s_extended = __riscv_vzext_vf4_u32m4(a8s, vl);
+
+      vint32m4_t a8s_as_i32 = __riscv_vreinterpret_v_u32m4_i32m4(a8s_extended);
+      vint32m4_t b8s_as_i32 = __riscv_vreinterpret_v_u32m4_i32m4(b8s_extended);
+
+      vec_s = __riscv_vmacc_vv_i32m4_tu(vec_s, a8s_as_i32, b8s_as_i32, vl);
+  }
+
+  vint32m1_t vec_sum = __riscv_vredsum_vs_i32m4_i32m1(vec_s, vec_zero, __riscv_vsetvl_e32m4(len));
+  int sum = __riscv_vmv_x_s_i32m1_i32(vec_sum);
+
+  return sum;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m4,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 4 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
-- 
2.36.3


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH V2] RISC-V: Fix incorrect demand info merge in local vsetvli optimization [PR109748]
  2023-05-05 14:12 [PATCH V2] RISC-V: Fix incorrect demand info merge in local vsetvli optimization [PR109748] juzhe.zhong
@ 2023-05-06  2:29 ` Kito Cheng
  0 siblings, 0 replies; 2+ messages in thread
From: Kito Cheng @ 2023-05-06  2:29 UTC (permalink / raw)
  To: juzhe.zhong; +Cc: gcc-patches

Thanks, committed to trunk!

On Fri, May 5, 2023 at 10:13 PM <juzhe.zhong@rivai.ai> wrote:
>
> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
>
> This patch is fixing my recent optimization patch:
> https://github.com/gcc-mirror/gcc/commit/d51f2456ee51bd59a79b4725ca0e488c25260bbf
>
> In that patch, the new_info = parse_insn (i) is not correct.
> Since consider the following case:
>
> vsetvli a5,a4, e8,m1
> ..
> vsetvli zero,a5, e32, m4
> vle8.v
> vmacc.vv
> ...
>
> Since we have backward demand fusion in Phase 1, so the real demand of "vle8.v" is e32, m4.
> However, if we use parse_insn (vle8.v) = e8, m1 which is not correct.
>
> So this patch we change new_info = new_info.parse_insn (i)
> into:
>
> vector_insn_info new_info = m_vector_manager->vector_insn_infos[i->uid ()];
>
> So that, we can correctly optimize codes into:
>
> vsetvli a5,a4, e32, m4
> ..
> .. (vsetvli zero,a5, e32, m4 is removed)
> vle8.v
> vmacc.vv
>
> Since m_vector_manager->vector_insn_infos is the member variable of pass_vsetvl class.
> We remove static void function "local_eliminate_vsetvl_insn", and make it as the member function
> of pass_vsetvl class.
>
>         PR target/109748
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv-vsetvl.cc (local_eliminate_vsetvl_insn): Remove it.
>         (pass_vsetvl::local_eliminate_vsetvl_insn): New function.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/vsetvl/pr109748.c: New test.
>
> ---
>  gcc/config/riscv/riscv-vsetvl.cc              | 102 ++++++++++--------
>  .../gcc.target/riscv/rvv/vsetvl/pr109748.c    |  36 +++++++
>  2 files changed, 93 insertions(+), 45 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109748.c
>
> diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
> index 39b4d21210b..e1efd7b1c40 100644
> --- a/gcc/config/riscv/riscv-vsetvl.cc
> +++ b/gcc/config/riscv/riscv-vsetvl.cc
> @@ -1056,51 +1056,6 @@ change_vsetvl_insn (const insn_info *insn, const vector_insn_info &info)
>    change_insn (rinsn, new_pat);
>  }
>
> -static void
> -local_eliminate_vsetvl_insn (const vector_insn_info &dem)
> -{
> -  const insn_info *insn = dem.get_insn ();
> -  if (!insn || insn->is_artificial ())
> -    return;
> -  rtx_insn *rinsn = insn->rtl ();
> -  const bb_info *bb = insn->bb ();
> -  if (vsetvl_insn_p (rinsn))
> -    {
> -      rtx vl = get_vl (rinsn);
> -      for (insn_info *i = insn->next_nondebug_insn ();
> -          real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
> -       {
> -         if (i->is_call () || i->is_asm ()
> -             || find_access (i->defs (), VL_REGNUM)
> -             || find_access (i->defs (), VTYPE_REGNUM))
> -           return;
> -
> -         if (has_vtype_op (i->rtl ()))
> -           {
> -             if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl ())))
> -               return;
> -             rtx avl = get_avl (i->rtl ());
> -             if (avl != vl)
> -               return;
> -             set_info *def = find_access (i->uses (), REGNO (avl))->def ();
> -             if (def->insn () != insn)
> -               return;
> -
> -             vector_insn_info new_info;
> -             new_info.parse_insn (i);
> -             if (!new_info.skip_avl_compatible_p (dem))
> -               return;
> -
> -             new_info.set_avl_info (dem.get_avl_info ());
> -             new_info = dem.merge (new_info, LOCAL_MERGE);
> -             change_vsetvl_insn (insn, new_info);
> -             eliminate_insn (PREV_INSN (i->rtl ()));
> -             return;
> -           }
> -       }
> -    }
> -}
> -
>  static bool
>  source_equal_p (insn_info *insn1, insn_info *insn2)
>  {
> @@ -2672,6 +2627,7 @@ private:
>    void pre_vsetvl (void);
>
>    /* Phase 5.  */
> +  void local_eliminate_vsetvl_insn (const vector_insn_info &) const;
>    void cleanup_insns (void) const;
>
>    /* Phase 6.  */
> @@ -3993,6 +3949,62 @@ pass_vsetvl::pre_vsetvl (void)
>      commit_edge_insertions ();
>  }
>
> +/* Local user vsetvl optimizaiton:
> +
> +     Case 1:
> +       vsetvl a5,a4,e8,mf8
> +       ...
> +       vsetvl zero,a5,e8,mf8 --> Eliminate directly.
> +
> +     Case 2:
> +       vsetvl a5,a4,e8,mf8    --> vsetvl a5,a4,e32,mf2
> +       ...
> +       vsetvl zero,a5,e32,mf2 --> Eliminate directly.  */
> +void
> +pass_vsetvl::local_eliminate_vsetvl_insn (const vector_insn_info &dem) const
> +{
> +  const insn_info *insn = dem.get_insn ();
> +  if (!insn || insn->is_artificial ())
> +    return;
> +  rtx_insn *rinsn = insn->rtl ();
> +  const bb_info *bb = insn->bb ();
> +  if (vsetvl_insn_p (rinsn))
> +    {
> +      rtx vl = get_vl (rinsn);
> +      for (insn_info *i = insn->next_nondebug_insn ();
> +          real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ())
> +       {
> +         if (i->is_call () || i->is_asm ()
> +             || find_access (i->defs (), VL_REGNUM)
> +             || find_access (i->defs (), VTYPE_REGNUM))
> +           return;
> +
> +         if (has_vtype_op (i->rtl ()))
> +           {
> +             if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl ())))
> +               return;
> +             rtx avl = get_avl (i->rtl ());
> +             if (avl != vl)
> +               return;
> +             set_info *def = find_access (i->uses (), REGNO (avl))->def ();
> +             if (def->insn () != insn)
> +               return;
> +
> +             vector_insn_info new_info
> +               = m_vector_manager->vector_insn_infos[i->uid ()];
> +             if (!new_info.skip_avl_compatible_p (dem))
> +               return;
> +
> +             new_info.set_avl_info (dem.get_avl_info ());
> +             new_info = dem.merge (new_info, LOCAL_MERGE);
> +             change_vsetvl_insn (insn, new_info);
> +             eliminate_insn (PREV_INSN (i->rtl ()));
> +             return;
> +           }
> +       }
> +    }
> +}
> +
>  /* Before VSETVL PASS, RVV instructions pattern is depending on AVL operand
>     implicitly. Since we will emit VSETVL instruction and make RVV instructions
>     depending on VL/VTYPE global status registers, we remove the such AVL operand
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109748.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109748.c
> new file mode 100644
> index 00000000000..81c42c5a82a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109748.c
> @@ -0,0 +1,36 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +int byte_mac_vec(unsigned char *a, unsigned char *b, int len) {
> +  size_t vlmax = __riscv_vsetvlmax_e8m1();
> +  vint32m4_t vec_s = __riscv_vmv_v_x_i32m4(0, vlmax);
> +  vint32m1_t vec_zero = __riscv_vmv_v_x_i32m1(0, vlmax);
> +  int k = len;
> +
> +  for (size_t vl; k > 0; k -= vl, a += vl, b += vl) {
> +      vl = __riscv_vsetvl_e8m1(k);
> +
> +      vuint8m1_t a8s = __riscv_vle8_v_u8m1(a, vl);
> +      vuint8m1_t b8s = __riscv_vle8_v_u8m1(b, vl);
> +      vuint32m4_t a8s_extended = __riscv_vzext_vf4_u32m4(a8s, vl);
> +      vuint32m4_t b8s_extended = __riscv_vzext_vf4_u32m4(a8s, vl);
> +
> +      vint32m4_t a8s_as_i32 = __riscv_vreinterpret_v_u32m4_i32m4(a8s_extended);
> +      vint32m4_t b8s_as_i32 = __riscv_vreinterpret_v_u32m4_i32m4(b8s_extended);
> +
> +      vec_s = __riscv_vmacc_vv_i32m4_tu(vec_s, a8s_as_i32, b8s_as_i32, vl);
> +  }
> +
> +  vint32m1_t vec_sum = __riscv_vredsum_vs_i32m4_i32m1(vec_s, vec_zero, __riscv_vsetvl_e32m4(len));
> +  int sum = __riscv_vmv_x_s_i32m1_i32(vec_sum);
> +
> +  return sum;
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m4,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 4 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> --
> 2.36.3
>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-05-06  2:29 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-05 14:12 [PATCH V2] RISC-V: Fix incorrect demand info merge in local vsetvli optimization [PR109748] juzhe.zhong
2023-05-06  2:29 ` Kito Cheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).