public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/riscv/heads/gcc-13-with-riscv-opts)] RISC-V: Optimize VSETVL codegen of SELECT_VL with LEN_MASK_{LOAD, STORE}
@ 2023-06-26 20:57 Jeff Law
  0 siblings, 0 replies; 2+ messages in thread
From: Jeff Law @ 2023-06-26 20:57 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:7a6bb7a44565c3a94ce941bcb7a8c957c864dcab

commit 7a6bb7a44565c3a94ce941bcb7a8c957c864dcab
Author: Juzhe-Zhong <juzhe.zhong@rivai.ai>
Date:   Sun Jun 25 20:20:57 2023 +0800

    RISC-V: Optimize VSETVL codegen of SELECT_VL with LEN_MASK_{LOAD, STORE}
    
    This patch depends on the LEN_MASK_{LOAD,STORE} patch:
    https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622742.html
    
    After enabling LEN_MASK_{LOAD,STORE}, I noticed a case where the VSETVL pass needs to be optimized:
    
    void
    f (int32_t *__restrict a,
       int32_t *__restrict b,
       int32_t *__restrict cond,
       int n)
    {
      for (int i = 0; i < 8; i++)
        if (cond[i])
          a[i] = b[i];
    }
    
    Before this patch:
    f:
            vsetivli        a5,8,e8,mf4,tu,mu   --> Propagate "8" to the following vsetvl
            vsetvli zero,a5,e32,m1,ta,ma
            vle32.v v0,0(a2)
            vsetvli a6,zero,e32,m1,ta,ma
            li      a3,8
            vmsne.vi        v0,v0,0
            vsetvli zero,a5,e32,m1,ta,ma
            vle32.v v1,0(a1),v0.t
            vse32.v v1,0(a0),v0.t
            sub     a4,a3,a5
            beq     a3,a5,.L6
            slli    a5,a5,2
            add     a2,a2,a5
            add     a1,a1,a5
            add     a0,a0,a5
            vsetvli a5,a4,e8,mf4,tu,mu     --> Propagate "a4" to the following vsetvl
            vsetvli zero,a5,e32,m1,ta,ma
            vle32.v v0,0(a2)
            vsetvli a6,zero,e32,m1,ta,ma
            vmsne.vi        v0,v0,0
            vsetvli zero,a5,e32,m1,ta,ma
            vle32.v v1,0(a1),v0.t
            vse32.v v1,0(a0),v0.t
    .L6:
            ret
    
    The current VSETVL pass only enables AVL propagation of VLMAX AVL ("zero").
    Now, we also enable AVL propagation of immediate AVL and, conservatively, of non-VLMAX register AVL.
    
    After this patch:
    
    f:
            vsetivli        a5,8,e8,mf4,ta,ma
            vle32.v v0,0(a2)
            vsetvli a6,zero,e32,m1,ta,ma
            li      a3,8
            vmsne.vi        v0,v0,0
            vsetivli        zero,8,e32,m1,ta,ma
            vle32.v v1,0(a1),v0.t
            vse32.v v1,0(a0),v0.t
            sub     a4,a3,a5
            beq     a3,a5,.L6
            slli    a5,a5,2
            vsetvli a4,a4,e8,mf4,ta,ma
            add     a2,a2,a5
            vle32.v v0,0(a2)
            add     a1,a1,a5
            vsetvli a6,zero,e32,m1,ta,ma
            add     a0,a0,a5
            vmsne.vi        v0,v0,0
            vsetvli zero,a4,e32,m1,ta,ma
            vle32.v v1,0(a1),v0.t
            vse32.v v1,0(a0),v0.t
    .L6:
            ret
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-vsetvl.cc (vector_insn_info::parse_insn): Enhance
            AVL propagation.
            * config/riscv/riscv-vsetvl.h: New function.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/autovec/partial/select_vl-1.c: Add dump checks.
            * gcc.target/riscv/rvv/autovec/partial/select_vl-2.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vsetvl.cc                   | 48 ++++++++++++++++++++--
 gcc/config/riscv/riscv-vsetvl.h                    |  2 +
 .../riscv/rvv/autovec/partial/select_vl-1.c        |  5 ++-
 .../riscv/rvv/autovec/partial/select_vl-2.c        | 25 +++++++++++
 4 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 971c3f90742..2d576e8d5c1 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -2003,9 +2003,51 @@ vector_insn_info::parse_insn (insn_info *insn)
   new_info.parse_insn (def_insn);
   if (!same_vlmax_p (new_info) && !scalar_move_insn_p (insn->rtl ()))
     return;
-  /* TODO: Currently, we don't forward AVL for non-VLMAX vsetvl.  */
-  if (vlmax_avl_p (new_info.get_avl ()))
-    set_avl_info (avl_info (new_info.get_avl (), get_avl_source ()));
+
+  if (new_info.has_avl ())
+    {
+      if (new_info.has_avl_imm ())
+	set_avl_info (avl_info (new_info.get_avl (), nullptr));
+      else
+	{
+	  if (vlmax_avl_p (new_info.get_avl ()))
+	    set_avl_info (avl_info (new_info.get_avl (), get_avl_source ()));
+	  else
+	    {
+	      /* Conservatively propagate non-VLMAX AVL of user vsetvl:
+		 1. The user vsetvl should be same block with the rvv insn.
+		 2. The user vsetvl is the only def insn of rvv insn.
+		 3. The AVL is not modified between def-use chain.
+		 4. The VL is only used by insn within EBB.
+	       */
+	      bool modified_p = false;
+	      for (insn_info *i = def_insn->next_nondebug_insn ();
+		   real_insn_and_same_bb_p (i, get_insn ()->bb ());
+		   i = i->next_nondebug_insn ())
+		{
+		  if (find_access (i->defs (), REGNO (new_info.get_avl ())))
+		    {
+		      modified_p = true;
+		      break;
+		    }
+		}
+
+	      bool has_live_out_use = false;
+	      for (use_info *use : m_avl.get_source ()->all_uses ())
+		{
+		  if (use->is_live_out_use ())
+		    {
+		      has_live_out_use = true;
+		      break;
+		    }
+		}
+	      if (!modified_p && !has_live_out_use
+		  && def_insn == m_avl.get_source ()->insn ()
+		  && m_insn->bb () == def_insn->bb ())
+		set_avl_info (new_info.get_avl_info ());
+	    }
+	}
+    }
 
   if (scalar_move_insn_p (insn->rtl ()) && m_avl.has_non_zero_avl ())
     m_demands[DEMAND_NONZERO_AVL] = true;
diff --git a/gcc/config/riscv/riscv-vsetvl.h b/gcc/config/riscv/riscv-vsetvl.h
index 4257451bb74..87cdd2e886e 100644
--- a/gcc/config/riscv/riscv-vsetvl.h
+++ b/gcc/config/riscv/riscv-vsetvl.h
@@ -180,6 +180,7 @@ public:
   bool has_avl_reg () const { return get_value () && REG_P (get_value ()); }
   bool has_avl_no_reg () const { return !get_value (); }
   bool has_non_zero_avl () const;
+  bool has_avl () const { return get_value (); }
 };
 
 /* Basic structure to save VL/VTYPE information.  */
@@ -219,6 +220,7 @@ public:
   bool has_avl_reg () const { return m_avl.has_avl_reg (); }
   bool has_avl_no_reg () const { return m_avl.has_avl_no_reg (); }
   bool has_non_zero_avl () const { return m_avl.has_non_zero_avl (); };
+  bool has_avl () const { return m_avl.has_avl (); }
 
   rtx get_avl () const { return m_avl.get_value (); }
   const avl_info &get_avl_info () const { return m_avl; }
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c
index 74bbf40ee9f..e27090d79cf 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fno-vect-cost-model -fno-tree-loop-distribute-patterns -fdump-tree-optimized-details" } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param riscv-autovec-preference=scalable -fno-vect-cost-model -fno-tree-loop-distribute-patterns -fdump-tree-optimized-details" } */
 
 #include <stdint-gcc.h>
 
@@ -20,7 +20,10 @@
   TEST_TYPE (uint32_t)                                                         \
   TEST_TYPE (int64_t)                                                          \
   TEST_TYPE (uint64_t)                                                         \
+  TEST_TYPE (_Float16)                                                         \
   TEST_TYPE (float)                                                            \
   TEST_TYPE (double)
 
 TEST_ALL ()
+
+/* { dg-final { scan-tree-dump-times "\.SELECT_VL" 11 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-2.c
new file mode 100644
index 00000000000..eac7cbc757b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-2.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=scalable -fno-schedule-insns --param riscv-autovec-lmul=m1 -O3 -ftree-vectorize" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <stdint-gcc.h>
+
+/*
+** foo:
+**	vsetivli\t[a-x0-9]+,\s*8,\s*e(8?|16?|32?|64),\s*m(1?|2?|4?|8?|f2?|f4?|f8),\s*t[au],\s*m[au]
+**	vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+**	...
+**	vsetvli\t[a-x0-9]+,\s*[a-x0-9]+,\s*e(8?|16?|32?|64),\s*m(1?|2?|4?|8?|f2?|f4?|f8),\s*t[au],\s*m[au]
+**	add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+**	vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+**	...
+*/
+void
+foo (int32_t *__restrict a,
+     int32_t *__restrict b,
+     int32_t *__restrict cond)
+{
+  for (int i = 0; i < 8; i++)
+    if (cond[i])
+      a[i] = b[i];
+}

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [gcc(refs/vendors/riscv/heads/gcc-13-with-riscv-opts)] RISC-V: Optimize VSETVL codegen of SELECT_VL with LEN_MASK_{LOAD, STORE}
@ 2023-07-14  2:49 Jeff Law
  0 siblings, 0 replies; 2+ messages in thread
From: Jeff Law @ 2023-07-14  2:49 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:7913dc223ed239bc828268d05e648398d25d2265

commit 7913dc223ed239bc828268d05e648398d25d2265
Author: Juzhe-Zhong <juzhe.zhong@rivai.ai>
Date:   Sun Jun 25 20:20:57 2023 +0800

    RISC-V: Optimize VSETVL codegen of SELECT_VL with LEN_MASK_{LOAD, STORE}
    
    This patch depends on the LEN_MASK_{LOAD,STORE} patch:
    https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622742.html
    
    After enabling LEN_MASK_{LOAD,STORE}, I noticed a case where the VSETVL pass needs to be optimized:
    
    void
    f (int32_t *__restrict a,
       int32_t *__restrict b,
       int32_t *__restrict cond,
       int n)
    {
      for (int i = 0; i < 8; i++)
        if (cond[i])
          a[i] = b[i];
    }
    
    Before this patch:
    f:
            vsetivli        a5,8,e8,mf4,tu,mu   --> Propagate "8" to the following vsetvl
            vsetvli zero,a5,e32,m1,ta,ma
            vle32.v v0,0(a2)
            vsetvli a6,zero,e32,m1,ta,ma
            li      a3,8
            vmsne.vi        v0,v0,0
            vsetvli zero,a5,e32,m1,ta,ma
            vle32.v v1,0(a1),v0.t
            vse32.v v1,0(a0),v0.t
            sub     a4,a3,a5
            beq     a3,a5,.L6
            slli    a5,a5,2
            add     a2,a2,a5
            add     a1,a1,a5
            add     a0,a0,a5
            vsetvli a5,a4,e8,mf4,tu,mu     --> Propagate "a4" to the following vsetvl
            vsetvli zero,a5,e32,m1,ta,ma
            vle32.v v0,0(a2)
            vsetvli a6,zero,e32,m1,ta,ma
            vmsne.vi        v0,v0,0
            vsetvli zero,a5,e32,m1,ta,ma
            vle32.v v1,0(a1),v0.t
            vse32.v v1,0(a0),v0.t
    .L6:
            ret
    
    The current VSETVL pass only enables AVL propagation of VLMAX AVL ("zero").
    Now, we also enable AVL propagation of immediate AVL and, conservatively, of non-VLMAX register AVL.
    
    After this patch:
    
    f:
            vsetivli        a5,8,e8,mf4,ta,ma
            vle32.v v0,0(a2)
            vsetvli a6,zero,e32,m1,ta,ma
            li      a3,8
            vmsne.vi        v0,v0,0
            vsetivli        zero,8,e32,m1,ta,ma
            vle32.v v1,0(a1),v0.t
            vse32.v v1,0(a0),v0.t
            sub     a4,a3,a5
            beq     a3,a5,.L6
            slli    a5,a5,2
            vsetvli a4,a4,e8,mf4,ta,ma
            add     a2,a2,a5
            vle32.v v0,0(a2)
            add     a1,a1,a5
            vsetvli a6,zero,e32,m1,ta,ma
            add     a0,a0,a5
            vmsne.vi        v0,v0,0
            vsetvli zero,a4,e32,m1,ta,ma
            vle32.v v1,0(a1),v0.t
            vse32.v v1,0(a0),v0.t
    .L6:
            ret
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-vsetvl.cc (vector_insn_info::parse_insn): Enhance
            AVL propagation.
            * config/riscv/riscv-vsetvl.h: New function.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/autovec/partial/select_vl-1.c: Add dump checks.
            * gcc.target/riscv/rvv/autovec/partial/select_vl-2.c: New test.

Diff:
---
 gcc/config/riscv/riscv-vsetvl.cc                   | 48 ++++++++++++++++++++--
 gcc/config/riscv/riscv-vsetvl.h                    |  2 +
 .../riscv/rvv/autovec/partial/select_vl-1.c        |  5 ++-
 .../riscv/rvv/autovec/partial/select_vl-2.c        | 25 +++++++++++
 4 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 971c3f90742..2d576e8d5c1 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -2003,9 +2003,51 @@ vector_insn_info::parse_insn (insn_info *insn)
   new_info.parse_insn (def_insn);
   if (!same_vlmax_p (new_info) && !scalar_move_insn_p (insn->rtl ()))
     return;
-  /* TODO: Currently, we don't forward AVL for non-VLMAX vsetvl.  */
-  if (vlmax_avl_p (new_info.get_avl ()))
-    set_avl_info (avl_info (new_info.get_avl (), get_avl_source ()));
+
+  if (new_info.has_avl ())
+    {
+      if (new_info.has_avl_imm ())
+	set_avl_info (avl_info (new_info.get_avl (), nullptr));
+      else
+	{
+	  if (vlmax_avl_p (new_info.get_avl ()))
+	    set_avl_info (avl_info (new_info.get_avl (), get_avl_source ()));
+	  else
+	    {
+	      /* Conservatively propagate non-VLMAX AVL of user vsetvl:
+		 1. The user vsetvl should be same block with the rvv insn.
+		 2. The user vsetvl is the only def insn of rvv insn.
+		 3. The AVL is not modified between def-use chain.
+		 4. The VL is only used by insn within EBB.
+	       */
+	      bool modified_p = false;
+	      for (insn_info *i = def_insn->next_nondebug_insn ();
+		   real_insn_and_same_bb_p (i, get_insn ()->bb ());
+		   i = i->next_nondebug_insn ())
+		{
+		  if (find_access (i->defs (), REGNO (new_info.get_avl ())))
+		    {
+		      modified_p = true;
+		      break;
+		    }
+		}
+
+	      bool has_live_out_use = false;
+	      for (use_info *use : m_avl.get_source ()->all_uses ())
+		{
+		  if (use->is_live_out_use ())
+		    {
+		      has_live_out_use = true;
+		      break;
+		    }
+		}
+	      if (!modified_p && !has_live_out_use
+		  && def_insn == m_avl.get_source ()->insn ()
+		  && m_insn->bb () == def_insn->bb ())
+		set_avl_info (new_info.get_avl_info ());
+	    }
+	}
+    }
 
   if (scalar_move_insn_p (insn->rtl ()) && m_avl.has_non_zero_avl ())
     m_demands[DEMAND_NONZERO_AVL] = true;
diff --git a/gcc/config/riscv/riscv-vsetvl.h b/gcc/config/riscv/riscv-vsetvl.h
index 4257451bb74..87cdd2e886e 100644
--- a/gcc/config/riscv/riscv-vsetvl.h
+++ b/gcc/config/riscv/riscv-vsetvl.h
@@ -180,6 +180,7 @@ public:
   bool has_avl_reg () const { return get_value () && REG_P (get_value ()); }
   bool has_avl_no_reg () const { return !get_value (); }
   bool has_non_zero_avl () const;
+  bool has_avl () const { return get_value (); }
 };
 
 /* Basic structure to save VL/VTYPE information.  */
@@ -219,6 +220,7 @@ public:
   bool has_avl_reg () const { return m_avl.has_avl_reg (); }
   bool has_avl_no_reg () const { return m_avl.has_avl_no_reg (); }
   bool has_non_zero_avl () const { return m_avl.has_non_zero_avl (); };
+  bool has_avl () const { return m_avl.has_avl (); }
 
   rtx get_avl () const { return m_avl.get_value (); }
   const avl_info &get_avl_info () const { return m_avl; }
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c
index 74bbf40ee9f..e27090d79cf 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fno-vect-cost-model -fno-tree-loop-distribute-patterns -fdump-tree-optimized-details" } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param riscv-autovec-preference=scalable -fno-vect-cost-model -fno-tree-loop-distribute-patterns -fdump-tree-optimized-details" } */
 
 #include <stdint-gcc.h>
 
@@ -20,7 +20,10 @@
   TEST_TYPE (uint32_t)                                                         \
   TEST_TYPE (int64_t)                                                          \
   TEST_TYPE (uint64_t)                                                         \
+  TEST_TYPE (_Float16)                                                         \
   TEST_TYPE (float)                                                            \
   TEST_TYPE (double)
 
 TEST_ALL ()
+
+/* { dg-final { scan-tree-dump-times "\.SELECT_VL" 11 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-2.c
new file mode 100644
index 00000000000..eac7cbc757b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-2.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=scalable -fno-schedule-insns --param riscv-autovec-lmul=m1 -O3 -ftree-vectorize" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <stdint-gcc.h>
+
+/*
+** foo:
+**	vsetivli\t[a-x0-9]+,\s*8,\s*e(8?|16?|32?|64),\s*m(1?|2?|4?|8?|f2?|f4?|f8),\s*t[au],\s*m[au]
+**	vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+**	...
+**	vsetvli\t[a-x0-9]+,\s*[a-x0-9]+,\s*e(8?|16?|32?|64),\s*m(1?|2?|4?|8?|f2?|f4?|f8),\s*t[au],\s*m[au]
+**	add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+**	vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
+**	...
+*/
+void
+foo (int32_t *__restrict a,
+     int32_t *__restrict b,
+     int32_t *__restrict cond)
+{
+  for (int i = 0; i < 8; i++)
+    if (cond[i])
+      a[i] = b[i];
+}

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-07-14  2:49 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-26 20:57 [gcc(refs/vendors/riscv/heads/gcc-13-with-riscv-opts)] RISC-V: Optimize VSETVL codegen of SELECT_VL with LEN_MASK_{LOAD, STORE} Jeff Law
2023-07-14  2:49 Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).