public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] RISC-V: Normalize user vsetvl intrinsics[PR112092]
@ 2023-11-08  6:10 Juzhe-Zhong
  2023-11-08  6:16 ` Kito Cheng
  0 siblings, 1 reply; 8+ messages in thread
From: Juzhe-Zhong @ 2023-11-08  6:10 UTC (permalink / raw)
  To: gcc-patches; +Cc: kito.cheng, kito.cheng, jeffreyalaw, rdapp.gcc, Juzhe-Zhong

Our user vsetvl intrinsics are defined to just calculate the VL output,
which is the number of elements to be processed.  Such intrinsics do not
have any side effects, so we should normalize them when they have the
same ratio.

E.g. the result of __riscv_vsetvl_e8mf8 is the same as that of __riscv_vsetvl_e64m1.

Normalizing them allows us to generate better code.
Consider the following example:

#include "riscv_vector.h"

void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond, int avl) {
  
  size_t vl;
  if (cond)
    vl = __riscv_vsetvl_e32m1(avl);
  else
    vl = __riscv_vsetvl_e16mf2(avl);
  for (size_t i = 0; i < n; i += 1) {
    vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
    vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
    vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
    __riscv_vse32_v_i32m1(out, c, vl);
  }
}

Before this patch:

foo:
        beq     a5,zero,.L2
        vsetvli a6,a6,e32,m1,tu,ma
.L3:
        li      a5,0
        beq     a4,zero,.L9
.L4:
        vle32.v v1,0(a0)
        addi    a5,a5,1
        vle32.v v1,0(a1)
        vle32.v v1,0(a2)
        vse32.v v1,0(a3)
        bne     a4,a5,.L4
.L9:
        ret
.L2:
        vsetvli zero,a6,e32,m1,tu,ma
        j       .L3

After this patch:

foo:
	li	a5,0
	vsetvli	zero,a6,e32,m1,tu,ma
	beq	a4,zero,.L9
.L4:
	vle32.v	v1,0(a0)
	addi	a5,a5,1
	vle32.v	v1,0(a1)
	vle32.v	v1,0(a2)
	vse32.v	v1,0(a3)
	bne	a4,a5,.L4
.L9:
	ret

	PR target/112092

gcc/ChangeLog:

	* config/riscv/riscv-vector-builtins-bases.cc: Normalize the vsetvls.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/vsetvl/pr109743-1.c: Adapt test.
	* gcc.target/riscv/rvv/vsetvl/pr109743-3.c: Ditto.
	* gcc.target/riscv/rvv/vsetvl/vsetvl-11.c: Ditto.
	* gcc.target/riscv/rvv/vsetvl/vsetvl-15.c: Ditto.
	* gcc.target/riscv/rvv/vsetvl/vsetvl-22.c: Ditto.
	* gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c: Ditto.
	* gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c: Ditto.
	* gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c: Ditto.
	* gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c: Ditto.
	* gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c: Ditto.
	* gcc.target/riscv/rvv/vsetvl/pr112092-1.c: New test.
	* gcc.target/riscv/rvv/vsetvl/pr112092-2.c: New test.

---
 .../riscv/riscv-vector-builtins-bases.cc      | 24 +++++++++++++-----
 .../gcc.target/riscv/rvv/vsetvl/pr109743-1.c  |  2 +-
 .../gcc.target/riscv/rvv/vsetvl/pr109743-3.c  |  3 +--
 .../gcc.target/riscv/rvv/vsetvl/pr112092-1.c  | 25 +++++++++++++++++++
 .../gcc.target/riscv/rvv/vsetvl/pr112092-2.c  | 25 +++++++++++++++++++
 .../gcc.target/riscv/rvv/vsetvl/vsetvl-11.c   |  2 +-
 .../gcc.target/riscv/rvv/vsetvl/vsetvl-15.c   |  2 +-
 .../gcc.target/riscv/rvv/vsetvl/vsetvl-22.c   |  2 +-
 .../riscv/rvv/vsetvl/vsetvlmax-13.c           |  4 +--
 .../riscv/rvv/vsetvl/vsetvlmax-15.c           |  6 ++---
 .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c |  4 +--
 .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c |  2 +-
 .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c |  4 +--
 13 files changed, 83 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 0298b7987a1..d70468542ee 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -131,19 +131,31 @@ public:
 
     tree type = builtin_types[e.type.index].vector;
     machine_mode mode = TYPE_MODE (type);
-    machine_mode inner_mode = GET_MODE_INNER (mode);
+    /* Normalize same RATIO (SEW/LMUL) into same vsetvl instruction.
+
+	 - e8,mf8/e16,mf4/e32,mf2/e64,m1 --> e8mf8
+	 - e8,mf4/e16,mf2/e32,m1/e64,m2  --> e8mf4
+	 - e8,mf2/e16,m1/e32,m2/e64,m4   --> e8mf2
+	 - e8,m1/e16,m2/e32,m4/e64,m8    --> e8m1
+	 - e8,m2/e16,m4/e32,m8           --> e8m2
+	 - e8,m4/e16,m8                  --> e8m4
+	 - e8,m8                         --> e8m8
+    */
     /* SEW.  */
-    e.add_input_operand (Pmode,
-			 gen_int_mode (GET_MODE_BITSIZE (inner_mode), Pmode));
+    e.add_input_operand (Pmode, gen_int_mode (8, Pmode));
 
     /* LMUL.  */
-    e.add_input_operand (Pmode, gen_int_mode (get_vlmul (mode), Pmode));
+    machine_mode e8_mode
+      = get_vector_mode (QImode, GET_MODE_NUNITS (mode)).require ();
+    e.add_input_operand (Pmode, gen_int_mode (get_vlmul (e8_mode), Pmode));
 
     /* TAIL_ANY.  */
-    e.add_input_operand (Pmode, gen_int_mode (get_prefer_tail_policy (), Pmode));
+    e.add_input_operand (Pmode,
+			 gen_int_mode (get_prefer_tail_policy (), Pmode));
 
     /* MASK_ANY.  */
-    e.add_input_operand (Pmode, gen_int_mode (get_prefer_mask_policy (), Pmode));
+    e.add_input_operand (Pmode,
+			 gen_int_mode (get_prefer_mask_policy (), Pmode));
     return e.generate_insn (code_for_vsetvl_no_side_effects (Pmode));
   }
 };
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
index f30275c8280..e927829c151 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
@@ -22,5 +22,5 @@ void f (int32_t * a, int32_t * b, int n)
       }
 }
 
-/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
 /* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
index 5dbc871ed12..98630a1ca02 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
@@ -22,7 +22,6 @@ void f (int32_t * a, int32_t * b, int n)
       }
 }
 
-/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
 /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
 /* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
new file mode 100644
index 00000000000..857ab0cd1e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond, int avl) {
+  
+  size_t vl;
+  if (cond == 1)
+    vl = __riscv_vsetvl_e32m1(avl);
+  else if (cond == 2)
+    vl = __riscv_vsetvl_e8mf4(avl);
+  else if (cond == 2)
+    vl = __riscv_vsetvl_e16mf2(avl);
+  else
+    vl = __riscv_vsetvl_e64m2(avl);
+  for (size_t i = 0; i < n; i += 1) {
+    vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
+    vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
+    vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
+    __riscv_vse32_v_i32m1(out, c, vl);
+  }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c
new file mode 100644
index 00000000000..2b0844ff977
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond) {
+  
+  size_t vl;
+  if (cond == 1)
+    vl = __riscv_vsetvlmax_e32m1();
+  else if (cond == 2)
+    vl = __riscv_vsetvlmax_e8mf4();
+  else if (cond == 2)
+    vl = __riscv_vsetvlmax_e16mf2();
+  else
+    vl = __riscv_vsetvlmax_e64m2();
+  for (size_t i = 0; i < n; i += 1) {
+    vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
+    vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
+    vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
+    __riscv_vse32_v_i32m1(out, c, vl);
+  }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
index 8936d3be6a7..b665bb84eb7 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
@@ -18,4 +18,4 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
   }
 }
 
-/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
index c677a81d706..a1954e7a09a 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
@@ -18,5 +18,5 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
   }
 }
 
-/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
 /* { dg-final { scan-assembler-times {srli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*4} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
index 103f4238c76..98df431bced 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
@@ -18,4 +18,4 @@ void f(int8_t *base, int8_t *out, size_t vl, size_t m, size_t k) {
 
 /* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*4} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
 /* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
index 1b92cb876cc..0160d732fc3 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
@@ -22,6 +22,6 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
 }
 
 /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
 /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
\ No newline at end of file
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
index 79a6f271997..a8497210473 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
@@ -18,6 +18,6 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
   }
 }
 
-/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*5} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*5} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
index 0a3a5a3d2d7..0e6b14ee3e9 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
@@ -17,5 +17,5 @@ void foo(void *in1, void *in2, void *in3, void *out, size_t n) {
   }
 }
 
-/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
index 0ee04ee04c9..15e99537462 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
@@ -17,7 +17,7 @@ void foo(void *in1, void *in2, void *in3, void *out, size_t n) {
   }
 }
 
-/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
 /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
 /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
 /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
index a0335a34645..caa2103da5d 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
@@ -18,5 +18,5 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
   }
 }
 
-/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
-/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
-- 
2.36.3


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] RISC-V: Normalize user vsetvl intrinsics[PR112092]
  2023-11-08  6:10 [PATCH] RISC-V: Normalize user vsetvl intrinsics[PR112092] Juzhe-Zhong
@ 2023-11-08  6:16 ` Kito Cheng
  2023-11-08  6:23   ` juzhe.zhong
  2023-11-08  6:32   ` juzhe.zhong
  0 siblings, 2 replies; 8+ messages in thread
From: Kito Cheng @ 2023-11-08  6:16 UTC (permalink / raw)
  To: Juzhe-Zhong; +Cc: gcc-patches, kito.cheng, jeffreyalaw, rdapp.gcc

I thought vsetvli insertion would try to merge them into one in those
cases? Could you explain a bit more about why they are not fused now?
Not an objection, since I can imagine that this would be easier to
process; I'm just wondering why.

On Wed, Nov 8, 2023 at 2:11 PM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
>
> Since our user vsetvl intrinsics are defined as just calculate the VL output
> which is the number of the elements to be processed. Such intrinsics do not
> have any side effects.  We should normalize them when they have same ratio.
>
> E.g __riscv_vsetvl_e8mf8 result is same as __riscv_vsetvl_e64m1.
>
> Normalize them can allow us have better codegen.
> Consider this following example:
>
> #include "riscv_vector.h"
>
> void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond, int avl) {
>
>   size_t vl;
>   if (cond)
>     vl = __riscv_vsetvl_e32m1(avl);
>   else
>     vl = __riscv_vsetvl_e16mf2(avl);
>   for (size_t i = 0; i < n; i += 1) {
>     vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
>     vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
>     vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
>     __riscv_vse32_v_i32m1(out, c, vl);
>   }
> }
>
> Before this patch:
>
> foo:
>         beq     a5,zero,.L2
>         vsetvli a6,a6,e32,m1,tu,ma
> .L3:
>         li      a5,0
>         beq     a4,zero,.L9
> .L4:
>         vle32.v v1,0(a0)
>         addi    a5,a5,1
>         vle32.v v1,0(a1)
>         vle32.v v1,0(a2)
>         vse32.v v1,0(a3)
>         bne     a4,a5,.L4
> .L9:
>         ret
> .L2:
>         vsetvli zero,a6,e32,m1,tu,ma
>         j       .L3
>
> After this patch:
>
> foo:
>         li      a5,0
>         vsetvli zero,a6,e32,m1,tu,ma
>         beq     a4,zero,.L9
> .L4:
>         vle32.v v1,0(a0)
>         addi    a5,a5,1
>         vle32.v v1,0(a1)
>         vle32.v v1,0(a2)
>         vse32.v v1,0(a3)
>         bne     a4,a5,.L4
> .L9:
>         ret
>
>         PR target/112092
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv-vector-builtins-bases.cc: Normalize the vsetvls.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/vsetvl/pr109743-1.c: Adapt test.
>         * gcc.target/riscv/rvv/vsetvl/pr109743-3.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvl-11.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvl-15.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvl-22.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/pr112092-1.c: New test.
>         * gcc.target/riscv/rvv/vsetvl/pr112092-2.c: New test.
>
> ---
>  .../riscv/riscv-vector-builtins-bases.cc      | 24 +++++++++++++-----
>  .../gcc.target/riscv/rvv/vsetvl/pr109743-1.c  |  2 +-
>  .../gcc.target/riscv/rvv/vsetvl/pr109743-3.c  |  3 +--
>  .../gcc.target/riscv/rvv/vsetvl/pr112092-1.c  | 25 +++++++++++++++++++
>  .../gcc.target/riscv/rvv/vsetvl/pr112092-2.c  | 25 +++++++++++++++++++
>  .../gcc.target/riscv/rvv/vsetvl/vsetvl-11.c   |  2 +-
>  .../gcc.target/riscv/rvv/vsetvl/vsetvl-15.c   |  2 +-
>  .../gcc.target/riscv/rvv/vsetvl/vsetvl-22.c   |  2 +-
>  .../riscv/rvv/vsetvl/vsetvlmax-13.c           |  4 +--
>  .../riscv/rvv/vsetvl/vsetvlmax-15.c           |  6 ++---
>  .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c |  4 +--
>  .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c |  2 +-
>  .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c |  4 +--
>  13 files changed, 83 insertions(+), 22 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> index 0298b7987a1..d70468542ee 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> @@ -131,19 +131,31 @@ public:
>
>      tree type = builtin_types[e.type.index].vector;
>      machine_mode mode = TYPE_MODE (type);
> -    machine_mode inner_mode = GET_MODE_INNER (mode);
> +    /* Normalize same RATO (SEW/LMUL) into same vsetvl instruction.
> +
> +        - e8,mf8/e16,mf4/e32,mf2/e64,m1 --> e8mf8
> +        - e8,mf4/e16,mf2/e32,m1/e64,m2  --> e8mf4
> +        - e8,mf2/e16,m1/e32,m2/e64,m4   --> e8mf2
> +        - e8,m1/e16,m2/e32,m4/e64,m8    --> e8m1
> +        - e8,m2/e16,m4/e32,m8           --> e8m2
> +        - e8,m4/e16,m8                  --> e8m4
> +        - e8,m8                         --> e8m8
> +    */
>      /* SEW.  */
> -    e.add_input_operand (Pmode,
> -                        gen_int_mode (GET_MODE_BITSIZE (inner_mode), Pmode));
> +    e.add_input_operand (Pmode, gen_int_mode (8, Pmode));
>
>      /* LMUL.  */
> -    e.add_input_operand (Pmode, gen_int_mode (get_vlmul (mode), Pmode));
> +    machine_mode e8_mode
> +      = get_vector_mode (QImode, GET_MODE_NUNITS (mode)).require ();
> +    e.add_input_operand (Pmode, gen_int_mode (get_vlmul (e8_mode), Pmode));
>
>      /* TAIL_ANY.  */
> -    e.add_input_operand (Pmode, gen_int_mode (get_prefer_tail_policy (), Pmode));
> +    e.add_input_operand (Pmode,
> +                        gen_int_mode (get_prefer_tail_policy (), Pmode));
>
>      /* MASK_ANY.  */
> -    e.add_input_operand (Pmode, gen_int_mode (get_prefer_mask_policy (), Pmode));
> +    e.add_input_operand (Pmode,
> +                        gen_int_mode (get_prefer_mask_policy (), Pmode));
>      return e.generate_insn (code_for_vsetvl_no_side_effects (Pmode));
>    }
>  };
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> index f30275c8280..e927829c151 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> @@ -22,5 +22,5 @@ void f (int32_t * a, int32_t * b, int n)
>        }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> index 5dbc871ed12..98630a1ca02 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> @@ -22,7 +22,6 @@ void f (int32_t * a, int32_t * b, int n)
>        }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
> new file mode 100644
> index 00000000000..857ab0cd1e6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond, int avl) {
> +
> +  size_t vl;
> +  if (cond == 1)
> +    vl = __riscv_vsetvl_e32m1(avl);
> +  else if (cond == 2)
> +    vl = __riscv_vsetvl_e8mf4(avl);
> +  else if (cond == 2)
> +    vl = __riscv_vsetvl_e16mf2(avl);
> +  else
> +    vl = __riscv_vsetvl_e64m2(avl);
> +  for (size_t i = 0; i < n; i += 1) {
> +    vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
> +    vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
> +    vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
> +    __riscv_vse32_v_i32m1(out, c, vl);
> +  }
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c
> new file mode 100644
> index 00000000000..2b0844ff977
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond) {
> +
> +  size_t vl;
> +  if (cond == 1)
> +    vl = __riscv_vsetvlmax_e32m1();
> +  else if (cond == 2)
> +    vl = __riscv_vsetvlmax_e8mf4();
> +  else if (cond == 2)
> +    vl = __riscv_vsetvlmax_e16mf2();
> +  else
> +    vl = __riscv_vsetvlmax_e64m2();
> +  for (size_t i = 0; i < n; i += 1) {
> +    vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
> +    vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
> +    vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
> +    __riscv_vse32_v_i32m1(out, c, vl);
> +  }
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
> index 8936d3be6a7..b665bb84eb7 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
> @@ -18,4 +18,4 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
> index c677a81d706..a1954e7a09a 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
> @@ -18,5 +18,5 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {srli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*4} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
> index 103f4238c76..98df431bced 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
> @@ -18,4 +18,4 @@ void f(int8_t *base, int8_t *out, size_t vl, size_t m, size_t k) {
>
>  /* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*4} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
> index 1b92cb876cc..0160d732fc3 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
> @@ -22,6 +22,6 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
>  }
>
>  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> \ No newline at end of file
> +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
> index 79a6f271997..a8497210473 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
> @@ -18,6 +18,6 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*5} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*5} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
> index 0a3a5a3d2d7..0e6b14ee3e9 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
> @@ -17,5 +17,5 @@ void foo(void *in1, void *in2, void *in3, void *out, size_t n) {
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
> index 0ee04ee04c9..15e99537462 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
> @@ -17,7 +17,7 @@ void foo(void *in1, void *in2, void *in3, void *out, size_t n) {
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
> index a0335a34645..caa2103da5d 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
> @@ -18,5 +18,5 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> --
> 2.36.3
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Re: [PATCH] RISC-V: Normalize user vsetvl intrinsics[PR112092]
  2023-11-08  6:16 ` Kito Cheng
@ 2023-11-08  6:23   ` juzhe.zhong
  2023-11-08  6:32   ` juzhe.zhong
  1 sibling, 0 replies; 8+ messages in thread
From: juzhe.zhong @ 2023-11-08  6:23 UTC (permalink / raw)
  To: kito.cheng; +Cc: gcc-patches, Kito.cheng, jeffreyalaw, Robin Dapp

[-- Attachment #1: Type: text/plain, Size: 20701 bytes --]

Before the VSETVL pass, the code is as follows:

bb 1:
vsetvli e16mf2 -> set a6
bb 2:
vsetvli e32m1 -> set a6
bb 3:
...
vle (use a6) e32m1 TU
vle (use a6) e32m1 TU
vse (use a6) e32m1 TU

The VSETVL pass only does VSETVL information fusion; it doesn't do CFG block fusion.

The VSETVL pass succeeds in the following fusion:

Change bb 1 vsetvli e16mf2 -> e32m1TU
Change bb 2 vsetvli e32m1 -> e32m1TU

But the VSETVL pass can't remove bb 1 and bb 2 and create a new block, say bb 4, to hold vsetvli e32m1TU.

So you will see:
bb 1:
vsetvli e32m1TU
bb 2:
vsetvli e32m1TU
bb 3:
...
vle
vle
vse

With this patch, since vsetvl e16mf2 and vsetvl e32m1 are normalized into the same vsetvl e8mf4,
before the VSETVL pass we will see:

bb 1
vsetvli e8mf4
bb 2:
...
vle
vle
vse

Since the later vle/vle/vse use e32m1TU, the VSETVL pass fuses their demand into bb 1, changing vsetvli e8mf4 into:

bb 1
vsetvli e32m1TU
bb 2:
...
vle
vle
vse


juzhe.zhong@rivai.ai
 
From: Kito Cheng
Date: 2023-11-08 14:16
To: Juzhe-Zhong
CC: gcc-patches; kito.cheng; jeffreyalaw; rdapp.gcc
Subject: Re: [PATCH] RISC-V: Normalize user vsetvl intrinsics[PR112092]
I thought vsetvli insertion would try to merge them into one for those
cases? Could you explain a few more reasons why they are not fused now?
Not an objection, since I could imagine that would be easier to
process; just wondering why.
 
On Wed, Nov 8, 2023 at 2:11 PM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
>
> Since our user vsetvl intrinsics are defined as just calculate the VL output
> which is the number of the elements to be processed. Such intrinsics do not
> have any side effects.  We should normalize them when they have same ratio.
>
> E.g __riscv_vsetvl_e8mf8 result is same as __riscv_vsetvl_e64m1.
>
> Normalize them can allow us have better codegen.
> Consider this following example:
>
> #include "riscv_vector.h"
>
> void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond, int avl) {
>
>   size_t vl;
>   if (cond)
>     vl = __riscv_vsetvl_e32m1(avl);
>   else
>     vl = __riscv_vsetvl_e16mf2(avl);
>   for (size_t i = 0; i < n; i += 1) {
>     vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
>     vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
>     vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
>     __riscv_vse32_v_i32m1(out, c, vl);
>   }
> }
>
> Before this patch:
>
> foo:
>         beq     a5,zero,.L2
>         vsetvli a6,a6,e32,m1,tu,ma
> .L3:
>         li      a5,0
>         beq     a4,zero,.L9
> .L4:
>         vle32.v v1,0(a0)
>         addi    a5,a5,1
>         vle32.v v1,0(a1)
>         vle32.v v1,0(a2)
>         vse32.v v1,0(a3)
>         bne     a4,a5,.L4
> .L9:
>         ret
> .L2:
>         vsetvli zero,a6,e32,m1,tu,ma
>         j       .L3
>
> After this patch:
>
> foo:
>         li      a5,0
>         vsetvli zero,a6,e32,m1,tu,ma
>         beq     a4,zero,.L9
> .L4:
>         vle32.v v1,0(a0)
>         addi    a5,a5,1
>         vle32.v v1,0(a1)
>         vle32.v v1,0(a2)
>         vse32.v v1,0(a3)
>         bne     a4,a5,.L4
> .L9:
>         ret
>
>         PR target/112092
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv-vector-builtins-bases.cc: Normalize the vsetvls.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/vsetvl/pr109743-1.c: Adapt test.
>         * gcc.target/riscv/rvv/vsetvl/pr109743-3.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvl-11.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvl-15.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvl-22.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/pr112092-1.c: New test.
>         * gcc.target/riscv/rvv/vsetvl/pr112092-2.c: New test.
>
> ---
>  .../riscv/riscv-vector-builtins-bases.cc      | 24 +++++++++++++-----
>  .../gcc.target/riscv/rvv/vsetvl/pr109743-1.c  |  2 +-
>  .../gcc.target/riscv/rvv/vsetvl/pr109743-3.c  |  3 +--
>  .../gcc.target/riscv/rvv/vsetvl/pr112092-1.c  | 25 +++++++++++++++++++
>  .../gcc.target/riscv/rvv/vsetvl/pr112092-2.c  | 25 +++++++++++++++++++
>  .../gcc.target/riscv/rvv/vsetvl/vsetvl-11.c   |  2 +-
>  .../gcc.target/riscv/rvv/vsetvl/vsetvl-15.c   |  2 +-
>  .../gcc.target/riscv/rvv/vsetvl/vsetvl-22.c   |  2 +-
>  .../riscv/rvv/vsetvl/vsetvlmax-13.c           |  4 +--
>  .../riscv/rvv/vsetvl/vsetvlmax-15.c           |  6 ++---
>  .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c |  4 +--
>  .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c |  2 +-
>  .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c |  4 +--
>  13 files changed, 83 insertions(+), 22 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> index 0298b7987a1..d70468542ee 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> @@ -131,19 +131,31 @@ public:
>
>      tree type = builtin_types[e.type.index].vector;
>      machine_mode mode = TYPE_MODE (type);
> -    machine_mode inner_mode = GET_MODE_INNER (mode);
> +    /* Normalize same RATIO (SEW/LMUL) into same vsetvl instruction.
> +
> +        - e8,mf8/e16,mf4/e32,mf2/e64,m1 --> e8mf8
> +        - e8,mf4/e16,mf2/e32,m1/e64,m2  --> e8mf4
> +        - e8,mf2/e16,m1/e32,m2/e64,m4   --> e8mf2
> +        - e8,m1/e16,m2/e32,m4/e64,m8    --> e8m1
> +        - e8,m2/e16,m4/e32,m8           --> e8m2
> +        - e8,m4/e16,m8                  --> e8m4
> +        - e8,m8                         --> e8m8
> +    */
>      /* SEW.  */
> -    e.add_input_operand (Pmode,
> -                        gen_int_mode (GET_MODE_BITSIZE (inner_mode), Pmode));
> +    e.add_input_operand (Pmode, gen_int_mode (8, Pmode));
>
>      /* LMUL.  */
> -    e.add_input_operand (Pmode, gen_int_mode (get_vlmul (mode), Pmode));
> +    machine_mode e8_mode
> +      = get_vector_mode (QImode, GET_MODE_NUNITS (mode)).require ();
> +    e.add_input_operand (Pmode, gen_int_mode (get_vlmul (e8_mode), Pmode));
>
>      /* TAIL_ANY.  */
> -    e.add_input_operand (Pmode, gen_int_mode (get_prefer_tail_policy (), Pmode));
> +    e.add_input_operand (Pmode,
> +                        gen_int_mode (get_prefer_tail_policy (), Pmode));
>
>      /* MASK_ANY.  */
> -    e.add_input_operand (Pmode, gen_int_mode (get_prefer_mask_policy (), Pmode));
> +    e.add_input_operand (Pmode,
> +                        gen_int_mode (get_prefer_mask_policy (), Pmode));
>      return e.generate_insn (code_for_vsetvl_no_side_effects (Pmode));
>    }
>  };
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> index f30275c8280..e927829c151 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> @@ -22,5 +22,5 @@ void f (int32_t * a, int32_t * b, int n)
>        }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> index 5dbc871ed12..98630a1ca02 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> @@ -22,7 +22,6 @@ void f (int32_t * a, int32_t * b, int n)
>        }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
> new file mode 100644
> index 00000000000..857ab0cd1e6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond, int avl) {
> +
> +  size_t vl;
> +  if (cond == 1)
> +    vl = __riscv_vsetvl_e32m1(avl);
> +  else if (cond == 2)
> +    vl = __riscv_vsetvl_e8mf4(avl);
> +  else if (cond == 2)
> +    vl = __riscv_vsetvl_e16mf2(avl);
> +  else
> +    vl = __riscv_vsetvl_e64m2(avl);
> +  for (size_t i = 0; i < n; i += 1) {
> +    vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
> +    vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
> +    vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
> +    __riscv_vse32_v_i32m1(out, c, vl);
> +  }
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c
> new file mode 100644
> index 00000000000..2b0844ff977
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond) {
> +
> +  size_t vl;
> +  if (cond == 1)
> +    vl = __riscv_vsetvlmax_e32m1();
> +  else if (cond == 2)
> +    vl = __riscv_vsetvlmax_e8mf4();
> +  else if (cond == 2)
> +    vl = __riscv_vsetvlmax_e16mf2();
> +  else
> +    vl = __riscv_vsetvlmax_e64m2();
> +  for (size_t i = 0; i < n; i += 1) {
> +    vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
> +    vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
> +    vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
> +    __riscv_vse32_v_i32m1(out, c, vl);
> +  }
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
> index 8936d3be6a7..b665bb84eb7 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
> @@ -18,4 +18,4 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
> index c677a81d706..a1954e7a09a 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
> @@ -18,5 +18,5 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {srli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*4} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
> index 103f4238c76..98df431bced 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
> @@ -18,4 +18,4 @@ void f(int8_t *base, int8_t *out, size_t vl, size_t m, size_t k) {
>
>  /* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*4} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
> index 1b92cb876cc..0160d732fc3 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
> @@ -22,6 +22,6 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
>  }
>
>  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> \ No newline at end of file
> +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
> index 79a6f271997..a8497210473 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
> @@ -18,6 +18,6 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*5} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*5} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
> index 0a3a5a3d2d7..0e6b14ee3e9 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
> @@ -17,5 +17,5 @@ void foo(void *in1, void *in2, void *in3, void *out, size_t n) {
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
> index 0ee04ee04c9..15e99537462 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
> @@ -17,7 +17,7 @@ void foo(void *in1, void *in2, void *in3, void *out, size_t n) {
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
> index a0335a34645..caa2103da5d 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
> @@ -18,5 +18,5 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> --
> 2.36.3
>
 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Re: [PATCH] RISC-V: Normalize user vsetvl intrinsics[PR112092]
  2023-11-08  6:16 ` Kito Cheng
  2023-11-08  6:23   ` juzhe.zhong
@ 2023-11-08  6:32   ` juzhe.zhong
  2023-11-08  6:33     ` Kito Cheng
  1 sibling, 1 reply; 8+ messages in thread
From: juzhe.zhong @ 2023-11-08  6:32 UTC (permalink / raw)
  To: kito.cheng; +Cc: gcc-patches, Kito.cheng, jeffreyalaw, Robin Dapp

[-- Attachment #1: Type: text/plain, Size: 20118 bytes --]

More details:

bb 1   bb 2
  \    /
   bb 3

The VSETVL pass can only do VSETVL demand fusion: it fuses the demand from bb 3 into bb 1, and the demand from bb 3 into bb 2.
We are not able to remove blocks bb 1 and bb 2 and create a new bb 4 to hold the vsetvl when bb 1 and bb 2 have the same vsetvl:

bb 4 (new block)
  |
bb 3

I don't think we should do this in the VSETVL pass.


juzhe.zhong@rivai.ai
 
From: Kito Cheng
Date: 2023-11-08 14:16
To: Juzhe-Zhong
CC: gcc-patches; kito.cheng; jeffreyalaw; rdapp.gcc
Subject: Re: [PATCH] RISC-V: Normalize user vsetvl intrinsics[PR112092]
I thought vsetvli insertion would try to merge them into one for those
cases? Could you explain a few more reasons why they are not fused now?
Not an objection, since I could imagine that would be easier to
process; just wondering why.
 
On Wed, Nov 8, 2023 at 2:11 PM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
>
> Since our user vsetvl intrinsics are defined as just calculate the VL output
> which is the number of the elements to be processed. Such intrinsics do not
> have any side effects.  We should normalize them when they have same ratio.
>
> E.g __riscv_vsetvl_e8mf8 result is same as __riscv_vsetvl_e64m1.
>
> Normalize them can allow us have better codegen.
> Consider this following example:
>
> #include "riscv_vector.h"
>
> void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond, int avl) {
>
>   size_t vl;
>   if (cond)
>     vl = __riscv_vsetvl_e32m1(avl);
>   else
>     vl = __riscv_vsetvl_e16mf2(avl);
>   for (size_t i = 0; i < n; i += 1) {
>     vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
>     vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
>     vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
>     __riscv_vse32_v_i32m1(out, c, vl);
>   }
> }
>
> Before this patch:
>
> foo:
>         beq     a5,zero,.L2
>         vsetvli a6,a6,e32,m1,tu,ma
> .L3:
>         li      a5,0
>         beq     a4,zero,.L9
> .L4:
>         vle32.v v1,0(a0)
>         addi    a5,a5,1
>         vle32.v v1,0(a1)
>         vle32.v v1,0(a2)
>         vse32.v v1,0(a3)
>         bne     a4,a5,.L4
> .L9:
>         ret
> .L2:
>         vsetvli zero,a6,e32,m1,tu,ma
>         j       .L3
>
> After this patch:
>
> foo:
>         li      a5,0
>         vsetvli zero,a6,e32,m1,tu,ma
>         beq     a4,zero,.L9
> .L4:
>         vle32.v v1,0(a0)
>         addi    a5,a5,1
>         vle32.v v1,0(a1)
>         vle32.v v1,0(a2)
>         vse32.v v1,0(a3)
>         bne     a4,a5,.L4
> .L9:
>         ret
>
>         PR target/112092
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv-vector-builtins-bases.cc: Normalize the vsetvls.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/vsetvl/pr109743-1.c: Adapt test.
>         * gcc.target/riscv/rvv/vsetvl/pr109743-3.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvl-11.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvl-15.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvl-22.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c: Ditto.
>         * gcc.target/riscv/rvv/vsetvl/pr112092-1.c: New test.
>         * gcc.target/riscv/rvv/vsetvl/pr112092-2.c: New test.
>
> ---
>  .../riscv/riscv-vector-builtins-bases.cc      | 24 +++++++++++++-----
>  .../gcc.target/riscv/rvv/vsetvl/pr109743-1.c  |  2 +-
>  .../gcc.target/riscv/rvv/vsetvl/pr109743-3.c  |  3 +--
>  .../gcc.target/riscv/rvv/vsetvl/pr112092-1.c  | 25 +++++++++++++++++++
>  .../gcc.target/riscv/rvv/vsetvl/pr112092-2.c  | 25 +++++++++++++++++++
>  .../gcc.target/riscv/rvv/vsetvl/vsetvl-11.c   |  2 +-
>  .../gcc.target/riscv/rvv/vsetvl/vsetvl-15.c   |  2 +-
>  .../gcc.target/riscv/rvv/vsetvl/vsetvl-22.c   |  2 +-
>  .../riscv/rvv/vsetvl/vsetvlmax-13.c           |  4 +--
>  .../riscv/rvv/vsetvl/vsetvlmax-15.c           |  6 ++---
>  .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c |  4 +--
>  .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c |  2 +-
>  .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c |  4 +--
>  13 files changed, 83 insertions(+), 22 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> index 0298b7987a1..d70468542ee 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> @@ -131,19 +131,31 @@ public:
>
>      tree type = builtin_types[e.type.index].vector;
>      machine_mode mode = TYPE_MODE (type);
> -    machine_mode inner_mode = GET_MODE_INNER (mode);
> +    /* Normalize same RATIO (SEW/LMUL) into same vsetvl instruction.
> +
> +        - e8,mf8/e16,mf4/e32,mf2/e64,m1 --> e8mf8
> +        - e8,mf4/e16,mf2/e32,m1/e64,m2  --> e8mf4
> +        - e8,mf2/e16,m1/e32,m2/e64,m4   --> e8mf2
> +        - e8,m1/e16,m2/e32,m4/e64,m8    --> e8m1
> +        - e8,m2/e16,m4/e32,m8           --> e8m2
> +        - e8,m4/e16,m8                  --> e8m4
> +        - e8,m8                         --> e8m8
> +    */
>      /* SEW.  */
> -    e.add_input_operand (Pmode,
> -                        gen_int_mode (GET_MODE_BITSIZE (inner_mode), Pmode));
> +    e.add_input_operand (Pmode, gen_int_mode (8, Pmode));
>
>      /* LMUL.  */
> -    e.add_input_operand (Pmode, gen_int_mode (get_vlmul (mode), Pmode));
> +    machine_mode e8_mode
> +      = get_vector_mode (QImode, GET_MODE_NUNITS (mode)).require ();
> +    e.add_input_operand (Pmode, gen_int_mode (get_vlmul (e8_mode), Pmode));
>
>      /* TAIL_ANY.  */
> -    e.add_input_operand (Pmode, gen_int_mode (get_prefer_tail_policy (), Pmode));
> +    e.add_input_operand (Pmode,
> +                        gen_int_mode (get_prefer_tail_policy (), Pmode));
>
>      /* MASK_ANY.  */
> -    e.add_input_operand (Pmode, gen_int_mode (get_prefer_mask_policy (), Pmode));
> +    e.add_input_operand (Pmode,
> +                        gen_int_mode (get_prefer_mask_policy (), Pmode));
>      return e.generate_insn (code_for_vsetvl_no_side_effects (Pmode));
>    }
>  };
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> index f30275c8280..e927829c151 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> @@ -22,5 +22,5 @@ void f (int32_t * a, int32_t * b, int n)
>        }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> index 5dbc871ed12..98630a1ca02 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> @@ -22,7 +22,6 @@ void f (int32_t * a, int32_t * b, int n)
>        }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
> new file mode 100644
> index 00000000000..857ab0cd1e6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond, int avl) {
> +
> +  size_t vl;
> +  if (cond == 1)
> +    vl = __riscv_vsetvl_e32m1(avl);
> +  else if (cond == 2)
> +    vl = __riscv_vsetvl_e8mf4(avl);
> +  else if (cond == 2)
> +    vl = __riscv_vsetvl_e16mf2(avl);
> +  else
> +    vl = __riscv_vsetvl_e64m2(avl);
> +  for (size_t i = 0; i < n; i += 1) {
> +    vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
> +    vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
> +    vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
> +    __riscv_vse32_v_i32m1(out, c, vl);
> +  }
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c
> new file mode 100644
> index 00000000000..2b0844ff977
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond) {
> +
> +  size_t vl;
> +  if (cond == 1)
> +    vl = __riscv_vsetvlmax_e32m1();
> +  else if (cond == 2)
> +    vl = __riscv_vsetvlmax_e8mf4();
> +  else if (cond == 2)
> +    vl = __riscv_vsetvlmax_e16mf2();
> +  else
> +    vl = __riscv_vsetvlmax_e64m2();
> +  for (size_t i = 0; i < n; i += 1) {
> +    vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
> +    vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
> +    vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
> +    __riscv_vse32_v_i32m1(out, c, vl);
> +  }
> +}
> +
> +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
> index 8936d3be6a7..b665bb84eb7 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
> @@ -18,4 +18,4 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
> index c677a81d706..a1954e7a09a 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
> @@ -18,5 +18,5 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {srli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*4} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
> index 103f4238c76..98df431bced 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
> @@ -18,4 +18,4 @@ void f(int8_t *base, int8_t *out, size_t vl, size_t m, size_t k) {
>
>  /* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*4} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
> index 1b92cb876cc..0160d732fc3 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
> @@ -22,6 +22,6 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
>  }
>
>  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> \ No newline at end of file
> +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
> index 79a6f271997..a8497210473 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
> @@ -18,6 +18,6 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*5} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*5} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
> index 0a3a5a3d2d7..0e6b14ee3e9 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
> @@ -17,5 +17,5 @@ void foo(void *in1, void *in2, void *in3, void *out, size_t n) {
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
> index 0ee04ee04c9..15e99537462 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
> @@ -17,7 +17,7 @@ void foo(void *in1, void *in2, void *in3, void *out, size_t n) {
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
>  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
> index a0335a34645..caa2103da5d 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
> @@ -18,5 +18,5 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
>    }
>  }
>
> -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> -/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> --
> 2.36.3
>
 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Re: [PATCH] RISC-V: Normalize user vsetvl intrinsics[PR112092]
  2023-11-08  6:32   ` juzhe.zhong
@ 2023-11-08  6:33     ` Kito Cheng
  2023-11-08  6:36       ` juzhe.zhong
  2023-12-04 17:14       ` Maciej W. Rozycki
  0 siblings, 2 replies; 8+ messages in thread
From: Kito Cheng @ 2023-11-08  6:33 UTC (permalink / raw)
  To: juzhe.zhong; +Cc: gcc-patches, Kito.cheng, jeffreyalaw, Robin Dapp

OK, then LGTM, thanks for the explanation :)

On Wed, Nov 8, 2023 at 2:33 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> More details:
>
> bb 1   bb 2
>   \    /
>    bb 3
>
> The VSETVL pass can only do VSETVL demand fusion: it fuses the demand from bb 3 into bb 1, and the demand from bb 3 into bb 2.
> We are not able to remove blocks bb 1 and bb 2 and create a new bb 4 to hold the vsetvl if bb 1 and bb 2 have the same vsetvl:
>
> bb 4 (new block)
>   |
> bb 3
>
> I don't think we should do this in the VSETVL pass.
> ________________________________
> juzhe.zhong@rivai.ai
>
>
> From: Kito Cheng
> Date: 2023-11-08 14:16
> To: Juzhe-Zhong
> CC: gcc-patches; kito.cheng; jeffreyalaw; rdapp.gcc
> Subject: Re: [PATCH] RISC-V: Normalize user vsetvl intrinsics[PR112092]
> I thought vsetvli insertion would try to merge them into one for those
> cases? Could you explain a bit more about why they are not fused now?
> Not an objection, since I could imagine that would be easier to
> process; just wondering why.
>
> On Wed, Nov 8, 2023 at 2:11 PM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
> >
> > Our user vsetvl intrinsics are defined to just calculate the VL output,
> > which is the number of elements to be processed.  Such intrinsics do not
> > have any side effects, so we should normalize them when they have the
> > same ratio.
> >
> > E.g. the result of __riscv_vsetvl_e8mf8 is the same as that of
> > __riscv_vsetvl_e64m1.
> >
> > Normalizing them allows us to have better codegen.
> > Consider the following example:
> >
> > #include "riscv_vector.h"
> >
> > void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond, int avl) {
> >
> >   size_t vl;
> >   if (cond)
> >     vl = __riscv_vsetvl_e32m1(avl);
> >   else
> >     vl = __riscv_vsetvl_e16mf2(avl);
> >   for (size_t i = 0; i < n; i += 1) {
> >     vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
> >     vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
> >     vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
> >     __riscv_vse32_v_i32m1(out, c, vl);
> >   }
> > }
> >
> > Before this patch:
> >
> > foo:
> >         beq     a5,zero,.L2
> >         vsetvli a6,a6,e32,m1,tu,ma
> > .L3:
> >         li      a5,0
> >         beq     a4,zero,.L9
> > .L4:
> >         vle32.v v1,0(a0)
> >         addi    a5,a5,1
> >         vle32.v v1,0(a1)
> >         vle32.v v1,0(a2)
> >         vse32.v v1,0(a3)
> >         bne     a4,a5,.L4
> > .L9:
> >         ret
> > .L2:
> >         vsetvli zero,a6,e32,m1,tu,ma
> >         j       .L3
> >
> > After this patch:
> >
> > foo:
> >         li      a5,0
> >         vsetvli zero,a6,e32,m1,tu,ma
> >         beq     a4,zero,.L9
> > .L4:
> >         vle32.v v1,0(a0)
> >         addi    a5,a5,1
> >         vle32.v v1,0(a1)
> >         vle32.v v1,0(a2)
> >         vse32.v v1,0(a3)
> >         bne     a4,a5,.L4
> > .L9:
> >         ret
> >
> >         PR target/112092
> >
> > gcc/ChangeLog:
> >
> >         * config/riscv/riscv-vector-builtins-bases.cc: Normalize the vsetvls.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/riscv/rvv/vsetvl/pr109743-1.c: Adapt test.
> >         * gcc.target/riscv/rvv/vsetvl/pr109743-3.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/vsetvl-11.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/vsetvl-15.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/vsetvl-22.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/pr112092-1.c: New test.
> >         * gcc.target/riscv/rvv/vsetvl/pr112092-2.c: New test.
> >
> > ---
> >  .../riscv/riscv-vector-builtins-bases.cc      | 24 +++++++++++++-----
> >  .../gcc.target/riscv/rvv/vsetvl/pr109743-1.c  |  2 +-
> >  .../gcc.target/riscv/rvv/vsetvl/pr109743-3.c  |  3 +--
> >  .../gcc.target/riscv/rvv/vsetvl/pr112092-1.c  | 25 +++++++++++++++++++
> >  .../gcc.target/riscv/rvv/vsetvl/pr112092-2.c  | 25 +++++++++++++++++++
> >  .../gcc.target/riscv/rvv/vsetvl/vsetvl-11.c   |  2 +-
> >  .../gcc.target/riscv/rvv/vsetvl/vsetvl-15.c   |  2 +-
> >  .../gcc.target/riscv/rvv/vsetvl/vsetvl-22.c   |  2 +-
> >  .../riscv/rvv/vsetvl/vsetvlmax-13.c           |  4 +--
> >  .../riscv/rvv/vsetvl/vsetvlmax-15.c           |  6 ++---
> >  .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c |  4 +--
> >  .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c |  2 +-
> >  .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c |  4 +--
> >  13 files changed, 83 insertions(+), 22 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c
> >
> > diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> > index 0298b7987a1..d70468542ee 100644
> > --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
> > +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> > @@ -131,19 +131,31 @@ public:
> >
> >      tree type = builtin_types[e.type.index].vector;
> >      machine_mode mode = TYPE_MODE (type);
> > -    machine_mode inner_mode = GET_MODE_INNER (mode);
> > +    /* Normalize same RATIO (SEW/LMUL) into same vsetvl instruction.
> > +
> > +        - e8,mf8/e16,mf4/e32,mf2/e64,m1 --> e8mf8
> > +        - e8,mf4/e16,mf2/e32,m1/e64,m2  --> e8mf4
> > +        - e8,mf2/e16,m1/e32,m2/e64,m4   --> e8mf2
> > +        - e8,m1/e16,m2/e32,m4/e64,m8    --> e8m1
> > +        - e8,m2/e16,m4/e32,m8           --> e8m2
> > +        - e8,m4/e16,m8                  --> e8m4
> > +        - e8,m8                         --> e8m8
> > +    */
> >      /* SEW.  */
> > -    e.add_input_operand (Pmode,
> > -                        gen_int_mode (GET_MODE_BITSIZE (inner_mode), Pmode));
> > +    e.add_input_operand (Pmode, gen_int_mode (8, Pmode));
> >
> >      /* LMUL.  */
> > -    e.add_input_operand (Pmode, gen_int_mode (get_vlmul (mode), Pmode));
> > +    machine_mode e8_mode
> > +      = get_vector_mode (QImode, GET_MODE_NUNITS (mode)).require ();
> > +    e.add_input_operand (Pmode, gen_int_mode (get_vlmul (e8_mode), Pmode));
> >
> >      /* TAIL_ANY.  */
> > -    e.add_input_operand (Pmode, gen_int_mode (get_prefer_tail_policy (), Pmode));
> > +    e.add_input_operand (Pmode,
> > +                        gen_int_mode (get_prefer_tail_policy (), Pmode));
> >
> >      /* MASK_ANY.  */
> > -    e.add_input_operand (Pmode, gen_int_mode (get_prefer_mask_policy (), Pmode));
> > +    e.add_input_operand (Pmode,
> > +                        gen_int_mode (get_prefer_mask_policy (), Pmode));
> >      return e.generate_insn (code_for_vsetvl_no_side_effects (Pmode));
> >    }
> >  };
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> > index f30275c8280..e927829c151 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> > @@ -22,5 +22,5 @@ void f (int32_t * a, int32_t * b, int n)
> >        }
> >  }
> >
> > -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> > index 5dbc871ed12..98630a1ca02 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> > @@ -22,7 +22,6 @@ void f (int32_t * a, int32_t * b, int n)
> >        }
> >  }
> >
> > -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
> > new file mode 100644
> > index 00000000000..857ab0cd1e6
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
> > @@ -0,0 +1,25 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
> > +
> > +#include "riscv_vector.h"
> > +
> > +void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond, int avl) {
> > +
> > +  size_t vl;
> > +  if (cond == 1)
> > +    vl = __riscv_vsetvl_e32m1(avl);
> > +  else if (cond == 2)
> > +    vl = __riscv_vsetvl_e8mf4(avl);
> > +  else if (cond == 2)
> > +    vl = __riscv_vsetvl_e16mf2(avl);
> > +  else
> > +    vl = __riscv_vsetvl_e64m2(avl);
> > +  for (size_t i = 0; i < n; i += 1) {
> > +    vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
> > +    vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
> > +    vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
> > +    __riscv_vse32_v_i32m1(out, c, vl);
> > +  }
> > +}
> > +
> > +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c
> > new file mode 100644
> > index 00000000000..2b0844ff977
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c
> > @@ -0,0 +1,25 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
> > +
> > +#include "riscv_vector.h"
> > +
> > +void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond) {
> > +
> > +  size_t vl;
> > +  if (cond == 1)
> > +    vl = __riscv_vsetvlmax_e32m1();
> > +  else if (cond == 2)
> > +    vl = __riscv_vsetvlmax_e8mf4();
> > +  else if (cond == 2)
> > +    vl = __riscv_vsetvlmax_e16mf2();
> > +  else
> > +    vl = __riscv_vsetvlmax_e64m2();
> > +  for (size_t i = 0; i < n; i += 1) {
> > +    vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
> > +    vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
> > +    vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
> > +    __riscv_vse32_v_i32m1(out, c, vl);
> > +  }
> > +}
> > +
> > +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
> > index 8936d3be6a7..b665bb84eb7 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
> > @@ -18,4 +18,4 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
> >    }
> >  }
> >
> > -/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
> > index c677a81d706..a1954e7a09a 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
> > @@ -18,5 +18,5 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
> >    }
> >  }
> >
> > -/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {srli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*4} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
> > index 103f4238c76..98df431bced 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
> > @@ -18,4 +18,4 @@ void f(int8_t *base, int8_t *out, size_t vl, size_t m, size_t k) {
> >
> >  /* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*4} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > -/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
> > index 1b92cb876cc..0160d732fc3 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
> > @@ -22,6 +22,6 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
> >  }
> >
> >  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > -/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > \ No newline at end of file
> > +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
> > index 79a6f271997..a8497210473 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
> > @@ -18,6 +18,6 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
> >    }
> >  }
> >
> > -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > -/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > -/* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*5} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*5} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
> > index 0a3a5a3d2d7..0e6b14ee3e9 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
> > @@ -17,5 +17,5 @@ void foo(void *in1, void *in2, void *in3, void *out, size_t n) {
> >    }
> >  }
> >
> > -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > -/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
> > index 0ee04ee04c9..15e99537462 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
> > @@ -17,7 +17,7 @@ void foo(void *in1, void *in2, void *in3, void *out, size_t n) {
> >    }
> >  }
> >
> > -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
> > index a0335a34645..caa2103da5d 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
> > @@ -18,5 +18,5 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
> >    }
> >  }
> >
> > -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > -/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > --
> > 2.36.3
> >
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Re: [PATCH] RISC-V: Normalize user vsetvl intrinsics[PR112092]
  2023-11-08  6:33     ` Kito Cheng
@ 2023-11-08  6:36       ` juzhe.zhong
  2023-11-08  6:52         ` Kito Cheng
  2023-12-04 17:14       ` Maciej W. Rozycki
  1 sibling, 1 reply; 8+ messages in thread
From: juzhe.zhong @ 2023-11-08  6:36 UTC (permalink / raw)
  To: kito.cheng; +Cc: gcc-patches, Kito.cheng, jeffreyalaw, Robin Dapp

[-- Attachment #1: Type: text/plain, Size: 21639 bytes --]

Another question occurs to me.

Is it necessary for us to have so many variants of vsetvl?

I am thinking about redesign:

__riscv_vsetvl_e8mf8
__riscv_vsetvl_e16mf4
__riscv_vsetvl_e32mf2
__riscv_vsetvl_e64m1

They are quite redundant. They have the same result.

Maybe we could just design it as:

__riscv_vsetvl_ratio64

I am not proposing it, since the current interface has been used for a long time. I am just raising my concern.



juzhe.zhong@rivai.ai
 
From: Kito Cheng
Date: 2023-11-08 14:33
To: juzhe.zhong@rivai.ai
CC: gcc-patches; Kito.cheng; jeffreyalaw; Robin Dapp
Subject: Re: Re: [PATCH] RISC-V: Normalize user vsetvl intrinsics[PR112092]
OK, then LGTM, thanks for the explanation :)
 
On Wed, Nov 8, 2023 at 2:33 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> More details:
>
> bb 1   bb 2
>   \    /
>    bb 3
>
> VSETVL PASS can only do VSETVL demand fusion, fuse demand from bb 3 to bb 1, and fuse demand from bb 3 to bb2.
> We are not able to remove block bb 1 and bb 2 and create new bb 4 to hold the vsetvl if bb 1 and bb 2 has the same vsetvl:
>
> bb 4 (new block)
>   |
> bb 3
>
> I don't think we should do this on VSETVL PASS.
> ________________________________
> juzhe.zhong@rivai.ai
>
>
> From: Kito Cheng
> Date: 2023-11-08 14:16
> To: Juzhe-Zhong
> CC: gcc-patches; kito.cheng; jeffreyalaw; rdapp.gcc
> Subject: Re: [PATCH] RISC-V: Normalize user vsetvl intrinsics[PR112092]
> I thought vsetvli insertion will try to merge them into one for those
> cases? Could you explain few more reasons why they are not fused now?
> Not an objection since I could imageing that would be easier to
> process, just wondering why.
>
> On Wed, Nov 8, 2023 at 2:11 PM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
> >
> > Since our user vsetvl intrinsics are defined as just calculate the VL output
> > which is the number of the elements to be processed. Such intrinsics do not
> > have any side effects.  We should normalize them when they have same ratio.
> >
> > E.g __riscv_vsetvl_e8mf8 result is same as __riscv_vsetvl_e64m1.
> >
> > Normalize them can allow us have better codegen.
> > Consider this following example:
> >
> > #include "riscv_vector.h"
> >
> > void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond, int avl) {
> >
> >   size_t vl;
> >   if (cond)
> >     vl = __riscv_vsetvl_e32m1(avl);
> >   else
> >     vl = __riscv_vsetvl_e16mf2(avl);
> >   for (size_t i = 0; i < n; i += 1) {
> >     vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
> >     vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
> >     vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
> >     __riscv_vse32_v_i32m1(out, c, vl);
> >   }
> > }
> >
> > Before this patch:
> >
> > foo:
> >         beq     a5,zero,.L2
> >         vsetvli a6,a6,e32,m1,tu,ma
> > .L3:
> >         li      a5,0
> >         beq     a4,zero,.L9
> > .L4:
> >         vle32.v v1,0(a0)
> >         addi    a5,a5,1
> >         vle32.v v1,0(a1)
> >         vle32.v v1,0(a2)
> >         vse32.v v1,0(a3)
> >         bne     a4,a5,.L4
> > .L9:
> >         ret
> > .L2:
> >         vsetvli zero,a6,e32,m1,tu,ma
> >         j       .L3
> >
> > After this patch:
> >
> > foo:
> >         li      a5,0
> >         vsetvli zero,a6,e32,m1,tu,ma
> >         beq     a4,zero,.L9
> > .L4:
> >         vle32.v v1,0(a0)
> >         addi    a5,a5,1
> >         vle32.v v1,0(a1)
> >         vle32.v v1,0(a2)
> >         vse32.v v1,0(a3)
> >         bne     a4,a5,.L4
> > .L9:
> >         ret
> >
> >         PR target/112092
> >
> > gcc/ChangeLog:
> >
> >         * config/riscv/riscv-vector-builtins-bases.cc: Normalize the vsetvls.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/riscv/rvv/vsetvl/pr109743-1.c: Adapt test.
> >         * gcc.target/riscv/rvv/vsetvl/pr109743-3.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/vsetvl-11.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/vsetvl-15.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/vsetvl-22.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c: Ditto.
> >         * gcc.target/riscv/rvv/vsetvl/pr112092-1.c: New test.
> >         * gcc.target/riscv/rvv/vsetvl/pr112092-2.c: New test.
> >
> > ---
> >  .../riscv/riscv-vector-builtins-bases.cc      | 24 +++++++++++++-----
> >  .../gcc.target/riscv/rvv/vsetvl/pr109743-1.c  |  2 +-
> >  .../gcc.target/riscv/rvv/vsetvl/pr109743-3.c  |  3 +--
> >  .../gcc.target/riscv/rvv/vsetvl/pr112092-1.c  | 25 +++++++++++++++++++
> >  .../gcc.target/riscv/rvv/vsetvl/pr112092-2.c  | 25 +++++++++++++++++++
> >  .../gcc.target/riscv/rvv/vsetvl/vsetvl-11.c   |  2 +-
> >  .../gcc.target/riscv/rvv/vsetvl/vsetvl-15.c   |  2 +-
> >  .../gcc.target/riscv/rvv/vsetvl/vsetvl-22.c   |  2 +-
> >  .../riscv/rvv/vsetvl/vsetvlmax-13.c           |  4 +--
> >  .../riscv/rvv/vsetvl/vsetvlmax-15.c           |  6 ++---
> >  .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c |  4 +--
> >  .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c |  2 +-
> >  .../gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c |  4 +--
> >  13 files changed, 83 insertions(+), 22 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c
> >
> > diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> > index 0298b7987a1..d70468542ee 100644
> > --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
> > +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> > @@ -131,19 +131,31 @@ public:
> >
> >      tree type = builtin_types[e.type.index].vector;
> >      machine_mode mode = TYPE_MODE (type);
> > -    machine_mode inner_mode = GET_MODE_INNER (mode);
> > +    /* Normalize same RATO (SEW/LMUL) into same vsetvl instruction.
> > +
> > +        - e8,mf8/e16,mf4/e32,mf2/e64,m1 --> e8mf8
> > +        - e8,mf4/e16,mf2/e32,m1/e64,m2  --> e8mf4
> > +        - e8,mf2/e16,m1/e32,m2/e64,m4   --> e8mf2
> > +        - e8,m1/e16,m2/e32,m4/e64,m8    --> e8m1
> > +        - e8,m2/e16,m4/e32,m8           --> e8m2
> > +        - e8,m4/e16,m8                  --> e8m4
> > +        - e8,m8                         --> e8m8
> > +    */
> >      /* SEW.  */
> > -    e.add_input_operand (Pmode,
> > -                        gen_int_mode (GET_MODE_BITSIZE (inner_mode), Pmode));
> > +    e.add_input_operand (Pmode, gen_int_mode (8, Pmode));
> >
> >      /* LMUL.  */
> > -    e.add_input_operand (Pmode, gen_int_mode (get_vlmul (mode), Pmode));
> > +    machine_mode e8_mode
> > +      = get_vector_mode (QImode, GET_MODE_NUNITS (mode)).require ();
> > +    e.add_input_operand (Pmode, gen_int_mode (get_vlmul (e8_mode), Pmode));
> >
> >      /* TAIL_ANY.  */
> > -    e.add_input_operand (Pmode, gen_int_mode (get_prefer_tail_policy (), Pmode));
> > +    e.add_input_operand (Pmode,
> > +                        gen_int_mode (get_prefer_tail_policy (), Pmode));
> >
> >      /* MASK_ANY.  */
> > -    e.add_input_operand (Pmode, gen_int_mode (get_prefer_mask_policy (), Pmode));
> > +    e.add_input_operand (Pmode,
> > +                        gen_int_mode (get_prefer_mask_policy (), Pmode));
> >      return e.generate_insn (code_for_vsetvl_no_side_effects (Pmode));
> >    }
> >  };
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> > index f30275c8280..e927829c151 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-1.c
> > @@ -22,5 +22,5 @@ void f (int32_t * a, int32_t * b, int n)
> >        }
> >  }
> >
> > -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> > index 5dbc871ed12..98630a1ca02 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109743-3.c
> > @@ -22,7 +22,6 @@ void f (int32_t * a, int32_t * b, int n)
> >        }
> >  }
> >
> > -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+} 2 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
> > new file mode 100644
> > index 00000000000..857ab0cd1e6
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-1.c
> > @@ -0,0 +1,25 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
> > +
> > +#include "riscv_vector.h"
> > +
> > +void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond, int avl) {
> > +
> > +  size_t vl;
> > +  if (cond == 1)
> > +    vl = __riscv_vsetvl_e32m1(avl);
> > +  else if (cond == 2)
> > +    vl = __riscv_vsetvl_e8mf4(avl);
> > +  else if (cond == 2)
> > +    vl = __riscv_vsetvl_e16mf2(avl);
> > +  else
> > +    vl = __riscv_vsetvl_e64m2(avl);
> > +  for (size_t i = 0; i < n; i += 1) {
> > +    vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
> > +    vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
> > +    vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
> > +    __riscv_vse32_v_i32m1(out, c, vl);
> > +  }
> > +}
> > +
> > +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c
> > new file mode 100644
> > index 00000000000..2b0844ff977
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112092-2.c
> > @@ -0,0 +1,25 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2" } */
> > +
> > +#include "riscv_vector.h"
> > +
> > +void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int cond) {
> > +
> > +  size_t vl;
> > +  if (cond == 1)
> > +    vl = __riscv_vsetvlmax_e32m1();
> > +  else if (cond == 2)
> > +    vl = __riscv_vsetvlmax_e8mf4();
> > +  else if (cond == 2)
> > +    vl = __riscv_vsetvlmax_e16mf2();
> > +  else
> > +    vl = __riscv_vsetvlmax_e64m2();
> > +  for (size_t i = 0; i < n; i += 1) {
> > +    vint32m1_t a = __riscv_vle32_v_i32m1(in1, vl);
> > +    vint32m1_t b = __riscv_vle32_v_i32m1_tu(a, in2, vl);
> > +    vint32m1_t c = __riscv_vle32_v_i32m1_tu(b, in3, vl);
> > +    __riscv_vse32_v_i32m1(out, c, vl);
> > +  }
> > +}
> > +
> > +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
> > index 8936d3be6a7..b665bb84eb7 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-11.c
> > @@ -18,4 +18,4 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
> >    }
> >  }
> >
> > -/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
> > index c677a81d706..a1954e7a09a 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-15.c
> > @@ -18,5 +18,5 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
> >    }
> >  }
> >
> > -/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {srli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*4} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
> > index 103f4238c76..98df431bced 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-22.c
> > @@ -18,4 +18,4 @@ void f(int8_t *base, int8_t *out, size_t vl, size_t m, size_t k) {
> >
> >  /* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*4} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > -/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
> > index 1b92cb876cc..0160d732fc3 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-13.c
> > @@ -22,6 +22,6 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
> >  }
> >
> >  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > -/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > \ No newline at end of file
> > +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
> > index 79a6f271997..a8497210473 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-15.c
> > @@ -18,6 +18,6 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
> >    }
> >  }
> >
> > -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > -/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > -/* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*5} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli} 3 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {slli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*5} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
> > index 0a3a5a3d2d7..0e6b14ee3e9 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-5.c
> > @@ -17,5 +17,5 @@ void foo(void *in1, void *in2, void *in3, void *out, size_t n) {
> >    }
> >  }
> >
> > -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > -/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-O1" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
> > index 0ee04ee04c9..15e99537462 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-7.c
> > @@ -17,7 +17,7 @@ void foo(void *in1, void *in2, void *in3, void *out, size_t n) {
> >    }
> >  }
> >
> > -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*m1,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> >  /* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
> > index a0335a34645..caa2103da5d 100644
> > --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
> > +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvlmax-8.c
> > @@ -18,5 +18,5 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
> >    }
> >  }
> >
> > -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > -/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*m1,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > +/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
> > --
> > 2.36.3
> >
>
 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Re: [PATCH] RISC-V: Normalize user vsetvl intrinsics[PR112092]
  2023-11-08  6:36       ` juzhe.zhong
@ 2023-11-08  6:52         ` Kito Cheng
  0 siblings, 0 replies; 8+ messages in thread
From: Kito Cheng @ 2023-11-08  6:52 UTC (permalink / raw)
  To: juzhe.zhong; +Cc: kito.cheng, gcc-patches, jeffreyalaw, Robin Dapp

On Wed, Nov 8, 2023 at 2:37 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> Another question raise to me.
>
> Is it necessary we have such many variant of vsetvls?
>
> I am thinking about redesign:
>
> __riscv_vsetvl_e8mf8
> __riscv_vsetvl_e16mf4
> __riscv_vsetvl_e32mf2
> __riscv_vsetvl_e64m1
>
> They are quite redundant. They have the same result.
>
> May be just design as :
>
> __riscv_vsetvl_ratio64
>
> I am no proposing it since it has been used for a long time. Just raise my concern.

Yeah, I agree those variants have the same behavior, and even the same
semantics, in the current intrinsic model.  One reason is that we didn't
have a smart vsetvli insertion pass at the design stage; also, it is more
obvious to the user which vsetvli intrinsic to pick.  However, I intend
not to change that interface.  The reason is simple: it has been used for
a long time, as you mentioned, and changing it would be a huge disturbance.

The same argument may apply to the vbool* types, but vbool* is partly
there for historical reasons*, and we also haven't found a better way to
model that.

* We had MLEN defined in the v-spec a long time ago; I forget whether it was 0.7 or 0.8.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Re: [PATCH] RISC-V: Normalize user vsetvl intrinsics[PR112092]
  2023-11-08  6:33     ` Kito Cheng
  2023-11-08  6:36       ` juzhe.zhong
@ 2023-12-04 17:14       ` Maciej W. Rozycki
  1 sibling, 0 replies; 8+ messages in thread
From: Maciej W. Rozycki @ 2023-12-04 17:14 UTC (permalink / raw)
  To: Kito Cheng; +Cc: juzhe.zhong, gcc-patches, Kito.cheng, jeffreyalaw, Robin Dapp

On Wed, 8 Nov 2023, Kito Cheng wrote:

> OK, then LGTM, thanks for the explanation :)

 Please don't top-post on a GCC mailing list (and preferably in off-list 
replies to such mailing list messages unless it's been agreed to somehow 
with the participants), as it makes it difficult to make context replies.

 Best practice is to reply inline, quoting the relevant original paragraph 
(or enough context) referred to above, and with all the other parts of the 
message replied to discarded.  We may even have it written down somewhere 
(though I haven't checked; in the old days it used to be assumed), and I 
do hope any sane modern MUA can handle it.

 Otherwise the discussion thread quickly grows into an illegible mess.

 So this change does indeed fix PR 112092, however we now have an issue 
with several other test cases and the new `-mmovcc' option.  For example 
vsetvl-13.c fails with "-mmovcc -mbranch-cost=8" test options and assembly 
produced is like:

	vsetvli	a6,a6,e8,mf4,ta,ma
	snez	a5,a5
	neg	a5,a5
	and	a6,a5,a6
	not	a5,a5
	andi	a5,a5,55
	or	a5,a6,a5
	beq	a4,zero,.L10
	li	a6,0
	vsetvli	zero,a5,e32,m1,tu,ma
.L4:
	vle32.v	v1,0(a0)
	vle32.v	v1,0(a1)
	vle32.v	v1,0(a2)
	vse32.v	v1,0(a3)
	addi	a6,a6,1
	bne	a4,a6,.L4
.L10:
	ret

As far as I can tell code produced is legitimate, and for the record 
analogous assembly is produced with `-march=rv32gcv_zicond' too:

	vsetvli	a6,a6,e8,mf4,ta,ma
	czero.eqz	a6,a6,a5
	li	a7,55
	czero.nez	a5,a7,a5
	or	a5,a5,a6
	beq	a4,zero,.L10
	li	a6,0
	vsetvli	zero,a5,e32,m1,tu,ma
.L4:
	vle32.v	v1,0(a0)
	vle32.v	v1,0(a1)
	vle32.v	v1,0(a2)
	vse32.v	v1,0(a3)
	addi	a6,a6,1
	bne	a4,a6,.L4
.L10:
	ret

-- it's just that you can't see it with regression testing, because the 
test case overrides `-march='.  Presumably we do want to execute VSETVLI 
twice here on the basis that to avoid the second one by means of branches 
would be more costly than not to.

 Shall we just silence false failures like this with `-mno-movcc' then or 
shall we handle the conditional-move case somehow?

 For reference plain branched assembly is like:

	li	a7,55
	beq	a5,zero,.L13
	vsetvli	zero,a6,e32,m1,tu,ma
.L2:
	beq	a4,zero,.L11
	li	a5,0
.L4:
	vle32.v	v1,0(a0)
	vle32.v	v1,0(a1)
	vle32.v	v1,0(a2)
	vse32.v	v1,0(a3)
	addi	a5,a5,1
	bne	a4,a5,.L4
.L11:
	ret
.L13:
	vsetvli	zero,a7,e32,m1,tu,ma
	j	.L2

  Maciej

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2023-12-04 17:14 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-08  6:10 [PATCH] RISC-V: Normalize user vsetvl intrinsics[PR112092] Juzhe-Zhong
2023-11-08  6:16 ` Kito Cheng
2023-11-08  6:23   ` juzhe.zhong
2023-11-08  6:32   ` juzhe.zhong
2023-11-08  6:33     ` Kito Cheng
2023-11-08  6:36       ` juzhe.zhong
2023-11-08  6:52         ` Kito Cheng
2023-12-04 17:14       ` Maciej W. Rozycki

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).