[PATCH] RISC-V: Support widening register overlap for vf4/vf8

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] RISC-V: Support widening register overlap for vf4/vf8
@ 2023-11-30  6:49 Juzhe-Zhong
  2023-11-30  7:08 ` Kito Cheng
  0 siblings, 1 reply; 2+ messages in thread
From: Juzhe-Zhong @ 2023-11-30  6:49 UTC (permalink / raw)
  To: gcc-patches; +Cc: kito.cheng, kito.cheng, jeffreyalaw, rdapp.gcc, Juzhe-Zhong


/* Reproducer for PR target/112431.  Each loop iteration loads four
   vint8m1_t vectors, sign-extends every one of them to vint64m8_t with
   the vf8 extend intrinsic, reads element 0 of each widened vector and
   adds sumation () of those four scalars to the running total, which is
   returned.  sumation () is defined outside this snippet.  */
size_t
foo (char const *buf, size_t len)
{
  size_t sum = 0;
  size_t vl = __riscv_vsetvlmax_e8m8 ();
  size_t step = vl * 4;
  const char *it = buf, *end = buf + len;
  /* Keep going while one full step (four vl-sized advances) still fits
     before END.  */
  for (; it + step <= end;)
    {
      /* Four narrow (LMUL=1) source loads; IT advances by VL after each.  */
      vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
      it += vl;
      vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
      it += vl;
      vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
      it += vl;
      vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
      it += vl;
      
      /* asm with a "memory" clobber; presumably here to keep the loads
	 grouped ahead of the extends -- NOTE(review): confirm intent.  */
      asm volatile("nop" ::: "memory");
      /* Four oct-widening sign-extends whose results are all used below,
	 so all four vint64m8_t values are live at the same time.  */
      vint64m8_t vw0 = __riscv_vsext_vf8_i64m8 (v0, vl);
      vint64m8_t vw1 = __riscv_vsext_vf8_i64m8 (v1, vl);
      vint64m8_t vw2 = __riscv_vsext_vf8_i64m8 (v2, vl);
      vint64m8_t vw3 = __riscv_vsext_vf8_i64m8 (v3, vl);

      /* Second asm with a "memory" clobber, between the extends and the
	 scalar extracts.  */
      asm volatile("nop" ::: "memory");
      /* Move element 0 of each widened vector into a scalar.  */
      size_t sum0 = __riscv_vmv_x_s_i64m8_i64 (vw0);
      size_t sum1 = __riscv_vmv_x_s_i64m8_i64 (vw1);
      size_t sum2 = __riscv_vmv_x_s_i64m8_i64 (vw2);
      size_t sum3 = __riscv_vmv_x_s_i64m8_i64 (vw3);

      sum += sumation (sum0, sum1, sum2, sum3);
    }
  return sum;
}

Before this patch:

        add     a3,s0,s1
        add     a4,s6,s1
        add     a5,s7,s1
        vsetvli zero,s0,e64,m8,ta,ma
        vle8.v  v4,0(s1)
        vle8.v  v3,0(a3)
        mv      s1,s2
        vle8.v  v2,0(a4)
        vle8.v  v1,0(a5)
        nop
        vsext.vf8       v8,v4
        vsext.vf8       v16,v2
        vs8r.v  v8,0(sp)
        vsext.vf8       v24,v1
        vsext.vf8       v8,v3
        nop
        vmv.x.s a1,v8
        vl8re64.v       v8,0(sp)
        vmv.x.s a3,v24
        vmv.x.s a2,v16
        vmv.x.s a0,v8
        add     s2,s2,s5
        call    sumation
        add     s3,s3,a0
        bgeu    s4,s2,.L5

After this patch:

	add	a3,s0,s1
	add	a4,s6,s1
	add	a5,s7,s1
	vsetvli	zero,s0,e64,m8,ta,ma
	vle8.v	v15,0(s1)
	vle8.v	v23,0(a3)
	mv	s1,s2
	vle8.v	v31,0(a4)
	vle8.v	v7,0(a5)
	vsext.vf8	v8,v15
	vsext.vf8	v16,v23
	vsext.vf8	v24,v31
	vsext.vf8	v0,v7
	vmv.x.s	a3,v0
	vmv.x.s	a2,v24
	vmv.x.s	a1,v16
	vmv.x.s	a0,v8
	add	s2,s2,s5
	call	sumation
	add	s3,s3,a0
	bgeu	s4,s2,.L5

	PR target/112431

gcc/ChangeLog:

	* config/riscv/vector.md: Add widening overlap of vf4/vf8.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/base/pr112431-16.c: New test.
	* gcc.target/riscv/rvv/base/pr112431-17.c: New test.
	* gcc.target/riscv/rvv/base/pr112431-18.c: New test.

---
 gcc/config/riscv/vector.md                    | 38 ++++++-----
 .../gcc.target/riscv/rvv/base/pr112431-16.c   | 68 +++++++++++++++++++
 .../gcc.target/riscv/rvv/base/pr112431-17.c   | 51 ++++++++++++++
 .../gcc.target/riscv/rvv/base/pr112431-18.c   | 51 ++++++++++++++
 4 files changed, 190 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 6b891c11324..e5d62c6e58b 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3704,43 +3704,45 @@
 
 ;; Vector Quad-Widening Sign-extend and Zero-extend.
 (define_insn "@pred_<optab><mode>_vf4"
-  [(set (match_operand:VQEXTI 0 "register_operand"          "=&vr,&vr")
+  [(set (match_operand:VQEXTI 0 "register_operand"               "=vr,   vr,   vr,   vr, ?&vr, ?&vr")
 	(if_then_else:VQEXTI
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1")
-	     (match_operand 4 "vector_length_operand"          "   rK,   rK")
-	     (match_operand 5 "const_int_operand"              "    i,    i")
-	     (match_operand 6 "const_int_operand"              "    i,    i")
-	     (match_operand 7 "const_int_operand"              "    i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
+	     (match_operand 4 "vector_length_operand"          "   rK,   rK,   rK,   rK,   rK,   rK")
+	     (match_operand 5 "const_int_operand"              "    i,    i,    i,    i,    i,    i")
+	     (match_operand 6 "const_int_operand"              "    i,    i,    i,    i,    i,    i")
+	     (match_operand 7 "const_int_operand"              "    i,    i,    i,    i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (any_extend:VQEXTI
-	    (match_operand:<V_QUAD_TRUNC> 3 "register_operand" "   vr,   vr"))
-	  (match_operand:VQEXTI 2 "vector_merge_operand"       "   vu,    0")))]
+	    (match_operand:<V_QUAD_TRUNC> 3 "register_operand" "  W43,  W43,  W86,  W86,   vr,   vr"))
+	  (match_operand:VQEXTI 2 "vector_merge_operand"       "   vu,    0,   vu,    0,   vu,    0")))]
   "TARGET_VECTOR"
   "v<sz>ext.vf4\t%0,%3%p1"
   [(set_attr "type" "vext")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "W43,W43,W86,W86,none,none")])
 
 ;; Vector Oct-Widening Sign-extend and Zero-extend.
 (define_insn "@pred_<optab><mode>_vf8"
-  [(set (match_operand:VOEXTI 0 "register_operand"         "=&vr,&vr")
+  [(set (match_operand:VOEXTI 0 "register_operand"              "=vr,   vr, ?&vr, ?&vr")
 	(if_then_else:VOEXTI
 	  (unspec:<VM>
-	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
-	     (match_operand 4 "vector_length_operand"         "   rK,   rK")
-	     (match_operand 5 "const_int_operand"             "    i,    i")
-	     (match_operand 6 "const_int_operand"             "    i,    i")
-	     (match_operand 7 "const_int_operand"             "    i,    i")
+	    [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
+	     (match_operand 4 "vector_length_operand"         "   rK,   rK,   rK,   rK")
+	     (match_operand 5 "const_int_operand"             "    i,    i,    i,    i")
+	     (match_operand 6 "const_int_operand"             "    i,    i,    i,    i")
+	     (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
 	  (any_extend:VOEXTI
-	    (match_operand:<V_OCT_TRUNC> 3 "register_operand" "   vr,   vr"))
-	  (match_operand:VOEXTI 2 "vector_merge_operand"      "   vu,    0")))]
+	    (match_operand:<V_OCT_TRUNC> 3 "register_operand" "  W87,  W87,   vr,   vr"))
+	  (match_operand:VOEXTI 2 "vector_merge_operand"      "   vu,    0,   vu,    0")))]
   "TARGET_VECTOR"
   "v<sz>ext.vf8\t%0,%3%p1"
   [(set_attr "type" "vext")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<MODE>")
+   (set_attr "group_overlap" "W87,W87,none,none")])
 
 ;; Vector Widening Add/Subtract/Multiply.
 (define_insn "@pred_dual_widen_<any_widen_binop:optab><any_extend:su><mode>"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
new file mode 100644
index 00000000000..98f42458883
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
+	  size_t sum5, size_t sum6, size_t sum7)
+{
+  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+    {
+      vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      
+      asm volatile("nop" ::: "memory");
+      vint32m4_t vw0 = __riscv_vsext_vf4_i32m4 (v0, vl);
+      vint32m4_t vw1 = __riscv_vsext_vf4_i32m4 (v1, vl);
+      vint32m4_t vw2 = __riscv_vsext_vf4_i32m4 (v2, vl);
+      vint32m4_t vw3 = __riscv_vsext_vf4_i32m4 (v3, vl);
+      vint32m4_t vw4 = __riscv_vsext_vf4_i32m4 (v4, vl);
+      vint32m4_t vw5 = __riscv_vsext_vf4_i32m4 (v5, vl);
+      vint32m4_t vw6 = __riscv_vsext_vf4_i32m4 (v6, vl);
+      vint32m4_t vw7 = __riscv_vsext_vf4_i32m4 (v7, vl);
+
+      asm volatile("nop" ::: "memory");
+      size_t sum0 = __riscv_vmv_x_s_i32m4_i32 (vw0);
+      size_t sum1 = __riscv_vmv_x_s_i32m4_i32 (vw1);
+      size_t sum2 = __riscv_vmv_x_s_i32m4_i32 (vw2);
+      size_t sum3 = __riscv_vmv_x_s_i32m4_i32 (vw3);
+      size_t sum4 = __riscv_vmv_x_s_i32m4_i32 (vw4);
+      size_t sum5 = __riscv_vmv_x_s_i32m4_i32 (vw5);
+      size_t sum6 = __riscv_vmv_x_s_i32m4_i32 (vw6);
+      size_t sum7 = __riscv_vmv_x_s_i32m4_i32 (vw7);
+
+      sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
+    }
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
new file mode 100644
index 00000000000..9b60005344d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3)
+{
+  return sum0 + sum1 + sum2 + sum3;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+    {
+      vint8m2_t v0 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+      it += vl;
+      vint8m2_t v1 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+      it += vl;
+      vint8m2_t v2 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+      it += vl;
+      vint8m2_t v3 = __riscv_vle8_v_i8m2 ((void *) it, vl);
+      it += vl;
+
+      asm volatile("nop" ::: "memory");
+      vint32m8_t vw0 = __riscv_vsext_vf4_i32m8 (v0, vl);
+      vint32m8_t vw1 = __riscv_vsext_vf4_i32m8 (v1, vl);
+      vint32m8_t vw2 = __riscv_vsext_vf4_i32m8 (v2, vl);
+      vint32m8_t vw3 = __riscv_vsext_vf4_i32m8 (v3, vl);
+
+      asm volatile("nop" ::: "memory");
+      size_t sum0 = __riscv_vmv_x_s_i32m8_i32 (vw0);
+      size_t sum1 = __riscv_vmv_x_s_i32m8_i32 (vw1);
+      size_t sum2 = __riscv_vmv_x_s_i32m8_i32 (vw2);
+      size_t sum3 = __riscv_vmv_x_s_i32m8_i32 (vw3);
+
+      sum += sumation (sum0, sum1, sum2, sum3);
+    }
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c
new file mode 100644
index 00000000000..dd65b2fa098
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+size_t __attribute__ ((noinline))
+sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3)
+{
+  return sum0 + sum1 + sum2 + sum3;
+}
+
+size_t
+foo (char const *buf, size_t len)
+{
+  size_t sum = 0;
+  size_t vl = __riscv_vsetvlmax_e8m8 ();
+  size_t step = vl * 4;
+  const char *it = buf, *end = buf + len;
+  for (; it + step <= end;)
+    {
+      vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
+      it += vl;
+      
+      asm volatile("nop" ::: "memory");
+      vint64m8_t vw0 = __riscv_vsext_vf8_i64m8 (v0, vl);
+      vint64m8_t vw1 = __riscv_vsext_vf8_i64m8 (v1, vl);
+      vint64m8_t vw2 = __riscv_vsext_vf8_i64m8 (v2, vl);
+      vint64m8_t vw3 = __riscv_vsext_vf8_i64m8 (v3, vl);
+
+      asm volatile("nop" ::: "memory");
+      size_t sum0 = __riscv_vmv_x_s_i64m8_i64 (vw0);
+      size_t sum1 = __riscv_vmv_x_s_i64m8_i64 (vw1);
+      size_t sum2 = __riscv_vmv_x_s_i64m8_i64 (vw2);
+      size_t sum3 = __riscv_vmv_x_s_i64m8_i64 (vw3);
+
+      sum += sumation (sum0, sum1, sum2, sum3);
+    }
+  return sum;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
-- 
2.36.3


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] RISC-V: Support widening register overlap for vf4/vf8
  2023-11-30  6:49 [PATCH] RISC-V: Support widening register overlap for vf4/vf8 Juzhe-Zhong
@ 2023-11-30  7:08 ` Kito Cheng
  0 siblings, 0 replies; 2+ messages in thread
From: Kito Cheng @ 2023-11-30  7:08 UTC (permalink / raw)
  To: Juzhe-Zhong; +Cc: gcc-patches, kito.cheng, jeffreyalaw, rdapp.gcc

LGTM, thanks :)

On Thu, Nov 30, 2023 at 2:49 PM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
>
>
> size_t
> foo (char const *buf, size_t len)
> {
>   size_t sum = 0;
>   size_t vl = __riscv_vsetvlmax_e8m8 ();
>   size_t step = vl * 4;
>   const char *it = buf, *end = buf + len;
>   for (; it + step <= end;)
>     {
>       vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
>       it += vl;
>       vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
>       it += vl;
>       vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
>       it += vl;
>       vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
>       it += vl;
>
>       asm volatile("nop" ::: "memory");
>       vint64m8_t vw0 = __riscv_vsext_vf8_i64m8 (v0, vl);
>       vint64m8_t vw1 = __riscv_vsext_vf8_i64m8 (v1, vl);
>       vint64m8_t vw2 = __riscv_vsext_vf8_i64m8 (v2, vl);
>       vint64m8_t vw3 = __riscv_vsext_vf8_i64m8 (v3, vl);
>
>       asm volatile("nop" ::: "memory");
>       size_t sum0 = __riscv_vmv_x_s_i64m8_i64 (vw0);
>       size_t sum1 = __riscv_vmv_x_s_i64m8_i64 (vw1);
>       size_t sum2 = __riscv_vmv_x_s_i64m8_i64 (vw2);
>       size_t sum3 = __riscv_vmv_x_s_i64m8_i64 (vw3);
>
>       sum += sumation (sum0, sum1, sum2, sum3);
>     }
>   return sum;
> }
>
> Before this patch:
>
>         add     a3,s0,s1
>         add     a4,s6,s1
>         add     a5,s7,s1
>         vsetvli zero,s0,e64,m8,ta,ma
>         vle8.v  v4,0(s1)
>         vle8.v  v3,0(a3)
>         mv      s1,s2
>         vle8.v  v2,0(a4)
>         vle8.v  v1,0(a5)
>         nop
>         vsext.vf8       v8,v4
>         vsext.vf8       v16,v2
>         vs8r.v  v8,0(sp)
>         vsext.vf8       v24,v1
>         vsext.vf8       v8,v3
>         nop
>         vmv.x.s a1,v8
>         vl8re64.v       v8,0(sp)
>         vmv.x.s a3,v24
>         vmv.x.s a2,v16
>         vmv.x.s a0,v8
>         add     s2,s2,s5
>         call    sumation
>         add     s3,s3,a0
>         bgeu    s4,s2,.L5
>
> After this patch:
>
>         add     a3,s0,s1
>         add     a4,s6,s1
>         add     a5,s7,s1
>         vsetvli zero,s0,e64,m8,ta,ma
>         vle8.v  v15,0(s1)
>         vle8.v  v23,0(a3)
>         mv      s1,s2
>         vle8.v  v31,0(a4)
>         vle8.v  v7,0(a5)
>         vsext.vf8       v8,v15
>         vsext.vf8       v16,v23
>         vsext.vf8       v24,v31
>         vsext.vf8       v0,v7
>         vmv.x.s a3,v0
>         vmv.x.s a2,v24
>         vmv.x.s a1,v16
>         vmv.x.s a0,v8
>         add     s2,s2,s5
>         call    sumation
>         add     s3,s3,a0
>         bgeu    s4,s2,.L5
>
>         PR target/112431
>
> gcc/ChangeLog:
>
>         * config/riscv/vector.md: Add widening overlap of vf4/vf8.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/base/pr112431-16.c: New test.
>         * gcc.target/riscv/rvv/base/pr112431-17.c: New test.
>         * gcc.target/riscv/rvv/base/pr112431-18.c: New test.
>
> ---
>  gcc/config/riscv/vector.md                    | 38 ++++++-----
>  .../gcc.target/riscv/rvv/base/pr112431-16.c   | 68 +++++++++++++++++++
>  .../gcc.target/riscv/rvv/base/pr112431-17.c   | 51 ++++++++++++++
>  .../gcc.target/riscv/rvv/base/pr112431-18.c   | 51 ++++++++++++++
>  4 files changed, 190 insertions(+), 18 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c
>
> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index 6b891c11324..e5d62c6e58b 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -3704,43 +3704,45 @@
>
>  ;; Vector Quad-Widening Sign-extend and Zero-extend.
>  (define_insn "@pred_<optab><mode>_vf4"
> -  [(set (match_operand:VQEXTI 0 "register_operand"          "=&vr,&vr")
> +  [(set (match_operand:VQEXTI 0 "register_operand"               "=vr,   vr,   vr,   vr, ?&vr, ?&vr")
>         (if_then_else:VQEXTI
>           (unspec:<VM>
> -           [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1")
> -            (match_operand 4 "vector_length_operand"          "   rK,   rK")
> -            (match_operand 5 "const_int_operand"              "    i,    i")
> -            (match_operand 6 "const_int_operand"              "    i,    i")
> -            (match_operand 7 "const_int_operand"              "    i,    i")
> +           [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
> +            (match_operand 4 "vector_length_operand"          "   rK,   rK,   rK,   rK,   rK,   rK")
> +            (match_operand 5 "const_int_operand"              "    i,    i,    i,    i,    i,    i")
> +            (match_operand 6 "const_int_operand"              "    i,    i,    i,    i,    i,    i")
> +            (match_operand 7 "const_int_operand"              "    i,    i,    i,    i,    i,    i")
>              (reg:SI VL_REGNUM)
>              (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
>           (any_extend:VQEXTI
> -           (match_operand:<V_QUAD_TRUNC> 3 "register_operand" "   vr,   vr"))
> -         (match_operand:VQEXTI 2 "vector_merge_operand"       "   vu,    0")))]
> +           (match_operand:<V_QUAD_TRUNC> 3 "register_operand" "  W43,  W43,  W86,  W86,   vr,   vr"))
> +         (match_operand:VQEXTI 2 "vector_merge_operand"       "   vu,    0,   vu,    0,   vu,    0")))]
>    "TARGET_VECTOR"
>    "v<sz>ext.vf4\t%0,%3%p1"
>    [(set_attr "type" "vext")
> -   (set_attr "mode" "<MODE>")])
> +   (set_attr "mode" "<MODE>")
> +   (set_attr "group_overlap" "W43,W43,W86,W86,none,none")])
>
>  ;; Vector Oct-Widening Sign-extend and Zero-extend.
>  (define_insn "@pred_<optab><mode>_vf8"
> -  [(set (match_operand:VOEXTI 0 "register_operand"         "=&vr,&vr")
> +  [(set (match_operand:VOEXTI 0 "register_operand"              "=vr,   vr, ?&vr, ?&vr")
>         (if_then_else:VOEXTI
>           (unspec:<VM>
> -           [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
> -            (match_operand 4 "vector_length_operand"         "   rK,   rK")
> -            (match_operand 5 "const_int_operand"             "    i,    i")
> -            (match_operand 6 "const_int_operand"             "    i,    i")
> -            (match_operand 7 "const_int_operand"             "    i,    i")
> +           [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
> +            (match_operand 4 "vector_length_operand"         "   rK,   rK,   rK,   rK")
> +            (match_operand 5 "const_int_operand"             "    i,    i,    i,    i")
> +            (match_operand 6 "const_int_operand"             "    i,    i,    i,    i")
> +            (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
>              (reg:SI VL_REGNUM)
>              (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
>           (any_extend:VOEXTI
> -           (match_operand:<V_OCT_TRUNC> 3 "register_operand" "   vr,   vr"))
> -         (match_operand:VOEXTI 2 "vector_merge_operand"      "   vu,    0")))]
> +           (match_operand:<V_OCT_TRUNC> 3 "register_operand" "  W87,  W87,   vr,   vr"))
> +         (match_operand:VOEXTI 2 "vector_merge_operand"      "   vu,    0,   vu,    0")))]
>    "TARGET_VECTOR"
>    "v<sz>ext.vf8\t%0,%3%p1"
>    [(set_attr "type" "vext")
> -   (set_attr "mode" "<MODE>")])
> +   (set_attr "mode" "<MODE>")
> +   (set_attr "group_overlap" "W87,W87,none,none")])
>
>  ;; Vector Widening Add/Subtract/Multiply.
>  (define_insn "@pred_dual_widen_<any_widen_binop:optab><any_extend:su><mode>"
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
> new file mode 100644
> index 00000000000..98f42458883
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
> @@ -0,0 +1,68 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +size_t __attribute__ ((noinline))
> +sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
> +         size_t sum5, size_t sum6, size_t sum7)
> +{
> +  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
> +}
> +
> +size_t
> +foo (char const *buf, size_t len)
> +{
> +  size_t sum = 0;
> +  size_t vl = __riscv_vsetvlmax_e8m8 ();
> +  size_t step = vl * 4;
> +  const char *it = buf, *end = buf + len;
> +  for (; it + step <= end;)
> +    {
> +      vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +
> +      asm volatile("nop" ::: "memory");
> +      vint32m4_t vw0 = __riscv_vsext_vf4_i32m4 (v0, vl);
> +      vint32m4_t vw1 = __riscv_vsext_vf4_i32m4 (v1, vl);
> +      vint32m4_t vw2 = __riscv_vsext_vf4_i32m4 (v2, vl);
> +      vint32m4_t vw3 = __riscv_vsext_vf4_i32m4 (v3, vl);
> +      vint32m4_t vw4 = __riscv_vsext_vf4_i32m4 (v4, vl);
> +      vint32m4_t vw5 = __riscv_vsext_vf4_i32m4 (v5, vl);
> +      vint32m4_t vw6 = __riscv_vsext_vf4_i32m4 (v6, vl);
> +      vint32m4_t vw7 = __riscv_vsext_vf4_i32m4 (v7, vl);
> +
> +      asm volatile("nop" ::: "memory");
> +      size_t sum0 = __riscv_vmv_x_s_i32m4_i32 (vw0);
> +      size_t sum1 = __riscv_vmv_x_s_i32m4_i32 (vw1);
> +      size_t sum2 = __riscv_vmv_x_s_i32m4_i32 (vw2);
> +      size_t sum3 = __riscv_vmv_x_s_i32m4_i32 (vw3);
> +      size_t sum4 = __riscv_vmv_x_s_i32m4_i32 (vw4);
> +      size_t sum5 = __riscv_vmv_x_s_i32m4_i32 (vw5);
> +      size_t sum6 = __riscv_vmv_x_s_i32m4_i32 (vw6);
> +      size_t sum7 = __riscv_vmv_x_s_i32m4_i32 (vw7);
> +
> +      sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
> +    }
> +  return sum;
> +}
> +
> +/* { dg-final { scan-assembler-not {vmv1r} } } */
> +/* { dg-final { scan-assembler-not {vmv2r} } } */
> +/* { dg-final { scan-assembler-not {vmv4r} } } */
> +/* { dg-final { scan-assembler-not {vmv8r} } } */
> +/* { dg-final { scan-assembler-not {csrr} } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
> new file mode 100644
> index 00000000000..9b60005344d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
> @@ -0,0 +1,51 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +size_t __attribute__ ((noinline))
> +sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3)
> +{
> +  return sum0 + sum1 + sum2 + sum3;
> +}
> +
> +size_t
> +foo (char const *buf, size_t len)
> +{
> +  size_t sum = 0;
> +  size_t vl = __riscv_vsetvlmax_e8m8 ();
> +  size_t step = vl * 4;
> +  const char *it = buf, *end = buf + len;
> +  for (; it + step <= end;)
> +    {
> +      vint8m2_t v0 = __riscv_vle8_v_i8m2 ((void *) it, vl);
> +      it += vl;
> +      vint8m2_t v1 = __riscv_vle8_v_i8m2 ((void *) it, vl);
> +      it += vl;
> +      vint8m2_t v2 = __riscv_vle8_v_i8m2 ((void *) it, vl);
> +      it += vl;
> +      vint8m2_t v3 = __riscv_vle8_v_i8m2 ((void *) it, vl);
> +      it += vl;
> +
> +      asm volatile("nop" ::: "memory");
> +      vint32m8_t vw0 = __riscv_vsext_vf4_i32m8 (v0, vl);
> +      vint32m8_t vw1 = __riscv_vsext_vf4_i32m8 (v1, vl);
> +      vint32m8_t vw2 = __riscv_vsext_vf4_i32m8 (v2, vl);
> +      vint32m8_t vw3 = __riscv_vsext_vf4_i32m8 (v3, vl);
> +
> +      asm volatile("nop" ::: "memory");
> +      size_t sum0 = __riscv_vmv_x_s_i32m8_i32 (vw0);
> +      size_t sum1 = __riscv_vmv_x_s_i32m8_i32 (vw1);
> +      size_t sum2 = __riscv_vmv_x_s_i32m8_i32 (vw2);
> +      size_t sum3 = __riscv_vmv_x_s_i32m8_i32 (vw3);
> +
> +      sum += sumation (sum0, sum1, sum2, sum3);
> +    }
> +  return sum;
> +}
> +
> +/* { dg-final { scan-assembler-not {vmv1r} } } */
> +/* { dg-final { scan-assembler-not {vmv2r} } } */
> +/* { dg-final { scan-assembler-not {vmv4r} } } */
> +/* { dg-final { scan-assembler-not {vmv8r} } } */
> +/* { dg-final { scan-assembler-not {csrr} } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c
> new file mode 100644
> index 00000000000..dd65b2fa098
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c
> @@ -0,0 +1,51 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +size_t __attribute__ ((noinline))
> +sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3)
> +{
> +  return sum0 + sum1 + sum2 + sum3;
> +}
> +
> +size_t
> +foo (char const *buf, size_t len)
> +{
> +  size_t sum = 0;
> +  size_t vl = __riscv_vsetvlmax_e8m8 ();
> +  size_t step = vl * 4;
> +  const char *it = buf, *end = buf + len;
> +  for (; it + step <= end;)
> +    {
> +      vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +
> +      asm volatile("nop" ::: "memory");
> +      vint64m8_t vw0 = __riscv_vsext_vf8_i64m8 (v0, vl);
> +      vint64m8_t vw1 = __riscv_vsext_vf8_i64m8 (v1, vl);
> +      vint64m8_t vw2 = __riscv_vsext_vf8_i64m8 (v2, vl);
> +      vint64m8_t vw3 = __riscv_vsext_vf8_i64m8 (v3, vl);
> +
> +      asm volatile("nop" ::: "memory");
> +      size_t sum0 = __riscv_vmv_x_s_i64m8_i64 (vw0);
> +      size_t sum1 = __riscv_vmv_x_s_i64m8_i64 (vw1);
> +      size_t sum2 = __riscv_vmv_x_s_i64m8_i64 (vw2);
> +      size_t sum3 = __riscv_vmv_x_s_i64m8_i64 (vw3);
> +
> +      sum += sumation (sum0, sum1, sum2, sum3);
> +    }
> +  return sum;
> +}
> +
> +/* { dg-final { scan-assembler-not {vmv1r} } } */
> +/* { dg-final { scan-assembler-not {vmv2r} } } */
> +/* { dg-final { scan-assembler-not {vmv4r} } } */
> +/* { dg-final { scan-assembler-not {vmv8r} } } */
> +/* { dg-final { scan-assembler-not {csrr} } } */
> --
> 2.36.3
>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-11-30  7:08 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-30  6:49 [PATCH] RISC-V: Support widening register overlap for vf4/vf8 Juzhe-Zhong
2023-11-30  7:08 ` Kito Cheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).