public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] RISC-V: Allow dest operand and accumulator operand overlap of widen reduction instruction[PR112327]
@ 2023-11-01  6:56 Juzhe-Zhong
  2023-11-01 19:02 ` Jeff Law
  0 siblings, 1 reply; 3+ messages in thread
From: Juzhe-Zhong @ 2023-11-01  6:56 UTC (permalink / raw)
  To: gcc-patches; +Cc: kito.cheng, kito.cheng, jeffreyalaw, rdapp.gcc, Juzhe-Zhong


Consider the following intrinsic code:

/* Reproducer for PR target/112327.
   Walks pSrcA and pSrcB in chunks of vl elements until n reaches 0.  Each
   chunk is multiplied with a widening multiply and folded into vSum with a
   widening reduction sum; the scalar read back from vSum is stored to
   *result.  */
void rvv_dot_prod(int16_t *pSrcA, int16_t *pSrcB, uint32_t n, int64_t *result)
{
    size_t vl;
    vint16m4_t vSrcA, vSrcB;
    /* Accumulator vector, initialised from scalar 0 with a vl argument of 1.  */
    vint64m1_t vSum = __riscv_vmv_s_x_i64m1(0, 1);
    while (n > 0) {
        /* Ask for up to n e16/m4 elements; vl is what this iteration handles.  */
        vl = __riscv_vsetvl_e16m4(n);
        vSrcA = __riscv_vle16_v_i16m4(pSrcA, vl);
        vSrcB = __riscv_vle16_v_i16m4(pSrcB, vl);
        /* vSum is passed in as the accumulator operand and also receives the
           result.  This is the statement for which the message reports an
           extra vmv1r.v copy before the patch.  */
        vSum = __riscv_vwredsum_vs_i32m8_i64m1(__riscv_vwmul_vv_i32m8(vSrcA, vSrcB, vl), vSum, vl);
        /* Step both inputs past the vl elements just consumed.  */
        pSrcA += vl;
        pSrcB += vl;
        n -= vl;
    }
    /* Read the scalar back out of the accumulator vector.  */
    *result = __riscv_vmv_x_s_i64m1_i64(vSum);
}

https://godbolt.org/z/vWd35W7G6

Before this patch:

...
Loop:
...
vmv1r.v v2,v1
...
vwredsum.vs     v1,v8,v2
...

After this patch:

...
Loop:
...
vwredsum.vs	v1,v8,v1
...

	PR target/112327

gcc/ChangeLog:

	* config/riscv/vector.md: Add '0' constraint to the accumulator
	operand (operand 4) of the widening reduction patterns.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/base/pr112327-1.c: New test.
	* gcc.target/riscv/rvv/base/pr112327-2.c: New test.

---
 gcc/config/riscv/vector.md                    |  4 +--
 .../gcc.target/riscv/rvv/base/pr112327-1.c    | 27 +++++++++++++++++++
 .../gcc.target/riscv/rvv/base/pr112327-2.c    | 27 +++++++++++++++++++
 3 files changed, 56 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-2.c

diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 0297e4f0227..3577971fa33 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -7765,7 +7765,7 @@
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
            (unspec:<V_EXT_LMUL1> [
 	     (match_operand:VI_QHS         3 "register_operand"      "   vr,   vr")
-	     (match_operand:<V_EXT_LMUL1>  4 "register_operand"      "   vr,   vr")
+	     (match_operand:<V_EXT_LMUL1>  4 "register_operand"      "  vr0,  vr0")
            ] ANY_WREDUC)
 	   (match_operand:<V_EXT_LMUL1>    2 "vector_merge_operand"  "   vu,    0")] UNSPEC_REDUC))]
   "TARGET_VECTOR"
@@ -7834,7 +7834,7 @@
 	     (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
            (unspec:<V_EXT_LMUL1> [
 	     (match_operand:VF_HS          3 "register_operand"      "   vr,   vr")
-	     (match_operand:<V_EXT_LMUL1>  4 "register_operand"      "   vr,   vr")
+	     (match_operand:<V_EXT_LMUL1>  4 "register_operand"      "  vr0,  vr0")
            ] ANY_FWREDUC_SUM)
 	   (match_operand:<V_EXT_LMUL1>    2 "vector_merge_operand"  "   vu,    0")] UNSPEC_REDUC))]
   "TARGET_VECTOR"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-1.c
new file mode 100644
index 00000000000..20da23976f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (int16_t *pSrcA, int16_t *pSrcB, uint32_t n, int64_t *result)
+{
+  size_t vl;
+  vint16m4_t vSrcA, vSrcB;
+  vint64m1_t vSum = __riscv_vmv_s_x_i64m1 (0, 1);
+  while (n > 0)
+    {
+      vl = __riscv_vsetvl_e16m4 (n);
+      vSrcA = __riscv_vle16_v_i16m4 (pSrcA, vl);
+      vSrcB = __riscv_vle16_v_i16m4 (pSrcB, vl);
+      vSum = __riscv_vwredsum_vs_i32m8_i64m1 (
+	__riscv_vwmul_vv_i32m8 (vSrcA, vSrcB, vl), vSum, vl);
+      pSrcA += vl;
+      pSrcB += vl;
+      n -= vl;
+    }
+  *result = __riscv_vmv_x_s_i64m1_i64 (vSum);
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv\.v\.v} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-2.c
new file mode 100644
index 00000000000..5ffde000fbd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-2.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zfh -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (_Float16 *pSrcA, _Float16 *pSrcB, uint32_t n, double *result)
+{
+  size_t vl;
+  vfloat16m4_t vSrcA, vSrcB;
+  vfloat64m1_t vSum = __riscv_vfmv_s_f_f64m1 (0, 1);
+  while (n > 0)
+    {
+      vl = __riscv_vsetvl_e16m4 (n);
+      vSrcA = __riscv_vle16_v_f16m4 (pSrcA, vl);
+      vSrcB = __riscv_vle16_v_f16m4 (pSrcB, vl);
+      vSum = __riscv_vfwredusum_vs_f32m8_f64m1 (
+	__riscv_vfwmul_vv_f32m8 (vSrcA, vSrcB, vl), vSum, vl);
+      pSrcA += vl;
+      pSrcB += vl;
+      n -= vl;
+    }
+  *result = __riscv_vfmv_f_s_f64m1_f64 (vSum);
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv\.v\.v} } } */
-- 
2.36.3


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] RISC-V: Allow dest operand and accumulator operand overlap of widen reduction instruction[PR112327]
  2023-11-01  6:56 [PATCH] RISC-V: Allow dest operand and accumulator operand overlap of widen reduction instruction[PR112327] Juzhe-Zhong
@ 2023-11-01 19:02 ` Jeff Law
  2023-11-02  0:52   ` Li, Pan2
  0 siblings, 1 reply; 3+ messages in thread
From: Jeff Law @ 2023-11-01 19:02 UTC (permalink / raw)
  To: Juzhe-Zhong, gcc-patches; +Cc: kito.cheng, kito.cheng, rdapp.gcc



On 11/1/23 00:56, Juzhe-Zhong wrote:
> 
> Consider this following intrinsic code:
> 
> void rvv_dot_prod(int16_t *pSrcA, int16_t *pSrcB, uint32_t n, int64_t *result)
> {
>      size_t vl;
>      vint16m4_t vSrcA, vSrcB;
>      vint64m1_t vSum = __riscv_vmv_s_x_i64m1(0, 1);
>      while (n > 0) {
>          vl = __riscv_vsetvl_e16m4(n);
>          vSrcA = __riscv_vle16_v_i16m4(pSrcA, vl);
>          vSrcB = __riscv_vle16_v_i16m4(pSrcB, vl);
>          vSum = __riscv_vwredsum_vs_i32m8_i64m1(__riscv_vwmul_vv_i32m8(vSrcA, vSrcB, vl), vSum, vl);
>          pSrcA += vl;
>          pSrcB += vl;
>          n -= vl;
>      }
>      *result = __riscv_vmv_x_s_i64m1_i64(vSum);
> }
> 
> https://godbolt.org/z/vWd35W7G6
> 
> Before this patch:
> 
> ...
> Loop:
> ...
> vmv1r.v v2,v1
> ...
> vwredsum.vs     v1,v8,v2
> ...
> 
> After this patch:
> 
> ...
> Loop:
> ...
> vwredsum.vs	v1,v8,v1
> ...
> 
> 	PR target/112327
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/vector.md: Add '0'.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/rvv/base/pr112327-1.c: New test.
> 	* gcc.target/riscv/rvv/base/pr112327-2.c: New test.
OK
jeff

^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [PATCH] RISC-V: Allow dest operand and accumulator operand overlap of widen reduction instruction[PR112327]
  2023-11-01 19:02 ` Jeff Law
@ 2023-11-02  0:52   ` Li, Pan2
  0 siblings, 0 replies; 3+ messages in thread
From: Li, Pan2 @ 2023-11-02  0:52 UTC (permalink / raw)
  To: Jeff Law, Juzhe-Zhong, gcc-patches; +Cc: kito.cheng, kito.cheng, rdapp.gcc

Committed, thanks Jeff.

Pan

-----Original Message-----
From: Jeff Law <jeffreyalaw@gmail.com> 
Sent: Thursday, November 2, 2023 3:02 AM
To: Juzhe-Zhong <juzhe.zhong@rivai.ai>; gcc-patches@gcc.gnu.org
Cc: kito.cheng@gmail.com; kito.cheng@sifive.com; rdapp.gcc@gmail.com
Subject: Re: [PATCH] RISC-V: Allow dest operand and accumulator operand overlap of widen reduction instruction[PR112327]



On 11/1/23 00:56, Juzhe-Zhong wrote:
> 
> Consider this following intrinsic code:
> 
> void rvv_dot_prod(int16_t *pSrcA, int16_t *pSrcB, uint32_t n, int64_t *result)
> {
>      size_t vl;
>      vint16m4_t vSrcA, vSrcB;
>      vint64m1_t vSum = __riscv_vmv_s_x_i64m1(0, 1);
>      while (n > 0) {
>          vl = __riscv_vsetvl_e16m4(n);
>          vSrcA = __riscv_vle16_v_i16m4(pSrcA, vl);
>          vSrcB = __riscv_vle16_v_i16m4(pSrcB, vl);
>          vSum = __riscv_vwredsum_vs_i32m8_i64m1(__riscv_vwmul_vv_i32m8(vSrcA, vSrcB, vl), vSum, vl);
>          pSrcA += vl;
>          pSrcB += vl;
>          n -= vl;
>      }
>      *result = __riscv_vmv_x_s_i64m1_i64(vSum);
> }
> 
> https://godbolt.org/z/vWd35W7G6
> 
> Before this patch:
> 
> ...
> Loop:
> ...
> vmv1r.v v2,v1
> ...
> vwredsum.vs     v1,v8,v2
> ...
> 
> After this patch:
> 
> ...
> Loop:
> ...
> vwredsum.vs	v1,v8,v1
> ...
> 
> 	PR target/112327
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/vector.md: Add '0'.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/rvv/base/pr112327-1.c: New test.
> 	* gcc.target/riscv/rvv/base/pr112327-2.c: New test.
OK
jeff

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-11-02  0:52 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-01  6:56 [PATCH] RISC-V: Allow dest operand and accumulator operand overlap of widen reduction instruction[PR112327] Juzhe-Zhong
2023-11-01 19:02 ` Jeff Law
2023-11-02  0:52   ` Li, Pan2

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).