From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
To: gcc-patches@gcc.gnu.org
Cc: kito.cheng@gmail.com, kito.cheng@sifive.com,
jeffreyalaw@gmail.com, rdapp.gcc@gmail.com,
Juzhe-Zhong <juzhe.zhong@rivai.ai>
Subject: [PATCH] RISC-V: Allow dest operand and accumulator operand overlap of widen reduction instruction[PR112327]
Date: Wed, 1 Nov 2023 14:56:39 +0800 [thread overview]
Message-ID: <20231101065639.158911-1-juzhe.zhong@rivai.ai> (raw)
Consider the following intrinsic code:
/* Reproducer for PR112327: dot product of two int16_t arrays written with
   RVV intrinsics.  Processes n elements from pSrcA and pSrcB in strips and
   stores the 64-bit accumulated value through result.  */
void rvv_dot_prod(int16_t *pSrcA, int16_t *pSrcB, uint32_t n, int64_t *result)
{
size_t vl;
vint16m4_t vSrcA, vSrcB;
/* Seed the accumulator vector with scalar 0 (the second argument is the vl). */
vint64m1_t vSum = __riscv_vmv_s_x_i64m1(0, 1);
/* Strip-mine: each iteration handles the vl elements granted by vsetvl. */
while (n > 0) {
vl = __riscv_vsetvl_e16m4(n);
vSrcA = __riscv_vle16_v_i16m4(pSrcA, vl);
vSrcB = __riscv_vle16_v_i16m4(pSrcB, vl);
/* Widening multiply (i16 -> i32) feeds a widening reduction sum (i32 -> i64).
   vSum is both the accumulator input and the destination here; this is the
   dest/accumulator overlap the patch lets the register allocator exploit.  */
vSum = __riscv_vwredsum_vs_i32m8_i64m1(__riscv_vwmul_vv_i32m8(vSrcA, vSrcB, vl), vSum, vl);
/* Advance both inputs past the elements just consumed. */
pSrcA += vl;
pSrcB += vl;
n -= vl;
}
/* Extract the scalar result from the accumulator vector. */
*result = __riscv_vmv_x_s_i64m1_i64(vSum);
}
https://godbolt.org/z/vWd35W7G6
Before this patch:
...
Loop:
...
vmv1r.v v2,v1
...
vwredsum.vs v1,v8,v2
...
After this patch:
...
Loop:
...
vwredsum.vs v1,v8,v1
...
PR target/112327
gcc/ChangeLog:
* config/riscv/vector.md: Add '0' constraint to operand 4 (the accumulator) of the widening reduction patterns.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/pr112327-1.c: New test.
* gcc.target/riscv/rvv/base/pr112327-2.c: New test.
---
gcc/config/riscv/vector.md | 4 +--
.../gcc.target/riscv/rvv/base/pr112327-1.c | 27 +++++++++++++++++++
.../gcc.target/riscv/rvv/base/pr112327-2.c | 27 +++++++++++++++++++
3 files changed, 56 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-2.c
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 0297e4f0227..3577971fa33 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -7765,7 +7765,7 @@
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(unspec:<V_EXT_LMUL1> [
(match_operand:VI_QHS 3 "register_operand" " vr, vr")
- (match_operand:<V_EXT_LMUL1> 4 "register_operand" " vr, vr")
+ (match_operand:<V_EXT_LMUL1> 4 "register_operand" " vr0, vr0")
] ANY_WREDUC)
(match_operand:<V_EXT_LMUL1> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))]
"TARGET_VECTOR"
@@ -7834,7 +7834,7 @@
(reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
(unspec:<V_EXT_LMUL1> [
(match_operand:VF_HS 3 "register_operand" " vr, vr")
- (match_operand:<V_EXT_LMUL1> 4 "register_operand" " vr, vr")
+ (match_operand:<V_EXT_LMUL1> 4 "register_operand" " vr0, vr0")
] ANY_FWREDUC_SUM)
(match_operand:<V_EXT_LMUL1> 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))]
"TARGET_VECTOR"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-1.c
new file mode 100644
index 00000000000..20da23976f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (int16_t *pSrcA, int16_t *pSrcB, uint32_t n, int64_t *result)
+{
+ size_t vl;
+ vint16m4_t vSrcA, vSrcB;
+ vint64m1_t vSum = __riscv_vmv_s_x_i64m1 (0, 1);
+ while (n > 0)
+ {
+ vl = __riscv_vsetvl_e16m4 (n);
+ vSrcA = __riscv_vle16_v_i16m4 (pSrcA, vl);
+ vSrcB = __riscv_vle16_v_i16m4 (pSrcB, vl);
+ vSum = __riscv_vwredsum_vs_i32m8_i64m1 (
+ __riscv_vwmul_vv_i32m8 (vSrcA, vSrcB, vl), vSum, vl);
+ pSrcA += vl;
+ pSrcB += vl;
+ n -= vl;
+ }
+ *result = __riscv_vmv_x_s_i64m1_i64 (vSum);
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv\.v\.v} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-2.c
new file mode 100644
index 00000000000..5ffde000fbd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112327-2.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zfh -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+void
+foo (_Float16 *pSrcA, _Float16 *pSrcB, uint32_t n, double *result)
+{
+ size_t vl;
+ vfloat16m4_t vSrcA, vSrcB;
+ vfloat64m1_t vSum = __riscv_vfmv_s_f_f64m1 (0, 1);
+ while (n > 0)
+ {
+ vl = __riscv_vsetvl_e16m4 (n);
+ vSrcA = __riscv_vle16_v_f16m4 (pSrcA, vl);
+ vSrcB = __riscv_vle16_v_f16m4 (pSrcB, vl);
+ vSum = __riscv_vfwredusum_vs_f32m8_f64m1 (
+ __riscv_vfwmul_vv_f32m8 (vSrcA, vSrcB, vl), vSum, vl);
+ pSrcA += vl;
+ pSrcB += vl;
+ n -= vl;
+ }
+ *result = __riscv_vfmv_f_s_f64m1_f64 (vSum);
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv\.v\.v} } } */
--
2.36.3
next reply other threads:[~2023-11-01 6:56 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-01 6:56 Juzhe-Zhong [this message]
2023-11-01 19:02 ` Jeff Law
2023-11-02 0:52 ` Li, Pan2
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231101065639.158911-1-juzhe.zhong@rivai.ai \
--to=juzhe.zhong@rivai.ai \
--cc=gcc-patches@gcc.gnu.org \
--cc=jeffreyalaw@gmail.com \
--cc=kito.cheng@gmail.com \
--cc=kito.cheng@sifive.com \
--cc=rdapp.gcc@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).