From: Kito Cheng <kito.cheng@gmail.com>
To: Juzhe-Zhong <juzhe.zhong@rivai.ai>
Cc: gcc-patches@gcc.gnu.org, kito.cheng@sifive.com,
jeffreyalaw@gmail.com, rdapp.gcc@gmail.com
Subject: Re: [PATCH] RISC-V: Support widening register overlap for vf4/vf8
Date: Thu, 30 Nov 2023 15:08:27 +0800 [thread overview]
Message-ID: <CA+yXCZC=ajUEjWA76Xuevhyv2SAjNJvDQT7F69BkWcFBs3HycA@mail.gmail.com> (raw)
In-Reply-To: <20231130064905.2716758-1-juzhe.zhong@rivai.ai>
LGTM, thanks :)
On Thu, Nov 30, 2023 at 2:49 PM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
>
>
> size_t
> foo (char const *buf, size_t len)
> {
> size_t sum = 0;
> size_t vl = __riscv_vsetvlmax_e8m8 ();
> size_t step = vl * 4;
> const char *it = buf, *end = buf + len;
> for (; it + step <= end;)
> {
> vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> it += vl;
> vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> it += vl;
> vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> it += vl;
> vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> it += vl;
>
> asm volatile("nop" ::: "memory");
> vint64m8_t vw0 = __riscv_vsext_vf8_i64m8 (v0, vl);
> vint64m8_t vw1 = __riscv_vsext_vf8_i64m8 (v1, vl);
> vint64m8_t vw2 = __riscv_vsext_vf8_i64m8 (v2, vl);
> vint64m8_t vw3 = __riscv_vsext_vf8_i64m8 (v3, vl);
>
> asm volatile("nop" ::: "memory");
> size_t sum0 = __riscv_vmv_x_s_i64m8_i64 (vw0);
> size_t sum1 = __riscv_vmv_x_s_i64m8_i64 (vw1);
> size_t sum2 = __riscv_vmv_x_s_i64m8_i64 (vw2);
> size_t sum3 = __riscv_vmv_x_s_i64m8_i64 (vw3);
>
> sum += sumation (sum0, sum1, sum2, sum3);
> }
> return sum;
> }
>
> Before this patch:
>
> add a3,s0,s1
> add a4,s6,s1
> add a5,s7,s1
> vsetvli zero,s0,e64,m8,ta,ma
> vle8.v v4,0(s1)
> vle8.v v3,0(a3)
> mv s1,s2
> vle8.v v2,0(a4)
> vle8.v v1,0(a5)
> nop
> vsext.vf8 v8,v4
> vsext.vf8 v16,v2
> vs8r.v v8,0(sp)
> vsext.vf8 v24,v1
> vsext.vf8 v8,v3
> nop
> vmv.x.s a1,v8
> vl8re64.v v8,0(sp)
> vmv.x.s a3,v24
> vmv.x.s a2,v16
> vmv.x.s a0,v8
> add s2,s2,s5
> call sumation
> add s3,s3,a0
> bgeu s4,s2,.L5
>
> After this patch:
>
> add a3,s0,s1
> add a4,s6,s1
> add a5,s7,s1
> vsetvli zero,s0,e64,m8,ta,ma
> vle8.v v15,0(s1)
> vle8.v v23,0(a3)
> mv s1,s2
> vle8.v v31,0(a4)
> vle8.v v7,0(a5)
> vsext.vf8 v8,v15
> vsext.vf8 v16,v23
> vsext.vf8 v24,v31
> vsext.vf8 v0,v7
> vmv.x.s a3,v0
> vmv.x.s a2,v24
> vmv.x.s a1,v16
> vmv.x.s a0,v8
> add s2,s2,s5
> call sumation
> add s3,s3,a0
> bgeu s4,s2,.L5
>
> PR target/112431
>
> gcc/ChangeLog:
>
> * config/riscv/vector.md: Add widening overlap of vf4/vf8.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/pr112431-16.c: New test.
> * gcc.target/riscv/rvv/base/pr112431-17.c: New test.
> * gcc.target/riscv/rvv/base/pr112431-18.c: New test.
>
> ---
> gcc/config/riscv/vector.md | 38 ++++++-----
> .../gcc.target/riscv/rvv/base/pr112431-16.c | 68 +++++++++++++++++++
> .../gcc.target/riscv/rvv/base/pr112431-17.c | 51 ++++++++++++++
> .../gcc.target/riscv/rvv/base/pr112431-18.c | 51 ++++++++++++++
> 4 files changed, 190 insertions(+), 18 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c
>
> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index 6b891c11324..e5d62c6e58b 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -3704,43 +3704,45 @@
>
> ;; Vector Quad-Widening Sign-extend and Zero-extend.
> (define_insn "@pred_<optab><mode>_vf4"
> - [(set (match_operand:VQEXTI 0 "register_operand" "=&vr,&vr")
> + [(set (match_operand:VQEXTI 0 "register_operand" "=vr, vr, vr, vr, ?&vr, ?&vr")
> (if_then_else:VQEXTI
> (unspec:<VM>
> - [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
> - (match_operand 4 "vector_length_operand" " rK, rK")
> - (match_operand 5 "const_int_operand" " i, i")
> - (match_operand 6 "const_int_operand" " i, i")
> - (match_operand 7 "const_int_operand" " i, i")
> + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
> + (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK, rK")
> + (match_operand 5 "const_int_operand" " i, i, i, i, i, i")
> + (match_operand 6 "const_int_operand" " i, i, i, i, i, i")
> + (match_operand 7 "const_int_operand" " i, i, i, i, i, i")
> (reg:SI VL_REGNUM)
> (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> (any_extend:VQEXTI
> - (match_operand:<V_QUAD_TRUNC> 3 "register_operand" " vr, vr"))
> - (match_operand:VQEXTI 2 "vector_merge_operand" " vu, 0")))]
> + (match_operand:<V_QUAD_TRUNC> 3 "register_operand" " W43, W43, W86, W86, vr, vr"))
> + (match_operand:VQEXTI 2 "vector_merge_operand" " vu, 0, vu, 0, vu, 0")))]
> "TARGET_VECTOR"
> "v<sz>ext.vf4\t%0,%3%p1"
> [(set_attr "type" "vext")
> - (set_attr "mode" "<MODE>")])
> + (set_attr "mode" "<MODE>")
> + (set_attr "group_overlap" "W43,W43,W86,W86,none,none")])
>
> ;; Vector Oct-Widening Sign-extend and Zero-extend.
> (define_insn "@pred_<optab><mode>_vf8"
> - [(set (match_operand:VOEXTI 0 "register_operand" "=&vr,&vr")
> + [(set (match_operand:VOEXTI 0 "register_operand" "=vr, vr, ?&vr, ?&vr")
> (if_then_else:VOEXTI
> (unspec:<VM>
> - [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1")
> - (match_operand 4 "vector_length_operand" " rK, rK")
> - (match_operand 5 "const_int_operand" " i, i")
> - (match_operand 6 "const_int_operand" " i, i")
> - (match_operand 7 "const_int_operand" " i, i")
> + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1,vmWc1,vmWc1,vmWc1")
> + (match_operand 4 "vector_length_operand" " rK, rK, rK, rK")
> + (match_operand 5 "const_int_operand" " i, i, i, i")
> + (match_operand 6 "const_int_operand" " i, i, i, i")
> + (match_operand 7 "const_int_operand" " i, i, i, i")
> (reg:SI VL_REGNUM)
> (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> (any_extend:VOEXTI
> - (match_operand:<V_OCT_TRUNC> 3 "register_operand" " vr, vr"))
> - (match_operand:VOEXTI 2 "vector_merge_operand" " vu, 0")))]
> + (match_operand:<V_OCT_TRUNC> 3 "register_operand" " W87, W87, vr, vr"))
> + (match_operand:VOEXTI 2 "vector_merge_operand" " vu, 0, vu, 0")))]
> "TARGET_VECTOR"
> "v<sz>ext.vf8\t%0,%3%p1"
> [(set_attr "type" "vext")
> - (set_attr "mode" "<MODE>")])
> + (set_attr "mode" "<MODE>")
> + (set_attr "group_overlap" "W87,W87,none,none")])
>
> ;; Vector Widening Add/Subtract/Multiply.
> (define_insn "@pred_dual_widen_<any_widen_binop:optab><any_extend:su><mode>"
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
> new file mode 100644
> index 00000000000..98f42458883
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
> @@ -0,0 +1,68 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +size_t __attribute__ ((noinline))
> +sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
> + size_t sum5, size_t sum6, size_t sum7)
> +{
> + return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
> +}
> +
> +size_t
> +foo (char const *buf, size_t len)
> +{
> + size_t sum = 0;
> + size_t vl = __riscv_vsetvlmax_e8m8 ();
> + size_t step = vl * 4;
> + const char *it = buf, *end = buf + len;
> + for (; it + step <= end;)
> + {
> + vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> + it += vl;
> + vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> + it += vl;
> + vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> + it += vl;
> + vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> + it += vl;
> + vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> + it += vl;
> + vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> + it += vl;
> + vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> + it += vl;
> + vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> + it += vl;
> +
> + asm volatile("nop" ::: "memory");
> + vint32m4_t vw0 = __riscv_vsext_vf4_i32m4 (v0, vl);
> + vint32m4_t vw1 = __riscv_vsext_vf4_i32m4 (v1, vl);
> + vint32m4_t vw2 = __riscv_vsext_vf4_i32m4 (v2, vl);
> + vint32m4_t vw3 = __riscv_vsext_vf4_i32m4 (v3, vl);
> + vint32m4_t vw4 = __riscv_vsext_vf4_i32m4 (v4, vl);
> + vint32m4_t vw5 = __riscv_vsext_vf4_i32m4 (v5, vl);
> + vint32m4_t vw6 = __riscv_vsext_vf4_i32m4 (v6, vl);
> + vint32m4_t vw7 = __riscv_vsext_vf4_i32m4 (v7, vl);
> +
> + asm volatile("nop" ::: "memory");
> + size_t sum0 = __riscv_vmv_x_s_i32m4_i32 (vw0);
> + size_t sum1 = __riscv_vmv_x_s_i32m4_i32 (vw1);
> + size_t sum2 = __riscv_vmv_x_s_i32m4_i32 (vw2);
> + size_t sum3 = __riscv_vmv_x_s_i32m4_i32 (vw3);
> + size_t sum4 = __riscv_vmv_x_s_i32m4_i32 (vw4);
> + size_t sum5 = __riscv_vmv_x_s_i32m4_i32 (vw5);
> + size_t sum6 = __riscv_vmv_x_s_i32m4_i32 (vw6);
> + size_t sum7 = __riscv_vmv_x_s_i32m4_i32 (vw7);
> +
> + sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
> + }
> + return sum;
> +}
> +
> +/* { dg-final { scan-assembler-not {vmv1r} } } */
> +/* { dg-final { scan-assembler-not {vmv2r} } } */
> +/* { dg-final { scan-assembler-not {vmv4r} } } */
> +/* { dg-final { scan-assembler-not {vmv8r} } } */
> +/* { dg-final { scan-assembler-not {csrr} } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
> new file mode 100644
> index 00000000000..9b60005344d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
> @@ -0,0 +1,51 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +size_t __attribute__ ((noinline))
> +sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3)
> +{
> + return sum0 + sum1 + sum2 + sum3;
> +}
> +
> +size_t
> +foo (char const *buf, size_t len)
> +{
> + size_t sum = 0;
> + size_t vl = __riscv_vsetvlmax_e8m8 ();
> + size_t step = vl * 4;
> + const char *it = buf, *end = buf + len;
> + for (; it + step <= end;)
> + {
> + vint8m2_t v0 = __riscv_vle8_v_i8m2 ((void *) it, vl);
> + it += vl;
> + vint8m2_t v1 = __riscv_vle8_v_i8m2 ((void *) it, vl);
> + it += vl;
> + vint8m2_t v2 = __riscv_vle8_v_i8m2 ((void *) it, vl);
> + it += vl;
> + vint8m2_t v3 = __riscv_vle8_v_i8m2 ((void *) it, vl);
> + it += vl;
> +
> + asm volatile("nop" ::: "memory");
> + vint32m8_t vw0 = __riscv_vsext_vf4_i32m8 (v0, vl);
> + vint32m8_t vw1 = __riscv_vsext_vf4_i32m8 (v1, vl);
> + vint32m8_t vw2 = __riscv_vsext_vf4_i32m8 (v2, vl);
> + vint32m8_t vw3 = __riscv_vsext_vf4_i32m8 (v3, vl);
> +
> + asm volatile("nop" ::: "memory");
> + size_t sum0 = __riscv_vmv_x_s_i32m8_i32 (vw0);
> + size_t sum1 = __riscv_vmv_x_s_i32m8_i32 (vw1);
> + size_t sum2 = __riscv_vmv_x_s_i32m8_i32 (vw2);
> + size_t sum3 = __riscv_vmv_x_s_i32m8_i32 (vw3);
> +
> + sum += sumation (sum0, sum1, sum2, sum3);
> + }
> + return sum;
> +}
> +
> +/* { dg-final { scan-assembler-not {vmv1r} } } */
> +/* { dg-final { scan-assembler-not {vmv2r} } } */
> +/* { dg-final { scan-assembler-not {vmv4r} } } */
> +/* { dg-final { scan-assembler-not {vmv8r} } } */
> +/* { dg-final { scan-assembler-not {csrr} } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c
> new file mode 100644
> index 00000000000..dd65b2fa098
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c
> @@ -0,0 +1,51 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +size_t __attribute__ ((noinline))
> +sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3)
> +{
> + return sum0 + sum1 + sum2 + sum3;
> +}
> +
> +size_t
> +foo (char const *buf, size_t len)
> +{
> + size_t sum = 0;
> + size_t vl = __riscv_vsetvlmax_e8m8 ();
> + size_t step = vl * 4;
> + const char *it = buf, *end = buf + len;
> + for (; it + step <= end;)
> + {
> + vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> + it += vl;
> + vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> + it += vl;
> + vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> + it += vl;
> + vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> + it += vl;
> +
> + asm volatile("nop" ::: "memory");
> + vint64m8_t vw0 = __riscv_vsext_vf8_i64m8 (v0, vl);
> + vint64m8_t vw1 = __riscv_vsext_vf8_i64m8 (v1, vl);
> + vint64m8_t vw2 = __riscv_vsext_vf8_i64m8 (v2, vl);
> + vint64m8_t vw3 = __riscv_vsext_vf8_i64m8 (v3, vl);
> +
> + asm volatile("nop" ::: "memory");
> + size_t sum0 = __riscv_vmv_x_s_i64m8_i64 (vw0);
> + size_t sum1 = __riscv_vmv_x_s_i64m8_i64 (vw1);
> + size_t sum2 = __riscv_vmv_x_s_i64m8_i64 (vw2);
> + size_t sum3 = __riscv_vmv_x_s_i64m8_i64 (vw3);
> +
> + sum += sumation (sum0, sum1, sum2, sum3);
> + }
> + return sum;
> +}
> +
> +/* { dg-final { scan-assembler-not {vmv1r} } } */
> +/* { dg-final { scan-assembler-not {vmv2r} } } */
> +/* { dg-final { scan-assembler-not {vmv4r} } } */
> +/* { dg-final { scan-assembler-not {vmv8r} } } */
> +/* { dg-final { scan-assembler-not {csrr} } } */
> --
> 2.36.3
>
prev parent reply other threads:[~2023-11-30 7:08 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-30 6:49 Juzhe-Zhong
2023-11-30 7:08 ` Kito Cheng [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CA+yXCZC=ajUEjWA76Xuevhyv2SAjNJvDQT7F69BkWcFBs3HycA@mail.gmail.com' \
--to=kito.cheng@gmail.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=jeffreyalaw@gmail.com \
--cc=juzhe.zhong@rivai.ai \
--cc=kito.cheng@sifive.com \
--cc=rdapp.gcc@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).