Re: [PATCH] RISC-V: Support widening register overlap for vf4/vf8

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: Kito Cheng <kito.cheng@gmail.com>
To: Juzhe-Zhong <juzhe.zhong@rivai.ai>
Cc: gcc-patches@gcc.gnu.org, kito.cheng@sifive.com,
	jeffreyalaw@gmail.com,  rdapp.gcc@gmail.com
Subject: Re: [PATCH] RISC-V: Support widening register overlap for vf4/vf8
Date: Thu, 30 Nov 2023 15:08:27 +0800	[thread overview]
Message-ID: <CA+yXCZC=ajUEjWA76Xuevhyv2SAjNJvDQT7F69BkWcFBs3HycA@mail.gmail.com> (raw)
In-Reply-To: <20231130064905.2716758-1-juzhe.zhong@rivai.ai>

LGTM, thanks :)

On Thu, Nov 30, 2023 at 2:49 PM Juzhe-Zhong <juzhe.zhong@rivai.ai> wrote:
>
>
> size_t
> foo (char const *buf, size_t len)
> {
>   size_t sum = 0;
>   size_t vl = __riscv_vsetvlmax_e8m8 ();
>   size_t step = vl * 4;
>   const char *it = buf, *end = buf + len;
>   for (; it + step <= end;)
>     {
>       vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
>       it += vl;
>       vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
>       it += vl;
>       vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
>       it += vl;
>       vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
>       it += vl;
>
>       asm volatile("nop" ::: "memory");
>       vint64m8_t vw0 = __riscv_vsext_vf8_i64m8 (v0, vl);
>       vint64m8_t vw1 = __riscv_vsext_vf8_i64m8 (v1, vl);
>       vint64m8_t vw2 = __riscv_vsext_vf8_i64m8 (v2, vl);
>       vint64m8_t vw3 = __riscv_vsext_vf8_i64m8 (v3, vl);
>
>       asm volatile("nop" ::: "memory");
>       size_t sum0 = __riscv_vmv_x_s_i64m8_i64 (vw0);
>       size_t sum1 = __riscv_vmv_x_s_i64m8_i64 (vw1);
>       size_t sum2 = __riscv_vmv_x_s_i64m8_i64 (vw2);
>       size_t sum3 = __riscv_vmv_x_s_i64m8_i64 (vw3);
>
>       sum += sumation (sum0, sum1, sum2, sum3);
>     }
>   return sum;
> }
>
> Before this patch:
>
>         add     a3,s0,s1
>         add     a4,s6,s1
>         add     a5,s7,s1
>         vsetvli zero,s0,e64,m8,ta,ma
>         vle8.v  v4,0(s1)
>         vle8.v  v3,0(a3)
>         mv      s1,s2
>         vle8.v  v2,0(a4)
>         vle8.v  v1,0(a5)
>         nop
>         vsext.vf8       v8,v4
>         vsext.vf8       v16,v2
>         vs8r.v  v8,0(sp)
>         vsext.vf8       v24,v1
>         vsext.vf8       v8,v3
>         nop
>         vmv.x.s a1,v8
>         vl8re64.v       v8,0(sp)
>         vmv.x.s a3,v24
>         vmv.x.s a2,v16
>         vmv.x.s a0,v8
>         add     s2,s2,s5
>         call    sumation
>         add     s3,s3,a0
>         bgeu    s4,s2,.L5
>
> After this patch:
>
>         add     a3,s0,s1
>         add     a4,s6,s1
>         add     a5,s7,s1
>         vsetvli zero,s0,e64,m8,ta,ma
>         vle8.v  v15,0(s1)
>         vle8.v  v23,0(a3)
>         mv      s1,s2
>         vle8.v  v31,0(a4)
>         vle8.v  v7,0(a5)
>         vsext.vf8       v8,v15
>         vsext.vf8       v16,v23
>         vsext.vf8       v24,v31
>         vsext.vf8       v0,v7
>         vmv.x.s a3,v0
>         vmv.x.s a2,v24
>         vmv.x.s a1,v16
>         vmv.x.s a0,v8
>         add     s2,s2,s5
>         call    sumation
>         add     s3,s3,a0
>         bgeu    s4,s2,.L5
>
>         PR target/112431
>
> gcc/ChangeLog:
>
>         * config/riscv/vector.md: Add widening overlap of vf2/vf4.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/base/pr112431-16.c: New test.
>         * gcc.target/riscv/rvv/base/pr112431-17.c: New test.
>         * gcc.target/riscv/rvv/base/pr112431-18.c: New test.
>
> ---
>  gcc/config/riscv/vector.md                    | 38 ++++++-----
>  .../gcc.target/riscv/rvv/base/pr112431-16.c   | 68 +++++++++++++++++++
>  .../gcc.target/riscv/rvv/base/pr112431-17.c   | 51 ++++++++++++++
>  .../gcc.target/riscv/rvv/base/pr112431-18.c   | 51 ++++++++++++++
>  4 files changed, 190 insertions(+), 18 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c
>
> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index 6b891c11324..e5d62c6e58b 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -3704,43 +3704,45 @@
>
>  ;; Vector Quad-Widening Sign-extend and Zero-extend.
>  (define_insn "@pred_<optab><mode>_vf4"
> -  [(set (match_operand:VQEXTI 0 "register_operand"          "=&vr,&vr")
> +  [(set (match_operand:VQEXTI 0 "register_operand"               "=vr,   vr,   vr,   vr, ?&vr, ?&vr")
>         (if_then_else:VQEXTI
>           (unspec:<VM>
> -           [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1")
> -            (match_operand 4 "vector_length_operand"          "   rK,   rK")
> -            (match_operand 5 "const_int_operand"              "    i,    i")
> -            (match_operand 6 "const_int_operand"              "    i,    i")
> -            (match_operand 7 "const_int_operand"              "    i,    i")
> +           [(match_operand:<VM> 1 "vector_mask_operand"       "vmWc1,vmWc1,vmWc1,vmWc1,vmWc1,vmWc1")
> +            (match_operand 4 "vector_length_operand"          "   rK,   rK,   rK,   rK,   rK,   rK")
> +            (match_operand 5 "const_int_operand"              "    i,    i,    i,    i,    i,    i")
> +            (match_operand 6 "const_int_operand"              "    i,    i,    i,    i,    i,    i")
> +            (match_operand 7 "const_int_operand"              "    i,    i,    i,    i,    i,    i")
>              (reg:SI VL_REGNUM)
>              (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
>           (any_extend:VQEXTI
> -           (match_operand:<V_QUAD_TRUNC> 3 "register_operand" "   vr,   vr"))
> -         (match_operand:VQEXTI 2 "vector_merge_operand"       "   vu,    0")))]
> +           (match_operand:<V_QUAD_TRUNC> 3 "register_operand" "  W43,  W43,  W86,  W86,   vr,   vr"))
> +         (match_operand:VQEXTI 2 "vector_merge_operand"       "   vu,    0,   vu,    0,   vu,    0")))]
>    "TARGET_VECTOR"
>    "v<sz>ext.vf4\t%0,%3%p1"
>    [(set_attr "type" "vext")
> -   (set_attr "mode" "<MODE>")])
> +   (set_attr "mode" "<MODE>")
> +   (set_attr "group_overlap" "W43,W43,W86,W86,none,none")])
>
>  ;; Vector Oct-Widening Sign-extend and Zero-extend.
>  (define_insn "@pred_<optab><mode>_vf8"
> -  [(set (match_operand:VOEXTI 0 "register_operand"         "=&vr,&vr")
> +  [(set (match_operand:VOEXTI 0 "register_operand"              "=vr,   vr, ?&vr, ?&vr")
>         (if_then_else:VOEXTI
>           (unspec:<VM>
> -           [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1")
> -            (match_operand 4 "vector_length_operand"         "   rK,   rK")
> -            (match_operand 5 "const_int_operand"             "    i,    i")
> -            (match_operand 6 "const_int_operand"             "    i,    i")
> -            (match_operand 7 "const_int_operand"             "    i,    i")
> +           [(match_operand:<VM> 1 "vector_mask_operand"      "vmWc1,vmWc1,vmWc1,vmWc1")
> +            (match_operand 4 "vector_length_operand"         "   rK,   rK,   rK,   rK")
> +            (match_operand 5 "const_int_operand"             "    i,    i,    i,    i")
> +            (match_operand 6 "const_int_operand"             "    i,    i,    i,    i")
> +            (match_operand 7 "const_int_operand"             "    i,    i,    i,    i")
>              (reg:SI VL_REGNUM)
>              (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
>           (any_extend:VOEXTI
> -           (match_operand:<V_OCT_TRUNC> 3 "register_operand" "   vr,   vr"))
> -         (match_operand:VOEXTI 2 "vector_merge_operand"      "   vu,    0")))]
> +           (match_operand:<V_OCT_TRUNC> 3 "register_operand" "  W87,  W87,   vr,   vr"))
> +         (match_operand:VOEXTI 2 "vector_merge_operand"      "   vu,    0,   vu,    0")))]
>    "TARGET_VECTOR"
>    "v<sz>ext.vf8\t%0,%3%p1"
>    [(set_attr "type" "vext")
> -   (set_attr "mode" "<MODE>")])
> +   (set_attr "mode" "<MODE>")
> +   (set_attr "group_overlap" "W87,W87,none,none")])
>
>  ;; Vector Widening Add/Subtract/Multiply.
>  (define_insn "@pred_dual_widen_<any_widen_binop:optab><any_extend:su><mode>"
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
> new file mode 100644
> index 00000000000..98f42458883
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c
> @@ -0,0 +1,68 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +size_t __attribute__ ((noinline))
> +sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum4,
> +         size_t sum5, size_t sum6, size_t sum7)
> +{
> +  return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
> +}
> +
> +size_t
> +foo (char const *buf, size_t len)
> +{
> +  size_t sum = 0;
> +  size_t vl = __riscv_vsetvlmax_e8m8 ();
> +  size_t step = vl * 4;
> +  const char *it = buf, *end = buf + len;
> +  for (; it + step <= end;)
> +    {
> +      vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v4 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v5 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v6 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v7 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +
> +      asm volatile("nop" ::: "memory");
> +      vint32m4_t vw0 = __riscv_vsext_vf4_i32m4 (v0, vl);
> +      vint32m4_t vw1 = __riscv_vsext_vf4_i32m4 (v1, vl);
> +      vint32m4_t vw2 = __riscv_vsext_vf4_i32m4 (v2, vl);
> +      vint32m4_t vw3 = __riscv_vsext_vf4_i32m4 (v3, vl);
> +      vint32m4_t vw4 = __riscv_vsext_vf4_i32m4 (v4, vl);
> +      vint32m4_t vw5 = __riscv_vsext_vf4_i32m4 (v5, vl);
> +      vint32m4_t vw6 = __riscv_vsext_vf4_i32m4 (v6, vl);
> +      vint32m4_t vw7 = __riscv_vsext_vf4_i32m4 (v7, vl);
> +
> +      asm volatile("nop" ::: "memory");
> +      size_t sum0 = __riscv_vmv_x_s_i32m4_i32 (vw0);
> +      size_t sum1 = __riscv_vmv_x_s_i32m4_i32 (vw1);
> +      size_t sum2 = __riscv_vmv_x_s_i32m4_i32 (vw2);
> +      size_t sum3 = __riscv_vmv_x_s_i32m4_i32 (vw3);
> +      size_t sum4 = __riscv_vmv_x_s_i32m4_i32 (vw4);
> +      size_t sum5 = __riscv_vmv_x_s_i32m4_i32 (vw5);
> +      size_t sum6 = __riscv_vmv_x_s_i32m4_i32 (vw6);
> +      size_t sum7 = __riscv_vmv_x_s_i32m4_i32 (vw7);
> +
> +      sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7);
> +    }
> +  return sum;
> +}
> +
> +/* { dg-final { scan-assembler-not {vmv1r} } } */
> +/* { dg-final { scan-assembler-not {vmv2r} } } */
> +/* { dg-final { scan-assembler-not {vmv4r} } } */
> +/* { dg-final { scan-assembler-not {vmv8r} } } */
> +/* { dg-final { scan-assembler-not {csrr} } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
> new file mode 100644
> index 00000000000..9b60005344d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c
> @@ -0,0 +1,51 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +size_t __attribute__ ((noinline))
> +sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3)
> +{
> +  return sum0 + sum1 + sum2 + sum3;
> +}
> +
> +size_t
> +foo (char const *buf, size_t len)
> +{
> +  size_t sum = 0;
> +  size_t vl = __riscv_vsetvlmax_e8m8 ();
> +  size_t step = vl * 4;
> +  const char *it = buf, *end = buf + len;
> +  for (; it + step <= end;)
> +    {
> +      vint8m2_t v0 = __riscv_vle8_v_i8m2 ((void *) it, vl);
> +      it += vl;
> +      vint8m2_t v1 = __riscv_vle8_v_i8m2 ((void *) it, vl);
> +      it += vl;
> +      vint8m2_t v2 = __riscv_vle8_v_i8m2 ((void *) it, vl);
> +      it += vl;
> +      vint8m2_t v3 = __riscv_vle8_v_i8m2 ((void *) it, vl);
> +      it += vl;
> +
> +      asm volatile("nop" ::: "memory");
> +      vint32m8_t vw0 = __riscv_vsext_vf4_i32m8 (v0, vl);
> +      vint32m8_t vw1 = __riscv_vsext_vf4_i32m8 (v1, vl);
> +      vint32m8_t vw2 = __riscv_vsext_vf4_i32m8 (v2, vl);
> +      vint32m8_t vw3 = __riscv_vsext_vf4_i32m8 (v3, vl);
> +
> +      asm volatile("nop" ::: "memory");
> +      size_t sum0 = __riscv_vmv_x_s_i32m8_i32 (vw0);
> +      size_t sum1 = __riscv_vmv_x_s_i32m8_i32 (vw1);
> +      size_t sum2 = __riscv_vmv_x_s_i32m8_i32 (vw2);
> +      size_t sum3 = __riscv_vmv_x_s_i32m8_i32 (vw3);
> +
> +      sum += sumation (sum0, sum1, sum2, sum3);
> +    }
> +  return sum;
> +}
> +
> +/* { dg-final { scan-assembler-not {vmv1r} } } */
> +/* { dg-final { scan-assembler-not {vmv2r} } } */
> +/* { dg-final { scan-assembler-not {vmv4r} } } */
> +/* { dg-final { scan-assembler-not {vmv8r} } } */
> +/* { dg-final { scan-assembler-not {csrr} } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c
> new file mode 100644
> index 00000000000..dd65b2fa098
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c
> @@ -0,0 +1,51 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
> +
> +#include "riscv_vector.h"
> +
> +size_t __attribute__ ((noinline))
> +sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3)
> +{
> +  return sum0 + sum1 + sum2 + sum3;
> +}
> +
> +size_t
> +foo (char const *buf, size_t len)
> +{
> +  size_t sum = 0;
> +  size_t vl = __riscv_vsetvlmax_e8m8 ();
> +  size_t step = vl * 4;
> +  const char *it = buf, *end = buf + len;
> +  for (; it + step <= end;)
> +    {
> +      vint8m1_t v0 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v1 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v2 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +      vint8m1_t v3 = __riscv_vle8_v_i8m1 ((void *) it, vl);
> +      it += vl;
> +
> +      asm volatile("nop" ::: "memory");
> +      vint64m8_t vw0 = __riscv_vsext_vf8_i64m8 (v0, vl);
> +      vint64m8_t vw1 = __riscv_vsext_vf8_i64m8 (v1, vl);
> +      vint64m8_t vw2 = __riscv_vsext_vf8_i64m8 (v2, vl);
> +      vint64m8_t vw3 = __riscv_vsext_vf8_i64m8 (v3, vl);
> +
> +      asm volatile("nop" ::: "memory");
> +      size_t sum0 = __riscv_vmv_x_s_i64m8_i64 (vw0);
> +      size_t sum1 = __riscv_vmv_x_s_i64m8_i64 (vw1);
> +      size_t sum2 = __riscv_vmv_x_s_i64m8_i64 (vw2);
> +      size_t sum3 = __riscv_vmv_x_s_i64m8_i64 (vw3);
> +
> +      sum += sumation (sum0, sum1, sum2, sum3);
> +    }
> +  return sum;
> +}
> +
> +/* { dg-final { scan-assembler-not {vmv1r} } } */
> +/* { dg-final { scan-assembler-not {vmv2r} } } */
> +/* { dg-final { scan-assembler-not {vmv4r} } } */
> +/* { dg-final { scan-assembler-not {vmv8r} } } */
> +/* { dg-final { scan-assembler-not {csrr} } } */
> --
> 2.36.3
>

     prev parent reply	other threads:[~2023-11-30  7:08 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-30  6:49 Juzhe-Zhong
2023-11-30  7:08 ` Kito Cheng [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CA+yXCZC=ajUEjWA76Xuevhyv2SAjNJvDQT7F69BkWcFBs3HycA@mail.gmail.com' \
    --to=kito.cheng@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=jeffreyalaw@gmail.com \
    --cc=juzhe.zhong@rivai.ai \
    --cc=kito.cheng@sifive.com \
    --cc=rdapp.gcc@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).