From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-ej1-x632.google.com (mail-ej1-x632.google.com [IPv6:2a00:1450:4864:20::632]) by sourceware.org (Postfix) with ESMTPS id B69443858D32 for ; Thu, 30 Nov 2023 07:08:42 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org B69443858D32 Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=gmail.com ARC-Filter: OpenARC Filter v1.0.0 sourceware.org B69443858D32 Authentication-Results: server2.sourceware.org; arc=none smtp.remote-ip=2a00:1450:4864:20::632 ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1701328125; cv=none; b=CmDn4NB8s+rdVcFCAvBaXsyZjLXL9erVF1f6cRLBzqqrdfjDsDfdaT82lhr+hQbhBEUNLczCsmXjPKa5txQslt2+HlSuau0C4AjHL7SjV1QOCskV2+NqTVbAjqbJLiRckSZqAvU7lME1hK76Iym1yJINwGue40bw/QNBvc/O+7A= ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1701328125; c=relaxed/simple; bh=7tRtpA7eh+WLhEs1Ekf9Q3KMByVhlnimTdNxDeQASaM=; h=DKIM-Signature:MIME-Version:From:Date:Message-ID:Subject:To; b=rXO78m4Afl2sMMXCeu2hHgeSHVeQDwFRvw+Vy1akJ4z6/wbgudRvMFTGzJVquivL9P1n7bm2qkdvbLiaL+ogaVhljaVLG0hIpfJrL00WtLQjXnP8BiDux5R4u1+x0SnddevIPFdpuMHg3eE/iIRFepgknPT/6qaSEx9ITbxEKj8= ARC-Authentication-Results: i=1; server2.sourceware.org Received: by mail-ej1-x632.google.com with SMTP id a640c23a62f3a-a132acb67b5so77547866b.3 for ; Wed, 29 Nov 2023 23:08:42 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1701328121; x=1701932921; darn=gcc.gnu.org; h=content-transfer-encoding:cc:to:subject:message-id:date:from :in-reply-to:references:mime-version:from:to:cc:subject:date :message-id:reply-to; bh=Nmrmo5a2m/256KLYgiDAB7oUMZm7pMZygB+QpODiJgk=; b=Rag9kcVE9stmnvAsFLqA4k+472zbI2WwOmvINIwjSP/KHEDv1bHPwC9tmnZart5/VS Jj9T+YXWip3gVzDFyLA7mzhWlq6uxHBN2qt9o4Vlbkcv/mrP4h1Ifaw3iBt6s73N/N/x O2MWon0q+9mmsU+FcCKIUmOK0igE8oz07eAPAWDAoWUdE31EVmUtlDotrNIWJcDvPZC+ 
VbJ5pv65okaoCrL7xhrGsJi/+28RpdhjFASLH8LtS0jBIvD/IE3HpbsYTh10HprOFFtD 4Lo3KV3cibgWkFmGHjvlr1NSDnZW2Oo5J4DaodJrSN1nbNHjj2dLOUfNvSHeA0e4L5fe FHpw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1701328121; x=1701932921; h=content-transfer-encoding:cc:to:subject:message-id:date:from :in-reply-to:references:mime-version:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=Nmrmo5a2m/256KLYgiDAB7oUMZm7pMZygB+QpODiJgk=; b=vvfH7M4/IlcA2w8lVTDPBBLKL7eI5D3gdSkVACZ8xa3G0lgkyJNTKzQQBMZFGRl7Us dKh0cAmtS8Ut5M1TnkKtw1P85RY+pK6TSBvBy2JLfr8FG3hZMeUwyFFtKfNfmWVFeish fBOkiHhEpfuKTgCDTWC0gqm+aGIxEzzabej77AqHarb5R+dQYzitJ7rOCOWiPnqyVfJ6 vzMsfuzcozDlfS7GfcmNQ4AcZJ2AnheZdepaiLtX5QBgjs9HwrjyV6OrodpnM6Iys65w yNZ6ZqkpMpLyyl3BAtA61x168In04OxX2t7KIN+lI3/nAvZNbYQcQU6FjCK3oaPUxbNu cwrw== X-Gm-Message-State: AOJu0YxVrbUF7zLxNIPhox2e9OJNs1VXYhfIpVDeUTCrSbtMEvA0rZPH LWb8Ikn5vO3cSKHht0yhZ40+nB6OPsxLnJ+moTE= X-Google-Smtp-Source: AGHT+IFPFBa0nYOzTEx0mznUGvXiAGEBF81jEudkc3t7b+nYmZ9waTAIjEtNMTAGBYO1eEs7I1+jaTcp4RRC7jS5BuY= X-Received: by 2002:a17:906:8b:b0:a18:a973:e5d7 with SMTP id 11-20020a170906008b00b00a18a973e5d7mr416485ejc.34.1701328120650; Wed, 29 Nov 2023 23:08:40 -0800 (PST) MIME-Version: 1.0 References: <20231130064905.2716758-1-juzhe.zhong@rivai.ai> In-Reply-To: <20231130064905.2716758-1-juzhe.zhong@rivai.ai> From: Kito Cheng Date: Thu, 30 Nov 2023 15:08:27 +0800 Message-ID: Subject: Re: [PATCH] RISC-V: Support widening register overlap for vf4/vf8 To: Juzhe-Zhong Cc: gcc-patches@gcc.gnu.org, kito.cheng@sifive.com, jeffreyalaw@gmail.com, rdapp.gcc@gmail.com Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Spam-Status: No, score=-11.7 required=5.0 tests=DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,FREEMAIL_FROM,GIT_PATCH_0,KAM_NUMSUBJECT,KAM_SHORT,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,T_SCC_BODY_TEXT_LINE autolearn=unavailable autolearn_force=no version=3.4.6 X-Spam-Checker-Version: 
SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: LGTM, thanks :) On Thu, Nov 30, 2023 at 2:49=E2=80=AFPM Juzhe-Zhong = wrote: > > > size_t > foo (char const *buf, size_t len) > { > size_t sum =3D 0; > size_t vl =3D __riscv_vsetvlmax_e8m8 (); > size_t step =3D vl * 4; > const char *it =3D buf, *end =3D buf + len; > for (; it + step <=3D end;) > { > vint8m1_t v0 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); > it +=3D vl; > vint8m1_t v1 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); > it +=3D vl; > vint8m1_t v2 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); > it +=3D vl; > vint8m1_t v3 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); > it +=3D vl; > > asm volatile("nop" ::: "memory"); > vint64m8_t vw0 =3D __riscv_vsext_vf8_i64m8 (v0, vl); > vint64m8_t vw1 =3D __riscv_vsext_vf8_i64m8 (v1, vl); > vint64m8_t vw2 =3D __riscv_vsext_vf8_i64m8 (v2, vl); > vint64m8_t vw3 =3D __riscv_vsext_vf8_i64m8 (v3, vl); > > asm volatile("nop" ::: "memory"); > size_t sum0 =3D __riscv_vmv_x_s_i64m8_i64 (vw0); > size_t sum1 =3D __riscv_vmv_x_s_i64m8_i64 (vw1); > size_t sum2 =3D __riscv_vmv_x_s_i64m8_i64 (vw2); > size_t sum3 =3D __riscv_vmv_x_s_i64m8_i64 (vw3); > > sum +=3D sumation (sum0, sum1, sum2, sum3); > } > return sum; > } > > Before this patch: > > add a3,s0,s1 > add a4,s6,s1 > add a5,s7,s1 > vsetvli zero,s0,e64,m8,ta,ma > vle8.v v4,0(s1) > vle8.v v3,0(a3) > mv s1,s2 > vle8.v v2,0(a4) > vle8.v v1,0(a5) > nop > vsext.vf8 v8,v4 > vsext.vf8 v16,v2 > vs8r.v v8,0(sp) > vsext.vf8 v24,v1 > vsext.vf8 v8,v3 > nop > vmv.x.s a1,v8 > vl8re64.v v8,0(sp) > vmv.x.s a3,v24 > vmv.x.s a2,v16 > vmv.x.s a0,v8 > add s2,s2,s5 > call sumation > add s3,s3,a0 > bgeu s4,s2,.L5 > > After this patch: > > add a3,s0,s1 > add a4,s6,s1 > add a5,s7,s1 > vsetvli zero,s0,e64,m8,ta,ma > vle8.v v15,0(s1) > vle8.v v23,0(a3) > mv s1,s2 > vle8.v v31,0(a4) > vle8.v v7,0(a5) > vsext.vf8 v8,v15 > vsext.vf8 v16,v23 > vsext.vf8 v24,v31 > vsext.vf8 v0,v7 > vmv.x.s a3,v0 > vmv.x.s a2,v24 > vmv.x.s a1,v16 > vmv.x.s a0,v8 > add 
s2,s2,s5 > call sumation > add s3,s3,a0 > bgeu s4,s2,.L5 > > PR target/112431 > > gcc/ChangeLog: > > * config/riscv/vector.md: Add widening overlap of vf4/vf8. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/base/pr112431-16.c: New test. > * gcc.target/riscv/rvv/base/pr112431-17.c: New test. > * gcc.target/riscv/rvv/base/pr112431-18.c: New test. > > --- > gcc/config/riscv/vector.md | 38 ++++++----- > .../gcc.target/riscv/rvv/base/pr112431-16.c | 68 +++++++++++++++++++ > .../gcc.target/riscv/rvv/base/pr112431-17.c | 51 ++++++++++++++ > .../gcc.target/riscv/rvv/base/pr112431-18.c | 51 ++++++++++++++ > 4 files changed, 190 insertions(+), 18 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c > > diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md > index 6b891c11324..e5d62c6e58b 100644 > --- a/gcc/config/riscv/vector.md > +++ b/gcc/config/riscv/vector.md > @@ -3704,43 +3704,45 @@ > > ;; Vector Quad-Widening Sign-extend and Zero-extend. 
> (define_insn "@pred__vf4" > - [(set (match_operand:VQEXTI 0 "register_operand" "=3D&vr,&vr"= ) > + [(set (match_operand:VQEXTI 0 "register_operand" "=3Dvr,= vr, vr, vr, ?&vr, ?&vr") > (if_then_else:VQEXTI > (unspec: > - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmW= c1") > - (match_operand 4 "vector_length_operand" " rK, = rK") > - (match_operand 5 "const_int_operand" " i, = i") > - (match_operand 6 "const_int_operand" " i, = i") > - (match_operand 7 "const_int_operand" " i, = i") > + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmW= c1,vmWc1,vmWc1,vmWc1,vmWc1") > + (match_operand 4 "vector_length_operand" " rK, = rK, rK, rK, rK, rK") > + (match_operand 5 "const_int_operand" " i, = i, i, i, i, i") > + (match_operand 6 "const_int_operand" " i, = i, i, i, i, i") > + (match_operand 7 "const_int_operand" " i, = i, i, i, i, i") > (reg:SI VL_REGNUM) > (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) > (any_extend:VQEXTI > - (match_operand: 3 "register_operand" " vr, = vr")) > - (match_operand:VQEXTI 2 "vector_merge_operand" " vu, = 0")))] > + (match_operand: 3 "register_operand" " W43, W= 43, W86, W86, vr, vr")) > + (match_operand:VQEXTI 2 "vector_merge_operand" " vu, = 0, vu, 0, vu, 0")))] > "TARGET_VECTOR" > "vext.vf4\t%0,%3%p1" > [(set_attr "type" "vext") > - (set_attr "mode" "")]) > + (set_attr "mode" "") > + (set_attr "group_overlap" "W43,W43,W86,W86,none,none")]) > > ;; Vector Oct-Widening Sign-extend and Zero-extend. 
> (define_insn "@pred__vf8" > - [(set (match_operand:VOEXTI 0 "register_operand" "=3D&vr,&vr") > + [(set (match_operand:VOEXTI 0 "register_operand" "=3Dvr, = vr, ?&vr, ?&vr") > (if_then_else:VOEXTI > (unspec: > - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc= 1") > - (match_operand 4 "vector_length_operand" " rK, r= K") > - (match_operand 5 "const_int_operand" " i, = i") > - (match_operand 6 "const_int_operand" " i, = i") > - (match_operand 7 "const_int_operand" " i, = i") > + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc= 1,vmWc1,vmWc1") > + (match_operand 4 "vector_length_operand" " rK, r= K, rK, rK") > + (match_operand 5 "const_int_operand" " i, = i, i, i") > + (match_operand 6 "const_int_operand" " i, = i, i, i") > + (match_operand 7 "const_int_operand" " i, = i, i, i") > (reg:SI VL_REGNUM) > (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) > (any_extend:VOEXTI > - (match_operand: 3 "register_operand" " vr, v= r")) > - (match_operand:VOEXTI 2 "vector_merge_operand" " vu, = 0")))] > + (match_operand: 3 "register_operand" " W87, W8= 7, vr, vr")) > + (match_operand:VOEXTI 2 "vector_merge_operand" " vu, = 0, vu, 0")))] > "TARGET_VECTOR" > "vext.vf8\t%0,%3%p1" > [(set_attr "type" "vext") > - (set_attr "mode" "")]) > + (set_attr "mode" "") > + (set_attr "group_overlap" "W87,W87,none,none")]) > > ;; Vector Widening Add/Subtract/Multiply. 
> (define_insn "@pred_dual_widen_" > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c b/gcc/= testsuite/gcc.target/riscv/rvv/base/pr112431-16.c > new file mode 100644 > index 00000000000..98f42458883 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-16.c > @@ -0,0 +1,68 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=3Drv64gcv -mabi=3Dlp64d -O3" } */ > + > +#include "riscv_vector.h" > + > +size_t __attribute__ ((noinline)) > +sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3, size_t sum= 4, > + size_t sum5, size_t sum6, size_t sum7) > +{ > + return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7; > +} > + > +size_t > +foo (char const *buf, size_t len) > +{ > + size_t sum =3D 0; > + size_t vl =3D __riscv_vsetvlmax_e8m8 (); > + size_t step =3D vl * 4; > + const char *it =3D buf, *end =3D buf + len; > + for (; it + step <=3D end;) > + { > + vint8m1_t v0 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); > + it +=3D vl; > + vint8m1_t v1 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); > + it +=3D vl; > + vint8m1_t v2 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); > + it +=3D vl; > + vint8m1_t v3 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); > + it +=3D vl; > + vint8m1_t v4 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); > + it +=3D vl; > + vint8m1_t v5 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); > + it +=3D vl; > + vint8m1_t v6 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); > + it +=3D vl; > + vint8m1_t v7 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); > + it +=3D vl; > + > + asm volatile("nop" ::: "memory"); > + vint32m4_t vw0 =3D __riscv_vsext_vf4_i32m4 (v0, vl); > + vint32m4_t vw1 =3D __riscv_vsext_vf4_i32m4 (v1, vl); > + vint32m4_t vw2 =3D __riscv_vsext_vf4_i32m4 (v2, vl); > + vint32m4_t vw3 =3D __riscv_vsext_vf4_i32m4 (v3, vl); > + vint32m4_t vw4 =3D __riscv_vsext_vf4_i32m4 (v4, vl); > + vint32m4_t vw5 =3D __riscv_vsext_vf4_i32m4 (v5, vl); > + vint32m4_t vw6 =3D __riscv_vsext_vf4_i32m4 (v6, vl); > + vint32m4_t vw7 =3D 
__riscv_vsext_vf4_i32m4 (v7, vl); > + > + asm volatile("nop" ::: "memory"); > + size_t sum0 =3D __riscv_vmv_x_s_i32m4_i32 (vw0); > + size_t sum1 =3D __riscv_vmv_x_s_i32m4_i32 (vw1); > + size_t sum2 =3D __riscv_vmv_x_s_i32m4_i32 (vw2); > + size_t sum3 =3D __riscv_vmv_x_s_i32m4_i32 (vw3); > + size_t sum4 =3D __riscv_vmv_x_s_i32m4_i32 (vw4); > + size_t sum5 =3D __riscv_vmv_x_s_i32m4_i32 (vw5); > + size_t sum6 =3D __riscv_vmv_x_s_i32m4_i32 (vw6); > + size_t sum7 =3D __riscv_vmv_x_s_i32m4_i32 (vw7); > + > + sum +=3D sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7)= ; > + } > + return sum; > +} > + > +/* { dg-final { scan-assembler-not {vmv1r} } } */ > +/* { dg-final { scan-assembler-not {vmv2r} } } */ > +/* { dg-final { scan-assembler-not {vmv4r} } } */ > +/* { dg-final { scan-assembler-not {vmv8r} } } */ > +/* { dg-final { scan-assembler-not {csrr} } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c b/gcc/= testsuite/gcc.target/riscv/rvv/base/pr112431-17.c > new file mode 100644 > index 00000000000..9b60005344d > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-17.c > @@ -0,0 +1,51 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=3Drv64gcv -mabi=3Dlp64d -O3" } */ > + > +#include "riscv_vector.h" > + > +size_t __attribute__ ((noinline)) > +sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3) > +{ > + return sum0 + sum1 + sum2 + sum3; > +} > + > +size_t > +foo (char const *buf, size_t len) > +{ > + size_t sum =3D 0; > + size_t vl =3D __riscv_vsetvlmax_e8m8 (); > + size_t step =3D vl * 4; > + const char *it =3D buf, *end =3D buf + len; > + for (; it + step <=3D end;) > + { > + vint8m2_t v0 =3D __riscv_vle8_v_i8m2 ((void *) it, vl); > + it +=3D vl; > + vint8m2_t v1 =3D __riscv_vle8_v_i8m2 ((void *) it, vl); > + it +=3D vl; > + vint8m2_t v2 =3D __riscv_vle8_v_i8m2 ((void *) it, vl); > + it +=3D vl; > + vint8m2_t v3 =3D __riscv_vle8_v_i8m2 ((void *) it, vl); > + it +=3D vl; > + > + asm volatile("nop" ::: 
"memory"); > + vint32m8_t vw0 =3D __riscv_vsext_vf4_i32m8 (v0, vl); > + vint32m8_t vw1 =3D __riscv_vsext_vf4_i32m8 (v1, vl); > + vint32m8_t vw2 =3D __riscv_vsext_vf4_i32m8 (v2, vl); > + vint32m8_t vw3 =3D __riscv_vsext_vf4_i32m8 (v3, vl); > + > + asm volatile("nop" ::: "memory"); > + size_t sum0 =3D __riscv_vmv_x_s_i32m8_i32 (vw0); > + size_t sum1 =3D __riscv_vmv_x_s_i32m8_i32 (vw1); > + size_t sum2 =3D __riscv_vmv_x_s_i32m8_i32 (vw2); > + size_t sum3 =3D __riscv_vmv_x_s_i32m8_i32 (vw3); > + > + sum +=3D sumation (sum0, sum1, sum2, sum3); > + } > + return sum; > +} > + > +/* { dg-final { scan-assembler-not {vmv1r} } } */ > +/* { dg-final { scan-assembler-not {vmv2r} } } */ > +/* { dg-final { scan-assembler-not {vmv4r} } } */ > +/* { dg-final { scan-assembler-not {vmv8r} } } */ > +/* { dg-final { scan-assembler-not {csrr} } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c b/gcc/= testsuite/gcc.target/riscv/rvv/base/pr112431-18.c > new file mode 100644 > index 00000000000..dd65b2fa098 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-18.c > @@ -0,0 +1,51 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=3Drv64gcv -mabi=3Dlp64d -O3" } */ > + > +#include "riscv_vector.h" > + > +size_t __attribute__ ((noinline)) > +sumation (size_t sum0, size_t sum1, size_t sum2, size_t sum3) > +{ > + return sum0 + sum1 + sum2 + sum3; > +} > + > +size_t > +foo (char const *buf, size_t len) > +{ > + size_t sum =3D 0; > + size_t vl =3D __riscv_vsetvlmax_e8m8 (); > + size_t step =3D vl * 4; > + const char *it =3D buf, *end =3D buf + len; > + for (; it + step <=3D end;) > + { > + vint8m1_t v0 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); > + it +=3D vl; > + vint8m1_t v1 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); > + it +=3D vl; > + vint8m1_t v2 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); > + it +=3D vl; > + vint8m1_t v3 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); > + it +=3D vl; > + > + asm volatile("nop" ::: "memory"); > + vint64m8_t vw0 
=3D __riscv_vsext_vf8_i64m8 (v0, vl); > + vint64m8_t vw1 =3D __riscv_vsext_vf8_i64m8 (v1, vl); > + vint64m8_t vw2 =3D __riscv_vsext_vf8_i64m8 (v2, vl); > + vint64m8_t vw3 =3D __riscv_vsext_vf8_i64m8 (v3, vl); > + > + asm volatile("nop" ::: "memory"); > + size_t sum0 =3D __riscv_vmv_x_s_i64m8_i64 (vw0); > + size_t sum1 =3D __riscv_vmv_x_s_i64m8_i64 (vw1); > + size_t sum2 =3D __riscv_vmv_x_s_i64m8_i64 (vw2); > + size_t sum3 =3D __riscv_vmv_x_s_i64m8_i64 (vw3); > + > + sum +=3D sumation (sum0, sum1, sum2, sum3); > + } > + return sum; > +} > + > +/* { dg-final { scan-assembler-not {vmv1r} } } */ > +/* { dg-final { scan-assembler-not {vmv2r} } } */ > +/* { dg-final { scan-assembler-not {vmv4r} } } */ > +/* { dg-final { scan-assembler-not {vmv8r} } } */ > +/* { dg-final { scan-assembler-not {csrr} } } */ > -- > 2.36.3 >