From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id F3358385801F; Thu, 30 Nov 2023 12:11:43 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org F3358385801F DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1701346304; bh=Jhe9qoy97bf0EyLB3JQCUyD2afv9g4qou7sEx+8/nCs=; h=From:To:Subject:Date:In-Reply-To:References:From; b=JQM0IgaPX/RqAa5xLHAMrsIsgLTxhYw6K1O/p/72+n5wvknr3kf2ANjZKhXNzLETU +O8UpUUf7Si0BlwlDCNfmGPUeUBN2vlQ1kyQgETl1gxpN4S4Kl7N0T3wuU02W9HFfz tL09VZ4P4yHkqRKMuDyWoIMhEuvDpPhjQgC4EWsE= From: "cvs-commit at gcc dot gnu.org" To: gcc-bugs@gcc.gnu.org Subject: [Bug target/112431] RISC-V GCC-15 feature: Support register overlap on widen RVV instructions Date: Thu, 30 Nov 2023 12:11:43 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: changed X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: target X-Bugzilla-Version: 14.0 X-Bugzilla-Keywords: missed-optimization X-Bugzilla-Severity: enhancement X-Bugzilla-Who: cvs-commit at gcc dot gnu.org X-Bugzilla-Status: UNCONFIRMED X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: Message-ID: In-Reply-To: References: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 List-Id: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D112431 --- Comment #8 from GCC Commits --- The master branch has been updated by Pan Li : https://gcc.gnu.org/g:303195e2a6b6f0e8f42e0578b61f9f37c6250beb commit r14-6008-g303195e2a6b6f0e8f42e0578b61f9f37c6250beb Author: Juzhe-Zhong Date: Thu Nov 30 20:08:43 2023 +0800 RISC-V: Support widening register overlap for vf4/vf8 size_t foo (char const *buf, size_t len) { size_t sum =3D 0; size_t vl =3D __riscv_vsetvlmax_e8m8 (); size_t step =3D 
vl * 4; const char *it =3D buf, *end =3D buf + len; for (; it + step <=3D end;) { vint8m1_t v0 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); it +=3D vl; vint8m1_t v1 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); it +=3D vl; vint8m1_t v2 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); it +=3D vl; vint8m1_t v3 =3D __riscv_vle8_v_i8m1 ((void *) it, vl); it +=3D vl; asm volatile("nop" ::: "memory"); vint64m8_t vw0 =3D __riscv_vsext_vf8_i64m8 (v0, vl); vint64m8_t vw1 =3D __riscv_vsext_vf8_i64m8 (v1, vl); vint64m8_t vw2 =3D __riscv_vsext_vf8_i64m8 (v2, vl); vint64m8_t vw3 =3D __riscv_vsext_vf8_i64m8 (v3, vl); asm volatile("nop" ::: "memory"); size_t sum0 =3D __riscv_vmv_x_s_i64m8_i64 (vw0); size_t sum1 =3D __riscv_vmv_x_s_i64m8_i64 (vw1); size_t sum2 =3D __riscv_vmv_x_s_i64m8_i64 (vw2); size_t sum3 =3D __riscv_vmv_x_s_i64m8_i64 (vw3); sum +=3D sumation (sum0, sum1, sum2, sum3); } return sum; } Before this patch: add a3,s0,s1 add a4,s6,s1 add a5,s7,s1 vsetvli zero,s0,e64,m8,ta,ma vle8.v v4,0(s1) vle8.v v3,0(a3) mv s1,s2 vle8.v v2,0(a4) vle8.v v1,0(a5) nop vsext.vf8 v8,v4 vsext.vf8 v16,v2 vs8r.v v8,0(sp) vsext.vf8 v24,v1 vsext.vf8 v8,v3 nop vmv.x.s a1,v8 vl8re64.v v8,0(sp) vmv.x.s a3,v24 vmv.x.s a2,v16 vmv.x.s a0,v8 add s2,s2,s5 call sumation add s3,s3,a0 bgeu s4,s2,.L5 After this patch: add a3,s0,s1 add a4,s6,s1 add a5,s7,s1 vsetvli zero,s0,e64,m8,ta,ma vle8.v v15,0(s1) vle8.v v23,0(a3) mv s1,s2 vle8.v v31,0(a4) vle8.v v7,0(a5) vsext.vf8 v8,v15 vsext.vf8 v16,v23 vsext.vf8 v24,v31 vsext.vf8 v0,v7 vmv.x.s a3,v0 vmv.x.s a2,v24 vmv.x.s a1,v16 vmv.x.s a0,v8 add s2,s2,s5 call sumation add s3,s3,a0 bgeu s4,s2,.L5 PR target/112431 gcc/ChangeLog: * config/riscv/vector.md: Add widening overlap of vf4/vf8. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr112431-16.c: New test. * gcc.target/riscv/rvv/base/pr112431-17.c: New test. * gcc.target/riscv/rvv/base/pr112431-18.c: New test.=