From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id D74A53851416; Thu, 28 Jan 2021 15:02:50 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org D74A53851416 From: "ktkachov at gcc dot gnu.org" To: gcc-bugs@gcc.gnu.org Subject: [Bug target/98867] New: Failure to use SRI instruction for shift-right-and-insert vector operations Date: Thu, 28 Jan 2021 15:02:50 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: new X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: target X-Bugzilla-Version: unknown X-Bugzilla-Keywords: missed-optimization X-Bugzilla-Severity: normal X-Bugzilla-Who: ktkachov at gcc dot gnu.org X-Bugzilla-Status: UNCONFIRMED X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: bug_id short_desc product version bug_status keywords bug_severity priority component assigned_to reporter target_milestone cf_gcctarget Message-ID: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 X-BeenThere: gcc-bugs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-bugs mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 28 Jan 2021 15:02:50 -0000 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98867 Bug ID: 98867 Summary: Failure to use SRI instruction for shift-right-and-insert vector operations Product: gcc Version: unknown Status: UNCONFIRMED Keywords: missed-optimization Severity: normal Priority: P3 Component: target Assignee: unassigned at gcc dot gnu.org Reporter: ktkachov at gcc dot gnu.org Target Milestone: --- Target: aarch64 #define N 1024 unsigned char in[N]; unsigned char out[N]; #define SHIFT 6 void foo (void) { for (int i = 0; i < N; i++) { 
unsigned char mask = 255u >> SHIFT; unsigned char shifted = in[i] >> SHIFT; out[i] = (out[i] & ~mask) | shifted; } } at -O3 generates: foo: adrp x1, .LANCHOR0 add x1, x1, :lo12:.LANCHOR0 movi v2.16b, 0xfffffffffffffffc add x2, x1, 1024 mov x0, 0 .L2: ldr q0, [x1, x0] ldr q1, [x0, x2] and v0.16b, v0.16b, v2.16b ushr v1.16b, v1.16b, 6 orr v0.16b, v0.16b, v1.16b str q0, [x1, x0] add x0, x0, 16 cmp x0, 1024 bne .L2 ret whereas it could use the SRI instruction as clang does (unrolled 2x): foo: // @foo adrp x9, in adrp x10, out mov x8, xzr add x9, x9, :lo12:in add x10, x10, :lo12:out .LBB0_1: // %vector.body add x11, x9, x8 add x12, x10, x8 ldp q0, q1, [x11] ldp q2, q3, [x12] add x8, x8, #32 // =32 cmp x8, #1024 // =1024 sri v2.16b, v0.16b, #6 sri v3.16b, v1.16b, #6 stp q2, q3, [x12] b.ne .LBB0_1 This may be a bit too complex for combine to match though