From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-bugzilla@gcc.gnu.org>
Received: by sourceware.org (Postfix, from userid 48)
 id 2F6753858407; Thu, 20 Jan 2022 13:05:43 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 2F6753858407
From: "asd0025 at gmail dot com" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug c++/104145] New: Extra instructions generated for dual float
 return on ARM64.
Date: Thu, 20 Jan 2022 13:05:43 +0000
X-Bugzilla-Reason: CC
X-Bugzilla-Type: new
X-Bugzilla-Watch-Reason: None
X-Bugzilla-Product: gcc
X-Bugzilla-Component: c++
X-Bugzilla-Version: unknown
X-Bugzilla-Keywords: 
X-Bugzilla-Severity: normal
X-Bugzilla-Who: asd0025 at gmail dot com
X-Bugzilla-Status: UNCONFIRMED
X-Bugzilla-Resolution: 
X-Bugzilla-Priority: P3
X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org
X-Bugzilla-Target-Milestone: ---
X-Bugzilla-Flags: 
X-Bugzilla-Changed-Fields: bug_id short_desc product version bug_status
 bug_severity priority component assigned_to reporter target_milestone
Message-ID: <bug-104145-4@http.gcc.gnu.org/bugzilla/>
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/
Auto-Submitted: auto-generated
MIME-Version: 1.0
X-BeenThere: gcc-bugs@gcc.gnu.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Gcc-bugs mailing list <gcc-bugs.gcc.gnu.org>
List-Unsubscribe: <https://gcc.gnu.org/mailman/options/gcc-bugs>,
 <mailto:gcc-bugs-request@gcc.gnu.org?subject=unsubscribe>
List-Archive: <https://gcc.gnu.org/pipermail/gcc-bugs/>
List-Post: <mailto:gcc-bugs@gcc.gnu.org>
List-Help: <mailto:gcc-bugs-request@gcc.gnu.org?subject=help>
List-Subscribe: <https://gcc.gnu.org/mailman/listinfo/gcc-bugs>,
 <mailto:gcc-bugs-request@gcc.gnu.org?subject=subscribe>
X-List-Received-Date: Thu, 20 Jan 2022 13:05:43 -0000

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D104145

            Bug ID: 104145
           Summary: Extra instructions generated for dual float return on
                    ARM64.
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: asd0025 at gmail dot com
  Target Milestone: ---

In the following code snippet, inefficient code is generated when returning=
 2
floats on ARM64/AArch64: https://godbolt.org/z/3G8nMT8W4

```
// Short aliases for the two floating-point types used below.
typedef float f32;
typedef double f64;

// Plain aggregate of two values. B defaults to A, so duo<f32> holds two
// f32 members and duo<f64> holds two f64 members.
template <class A, class B =3D A>
struct duo
{
    A a; // first element
    B b; // second element
};

// Loads two consecutive f32 values starting at p with one AArch64 LDNP
// (load pair with non-temporal hint) and returns them as a duo<f32>.
// Both outputs use the "w" (FP/SIMD register) constraint and are printed
// with the %s modifier (single-precision register names). The "m" input
// views *p as an array of two f32 so both elements count as read.
duo<f32> stream_load2(const f32* p)
{
    f32 a, b;
    asm("ldnp %s0, %s1, %2" : "=3Dw"(a), "=3Dw"(b) : "m"(*(const f32(*)[2])=
p));
    return {a, b}; // NOTE: many extra instructions are generated!
}

// Returns p[0] and p[1] as a duo<f32> using plain element reads instead
// of inline asm, leaving the choice of load instruction to the compiler.
duo<f32> stream_load2_ldp(const f32* p)
{
    return {p[0], p[1]}; // NOTE: inefficient code is generated for this!
}

// f64 overload: loads two consecutive f64 values starting at p with one
// AArch64 LDNP and returns them as a duo<f64>. Operands are printed with
// the %d modifier (double-precision register names); the "m" input views
// *p as an array of two f64 so both elements count as read.
duo<f64> stream_load2(const f64* p)
{
    f64 a, b;
    asm("ldnp %d0, %d1, %2" : "=3Dw"(a), "=3Dw"(b) : "m"(*(const f64(*)[2])=
p));
    return {a, b}; // NOTE: works as expected!
}
```

GCC output (v6.4+):
```
stream_load2(float const*):
        ldnp s1, s0, [x0]
        fmov    w2, s1
        fmov    w0, s0
        mov     x1, 0
        bfi     x1, x2, 0, 32
        bfi     x1, x0, 32, 32
        lsr     x0, x1, 32
        lsr     w1, w1, 0
        fmov    s1, w0
        fmov    s0, w1
        ret

stream_load2_ldp(float const*):
        ldr     d0, [x0]
        fmov    x1, d0
        lsr     x0, x1, 32
        fmov    s1, w0
        lsr     w0, w1, 0
        fmov    s0, w0
        ret

stream_load2(double const*):
        ldnp d0, d1, [x0]
        ret
```

Clang output:
```
stream_load2(float const*):
        ldnp    s0, s1, [x0]
        ret

stream_load2_ldp(float const*):
        ldp     s0, s1, [x0]
        ret

stream_load2(double const*):
        ldnp    d0, d1, [x0]
        ret
```=