From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-bugzilla@gcc.gnu.org>
Received: by sourceware.org (Postfix, from userid 48)
	id 6D4D33858C52; Tue, 21 Mar 2023 01:09:33 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 6D4D33858C52
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1679360973;
	bh=b1P0K9c7UoM2WEjkdAopgWhN6Ea92YRTCyZ4BYSg47I=;
	h=From:To:Subject:Date:From;
	b=cnmdaadGYp4d7ZzsebX5kc37CZTJTpa/vUzZ/3G2NNuZRV/lUTBKhMoA2G/2V+HIe
	 AuEN9w8GARVFcER26HCAVBE5FFP9JhkGNNB6qOh0I9Pjk86+quNT4gmtDRXx1fE5Tn
	 2lNPy3zy+8qUalgOWnqfrmV3jgQwlLvUn4ng9UhU=
From: "witold.baryluk+gcc at gmail dot com" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug d/109221] New: std.math.floor, core.math.ldexp, std.math.poly
 poor inlining
Date: Tue, 21 Mar 2023 01:09:31 +0000
X-Bugzilla-Reason: CC
X-Bugzilla-Type: new
X-Bugzilla-Watch-Reason: None
X-Bugzilla-Product: gcc
X-Bugzilla-Component: d
X-Bugzilla-Version: 13.0
X-Bugzilla-Keywords: 
X-Bugzilla-Severity: normal
X-Bugzilla-Who: witold.baryluk+gcc at gmail dot com
X-Bugzilla-Status: UNCONFIRMED
X-Bugzilla-Resolution: 
X-Bugzilla-Priority: P3
X-Bugzilla-Assigned-To: ibuclaw at gdcproject dot org
X-Bugzilla-Target-Milestone: ---
X-Bugzilla-Flags: 
X-Bugzilla-Changed-Fields: bug_id short_desc product version bug_status
 bug_severity priority component assigned_to reporter target_milestone
Message-ID: <bug-109221-4@http.gcc.gnu.org/bugzilla/>
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/
Auto-Submitted: auto-generated
MIME-Version: 1.0
List-Id: <gcc-bugs.sourceware.org>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D109221

            Bug ID: 109221
           Summary: std.math.floor, core.math.ldexp, std.math.poly poor
                    inlining
           Product: gcc
           Version: 13.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: d
          Assignee: ibuclaw at gdcproject dot org
          Reporter: witold.baryluk+gcc at gmail dot com
  Target Milestone: ---

Example:

/// Example from the report: returns 1.055f * expImpl(x) - 0.055f.
static float sRGB_case4(float x) {
    // import std.math : exp;
    // expImpl is not inlined by default. It is inlined when using
    // pragma(inline, true), but that fails to inline in DMD.
    return 1.055f * expImpl(x) - 0.055f;
}


// pragma(inline, true)
// This is borrowed from phobos/exponential.d to help gcc inline it fully.
// Only T =3D=3D float case is here (as some traits are private to phobos).
// Also isNaN and range checks are removed, as sRGB performs own checks.
//
// Computes e^^x by range reduction: x is split as g + n * ln(2), e^^g is
// approximated as 1 + g + g*g * poly(g, P), and the result is scaled by
// 2^^n with ldexp. No NaN, overflow or underflow handling is done here.
static private T expImpl(T)(T x) @safe pure nothrow @nogc
{
    //import std.math : floatTraits, RealFormat;
    //import std.math.traits : isNaN;
    //import std.math.rounding : floor;
    //import std.math.algebraic : poly;
    //import std.math.constants : LOG2E;
    import std.math;
    import core.math;

    // Polynomial coefficients handed to poly() below.
    static immutable T[6] P =3D [
        5.0000001201E-1,
        1.6666665459E-1,
        4.1665795894E-2,
        8.3334519073E-3,
        1.3981999507E-3,
        1.9875691500E-4,
    ];

    // C1 + C2 is approximately ln(2); it is subtracted in two steps.
    enum T C1 =3D 0.693359375;
    enum T C2 =3D -2.12194440e-4;

    // Overflow and Underflow limits.
    // Only referenced by the disabled special-case checks below.
    enum T OF =3D 88.72283905206835;
    enum T UF =3D -103.278929903431851103; // ln(2^-149)

    // Special cases (disabled, see the note above the function).
    //if (isNaN(x))
    //    return x;
    //if (x > OF)
    //    return real.infinity;
    //if (x < UF)
    //    return 0.0;

    // Express: e^^x =3D e^^g * 2^^n
    //   =3D e^^g * e^^(n * LN2)
    //   =3D e^^(g + n * LN2)
    // NOT INLINED: the call to floor.
    T xx =3D floor((cast(T) LOG2E) * x + cast(T) 0.5);
    const int n =3D cast(int) xx;
    x -=3D xx * C1;
    x -=3D xx * C2;

    // poly is generated optimally, but not inlined.
    xx =3D x * x;
    x =3D poly(x, P) * xx + x + 1.0f;

    // Scale by power of 2.
    // NOT INLINED: the call to ldexp.
    x =3D core.math.ldexp(x, n);

    return x;
}


gdc gdc
(Compiler-Explorer-Build-gcc-454a4d5041f53cd1f7d902f6c0017b7ce95b36df-binut=
ils-2.38)
13.0.1 20230318 (experimental)
gdc -O3 -march=3Dznver2 -frelease -fbounds-check=3Doff


pure nothrow @nogc @safe float std.math.algebraic.poly!(float, float,
6).poly(float, ref const(float[6])):
        vmovss  xmm1, DWORD PTR [rdi+20]
        vfmadd213ss     xmm1, xmm0, DWORD PTR [rdi+16]
        vfmadd213ss     xmm1, xmm0, DWORD PTR [rdi+12]
        vfmadd213ss     xmm1, xmm0, DWORD PTR [rdi+8]
        vfmadd213ss     xmm1, xmm0, DWORD PTR [rdi+4]
        vfmadd213ss     xmm0, xmm1, DWORD PTR [rdi]
        ret
pure nothrow @nogc @safe float example.expImpl!(float).expImpl(float):
        push    rbx
        vmovaps xmm1, xmm0
        sub     rsp, 16
        vmovss  xmm0, DWORD PTR .LC0[rip]
        vfmadd213ss     xmm0, xmm1, DWORD PTR .LC1[rip]
        vmovss  DWORD PTR [rsp+8], xmm1
        call    pure nothrow @nogc @trusted float
std.math.rounding.floor(float)
        vmovss  xmm1, DWORD PTR [rsp+8]
        mov     edi, OFFSET FLAT:immutable(float[6])
example.expImpl!(float).expImpl(float).P
        vfnmadd231ss    xmm1, xmm0, DWORD PTR .LC2[rip]
        vmovss  DWORD PTR [rsp+12], xmm0
        vfnmadd231ss    xmm1, xmm0, DWORD PTR .LC3[rip]
        vmulss  xmm3, xmm1, xmm1
        vmovaps xmm0, xmm1
        vmovss  DWORD PTR [rsp+8], xmm1
        vmovd   ebx, xmm3
        call    pure nothrow @nogc @safe float std.math.algebraic.poly!(flo=
at,
float, 6).poly(float, ref const(float[6]))
        vmovss  xmm1, DWORD PTR [rsp+8]
        vmovd   xmm4, ebx
        vmovss  xmm2, DWORD PTR [rsp+12]
        vfmadd132ss     xmm0, xmm1, xmm4
        vaddss  xmm0, xmm0, DWORD PTR .LC4[rip]
        add     rsp, 16
        pop     rbx
        vcvttss2si      edi, xmm2
        jmp     ldexpf
float example.sRGB_case4(float):
        sub     rsp, 8
        call    pure nothrow @nogc @safe float
example.expImpl!(float).expImpl(float)
        vmovss  xmm1, DWORD PTR .LC6[rip]
        vfmadd132ss     xmm0, xmm1, DWORD PTR .LC5[rip]
        add     rsp, 8
        ret


https://godbolt.org/z/YMoMPdjn5


Additionally:

std.math.exp itself is never inlined by gcc. This is important, as some
early checks (isNaN, OF, UF checks) in exp could be removed by proper
inlining.=