From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id 6D4D33858C52; Tue, 21 Mar 2023 01:09:33 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 6D4D33858C52 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1679360973; bh=b1P0K9c7UoM2WEjkdAopgWhN6Ea92YRTCyZ4BYSg47I=; h=From:To:Subject:Date:From; b=cnmdaadGYp4d7ZzsebX5kc37CZTJTpa/vUzZ/3G2NNuZRV/lUTBKhMoA2G/2V+HIe AuEN9w8GARVFcER26HCAVBE5FFP9JhkGNNB6qOh0I9Pjk86+quNT4gmtDRXx1fE5Tn 2lNPy3zy+8qUalgOWnqfrmV3jgQwlLvUn4ng9UhU= From: "witold.baryluk+gcc at gmail dot com" To: gcc-bugs@gcc.gnu.org Subject: [Bug d/109221] New: std.math.floor, core.math.ldexp, std.math.poly poor inlining Date: Tue, 21 Mar 2023 01:09:31 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: new X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: d X-Bugzilla-Version: 13.0 X-Bugzilla-Keywords: X-Bugzilla-Severity: normal X-Bugzilla-Who: witold.baryluk+gcc at gmail dot com X-Bugzilla-Status: UNCONFIRMED X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: ibuclaw at gdcproject dot org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: bug_id short_desc product version bug_status bug_severity priority component assigned_to reporter target_milestone Message-ID: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 List-Id: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D109221 Bug ID: 109221 Summary: std.math.floor, core.math.ldexp, std.math.poly poor inlining Product: gcc Version: 13.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: d Assignee: ibuclaw at gdcproject dot org Reporter: witold.baryluk+gcc at gmail dot com Target Milestone: --- Example: static float sRGB_case4(float x) { // import std.math : exp; return 1.055f * expImpl(x) - 0.055f; // expImpl 
not inlined by default // (inlined when using pragma(inline, true), but that fails to inline in DMD) } // pragma(inline, true) // This is borrowed from phobos/exponential.d to help gcc inline it fully. // Only T =3D=3D float case is here (as some traits are private to phobos). // Also isNaN and range checks are removed, as sRGB performs its own checks. static private T expImpl(T)(T x) @safe pure nothrow @nogc { //import std.math : floatTraits, RealFormat; //import std.math.traits : isNaN; //import std.math.rounding : floor; //import std.math.algebraic : poly; //import std.math.constants : LOG2E; import std.math; import core.math; static immutable T[6] P =3D [ 5.0000001201E-1, 1.6666665459E-1, 4.1665795894E-2, 8.3334519073E-3, 1.3981999507E-3, 1.9875691500E-4, ]; enum T C1 =3D 0.693359375; enum T C2 =3D -2.12194440e-4; // Overflow and Underflow limits. enum T OF =3D 88.72283905206835; enum T UF =3D -103.278929903431851103; // ln(2^-149) // Special cases. //if (isNaN(x)) // return x; //if (x > OF) // return real.infinity; //if (x < UF) // return 0.0; // Express: e^^x =3D e^^g * 2^^n // =3D e^^g * e^^(n * LOG2E) // =3D e^^(g + n * LOG2E) T xx =3D floor((cast(T) LOG2E) * x + cast(T) 0.5); // NOT INLINED! const int n =3D cast(int) xx; x -=3D xx * C1; x -=3D xx * C2; xx =3D x * x; x =3D poly(x, P) * xx + x + 1.0f; // poly is generated optimally, but not inlined // Scale by power of 2. 
x =3D core.math.ldexp(x, n); // NOT INLINED return x; } gdc gdc (Compiler-Explorer-Build-gcc-454a4d5041f53cd1f7d902f6c0017b7ce95b36df-binutils-2.38) 13.0.1 20230318 (experimental) gdc -O3 -march=3Dznver2 -frelease -fbounds-check=3Doff pure nothrow @nogc @safe float std.math.algebraic.poly!(float, float, 6).poly(float, ref const(float[6])): vmovss xmm1, DWORD PTR [rdi+20] vfmadd213ss xmm1, xmm0, DWORD PTR [rdi+16] vfmadd213ss xmm1, xmm0, DWORD PTR [rdi+12] vfmadd213ss xmm1, xmm0, DWORD PTR [rdi+8] vfmadd213ss xmm1, xmm0, DWORD PTR [rdi+4] vfmadd213ss xmm0, xmm1, DWORD PTR [rdi] ret pure nothrow @nogc @safe float example.expImpl!(float).expImpl(float): push rbx vmovaps xmm1, xmm0 sub rsp, 16 vmovss xmm0, DWORD PTR .LC0[rip] vfmadd213ss xmm0, xmm1, DWORD PTR .LC1[rip] vmovss DWORD PTR [rsp+8], xmm1 call pure nothrow @nogc @trusted float std.math.rounding.floor(float) vmovss xmm1, DWORD PTR [rsp+8] mov edi, OFFSET FLAT:immutable(float[6]) example.expImpl!(float).expImpl(float).P vfnmadd231ss xmm1, xmm0, DWORD PTR .LC2[rip] vmovss DWORD PTR [rsp+12], xmm0 vfnmadd231ss xmm1, xmm0, DWORD PTR .LC3[rip] vmulss xmm3, xmm1, xmm1 vmovaps xmm0, xmm1 vmovss DWORD PTR [rsp+8], xmm1 vmovd ebx, xmm3 call pure nothrow @nogc @safe float std.math.algebraic.poly!(float, float, 6).poly(float, ref const(float[6])) vmovss xmm1, DWORD PTR [rsp+8] vmovd xmm4, ebx vmovss xmm2, DWORD PTR [rsp+12] vfmadd132ss xmm0, xmm1, xmm4 vaddss xmm0, xmm0, DWORD PTR .LC4[rip] add rsp, 16 pop rbx vcvttss2si edi, xmm2 jmp ldexpf float example.sRGB_case4(float): sub rsp, 8 call pure nothrow @nogc @safe float example.expImpl!(float).expImpl(float) vmovss xmm1, DWORD PTR .LC6[rip] vfmadd132ss xmm0, xmm1, DWORD PTR .LC5[rip] add rsp, 8 ret https://godbolt.org/z/YMoMPdjn5 Additionally, std.math.exp itself is never inlined by gcc. This is important, as some early checks (isNaN, OF, UF checks) in exp could be removed by proper inlining.=