From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id 8EC3B3874C29; Wed, 24 Feb 2021 05:23:18 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 8EC3B3874C29 From: "crazylht at gmail dot com" To: gcc-bugs@gcc.gnu.org Subject: [Bug target/99228] blend/shuffle Date: Wed, 24 Feb 2021 05:23:18 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: changed X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: target X-Bugzilla-Version: unknown X-Bugzilla-Keywords: missed-optimization X-Bugzilla-Severity: normal X-Bugzilla-Who: crazylht at gmail dot com X-Bugzilla-Status: UNCONFIRMED X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: Message-ID: In-Reply-To: References: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 X-BeenThere: gcc-bugs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-bugs mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 24 Feb 2021 05:23:18 -0000 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D99228 --- Comment #3 from Hongtao.liu --- 1. To eliminate branch instructions, -ffast-math needs to be added. 2. Without inline complex sgn, gcc also generates blend/shuffle -std=3Dgnu++20 -Ofast -march=3Dznver2 -mno-vzeroupper #include #include #include #define TYPE double TYPE sgn(const TYPE &arg) { // https://de.wikipedia.org/wiki/Vorzeichenfunktion const TYPE s{copysign (TYPE{1}, arg)}; // v1 return (arg !=3D 0) ? 
s : 0; // v2 //if (arg !=3D 0) [[likely]] return s; //else return 0; // v3 //return std::conditional_move(arg !=3D 0, s, Type{0}); } TYPE complex_sgn(const std::complex &arg) { // https://en.wikipedia.org/wiki/Sign_function#Complex_signum const TYPE sr{sgn(arg.real())}; const TYPE si{sgn(arg.imag())}; // v1 return (arg.real() !=3D 0) ? sr : si; // v2 //if (arg.real() !=3D 0) [[likely]] return sr; //else ret= urn si; // v3 //return std::conditional_move(arg.real() !=3D 0, sr, si); } int main(const int argc, const char** args) { using value_type =3D TYPE; using complex_type =3D std::complex; if (argc =3D=3D 4) { const value_type a{value_type(std::stod(args[1]))}; const complex_type b{value_type(std::stod(args[2])), value_type(std::stod(args[3])= )}; std::cout << a << std::endl; std::cout << b << std::endl; std::cout << sgn(a) << std::endl; std::cout << complex_sgn(b) << std::endl; } return EXIT_SUCCESS; } assembly code sgn(double const&): vmovsd xmm0, QWORD PTR [rdi] vcomisd xmm0, QWORD PTR .LC0[rip] je .L8 vandpd xmm0, xmm0, XMMWORD PTR .LC1[rip] vorpd xmm0, xmm0, XMMWORD PTR .LC2[rip] .L8: ret complex_sgn(std::complex const&): vmovsd xmm0, QWORD PTR [rdi+8] vmovq xmm4, QWORD PTR .LC1[rip] vxorpd xmm2, xmm2, xmm2 vmovq xmm3, QWORD PTR .LC2[rip] vmovsd xmm1, QWORD PTR [rdi] vmovsd xmm5, xmm0, xmm0 vcmpeq_ussd xmm6, xmm0, xmm2 vandpd xmm5, xmm5, xmm4 vorpd xmm5, xmm5, xmm3 vblendvpd xmm0, xmm5, xmm0, xmm6 vmovsd xmm5, xmm1, xmm1 vandpd xmm5, xmm5, xmm4 vcmpneq_oqsd xmm1, xmm1, xmm2 vorpd xmm5, xmm5, xmm3 vblendvpd xmm0, xmm0, xmm5, xmm1 ret https://godbolt.org/z/cosh93=