From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id A09DE38582A3; Sat, 12 Nov 2022 18:06:05 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org A09DE38582A3 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1668276365; bh=KPF54uGnsNoWfchwk2alMwn0EhPrljI7nh8T12i7UMs=; h=From:To:Subject:Date:From; b=bK8Q0pqgNM/vsByyVhXZWDtra+GhutK8kOrOIhWqGwRJWpIEFro0g67aEN4QTQL4/ em1rC/q2MN8uaVdnkpAkFopq3IO9mbMJSahIDv1rfUQg21P8fONuScG5c4wXMYLL0Z iHD78bUhO8Kr1b3bIAUhVPwCPkS1r0mRTYYSvNxI= From: "slyfox at gcc dot gnu.org" To: gcc-bugs@gcc.gnu.org Subject: [Bug middle-end/107661] New: [13 Regression] lambdas get merged incorrectly in tempaltes, cause llvm-12 miscompilation Date: Sat, 12 Nov 2022 18:06:04 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: new X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: middle-end X-Bugzilla-Version: 13.0 X-Bugzilla-Keywords: X-Bugzilla-Severity: normal X-Bugzilla-Who: slyfox at gcc dot gnu.org X-Bugzilla-Status: UNCONFIRMED X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: bug_id short_desc product version bug_status bug_severity priority component assigned_to reporter target_milestone attachments.created Message-ID: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 List-Id: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D107661 Bug ID: 107661 Summary: [13 Regression] lambdas get merged incorrectly in tempaltes, cause llvm-12 miscompilation Product: gcc Version: 13.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: middle-end Assignee: unassigned at gcc dot gnu.org Reporter: slyfox at gcc dot gnu.org Target Milestone: --- Created attachment 53888 --> 
https://gcc.gnu.org/bugzilla/attachment.cgi?id=3D53888&action=3Dedit a.cc Initially observed the problem on llvm-12's test suite where 4 AMDGCN tests fail: Failed Tests (4): LLVM :: CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll LLVM :: CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll LLVM :: CodeGen/AMDGPU/smem-war-hazard.mir LLVM :: CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll Digging deeper it looks like llvm's class template class function_ref ... gets miscompiled in a very unusual way. I extracted a smaller a.cc reproducer. It looks as if gcc picked the wrong (unused) lambda to inline into actually used code. Reproducing: $ ./gcc-13-HEAD/bin/gcc -Wall -O0 a.cc -o a $ ./gcc-13-HEAD/bin/gcc -Wall -O3 a.cc -o a ./bug_HEAD.bash: line 6: 1309437 Illegal instruction (core dumped) ./a $ ./gcc-13-HEAD/bin/gcc -Wall -O0 -DDISABLE_HACK a.cc -o a $ ./gcc-13-HEAD/bin/gcc -Wall -O3 -DDISABLE_HACK a.cc -o a $ ./gcc-13-HEAD/bin/gcc -v |& unnix Using built-in specs. COLLECT_GCC=3D/<>/gcc-13.0.0/bin/gcc COLLECT_LTO_WRAPPER=3D/<>/gcc-13.0.0/libexec/gcc/x86_64-unknown-linux-= gnu/13.0.0/lto-wrapper Target: x86_64-unknown-linux-gnu Configured with: Thread model: posix Supported LTO compression algorithms: zlib gcc version 13.0.0 20221112 (experimental) (GCC) Full a.cc example (somewhat long, also attached): /// 'function_ref' is taken from llvm-12 as is without any modifications. /// The rest is severely maimed AMDGCN hazard verifier code. // How to break: // $ ./gcc-13-snap/bin/gcc -O3 a.cc -o a && ./a // Illegal instruction (core dumped) // $ ./gcc-13-snap/bin/gcc -O3 -DDISABLE_HACK a.cc -o a && ./a // #pragma GCC optimize "-O1" #pragma GCC optimize "-fipa-cp" #pragma GCC optimize "-fipa-cp-clone" // #define DISABLE_HACK 1 #include #include #include #include /// An efficient, type-erasing, non-owning reference to a callable. This is /// intended for use as the type of a function parameter that is not used /// after the function in question returns. 
/// /// This class does not own the callable, so it is not in general safe to s= tore /// a function_ref. template class function_ref; template class function_ref { Ret (*callback)(intptr_t callable, Params ...params) =3D nullptr; intptr_t callable; template //__attribute__((noinline, noipa)) static Ret callback_fn(intptr_t callable, Params ...params) { return (*reinterpret_cast(callable))( std::forward(params)...); } public: __attribute__((noinline, noipa)) function_ref() =3D default; __attribute__((noinline, noipa)) function_ref(std::nullptr_t) {} template //__attribute__((noinline, noipa)) function_ref( Callable &&callable, // This is not the copy-constructor. std::enable_if_t< !std::is_same>, function_ref>::value> * =3D nullptr, // Functor must be callable and return a suitable type. std::enable_if_t::value || std::is_convertible(= )( std::declval()...)), Ret>::value> * =3D nullptr) : callback(callback_fn::type= >), callable(reinterpret_cast(&callable)) {} //__attribute__((noinline, noipa)) Ret operator()(Params ...params) const { return callback(callable, std::forward(params)...); } __attribute__((noinline, noipa)) explicit operator bool() const { return callback; } }; typedef int OI; typedef int OBB; typedef function_ref IsExpiredFnT; typedef function_ref IsHazardFnT; __attribute__((noinline, noipa)) OI get_e( OBB MBB, OI I) { static int n =3D 0; switch (n++) { case 0: return I; case 1: return ++I; default: return I; } } __attribute__((noinline, noipa)) static OBB get_mbb_b(OBB MBB) { return MBB; } __attribute__((noinline, noipa)) static OBB get_mbb_e(OBB MBB) { static int n =3D 0; switch (n++) { case 0: return MBB + 1; default: return MBB; } } __attribute__((noinline)) static int getWaitStatesSince6(IsHazardFnT IsHazard, OBB MBB, OI I, int WaitStates, IsExpiredFnT IsExpired) { auto E =3D get_e(MBB, I); if (I !=3D E) { WaitStates +=3D 2; if (IsExpired(I, WaitStates)) return std::numeric_limits::max(); } auto pri =3D get_mbb_b(MBB); auto pre =3D 
get_mbb_e(MBB); if (pri !=3D pre) { OBB Pred =3D pri; getWaitStatesSince6(IsHazard, Pred, I, WaitStates, IsExpired); } return std::numeric_limits::max(); } __attribute__((noinline)) // not a noclone static int getWaitStatesSince3(IsHazardFnT IsHazard, OI MI, IsExpiredFnT IsExpired) { return getWaitStatesSince6(IsHazard, 0, MI, 0, IsExpired); } __attribute__((noinline, noipa)) bool bug(OI MI) { auto IsHazardFn =3D [](OI I) __attribute__((noinline, noipa)) { return fa= lse; }; auto IsExpiredFn =3D [](OI MI, int) __attribute__((noinline, noipa)) { re= turn true; }; ::getWaitStatesSince3(IsHazardFn, MI, IsExpiredFn); return true; } __attribute__((noinline, noipa)) int main() { bug(0); } #if defined(DISABLE_HACK) #else __attribute__((noinline, noipa)) int seemingly_unused_foo(IsHazardFnT IsHazard, int Limit, OI MI) { auto IsExpiredFn =3D [Limit] (OI, int WaitStates) { __builtin_trap(); return WaitStates >=3D Limit; }; return ::getWaitStatesSince3(IsHazard, MI, IsExpiredFn); } #endif=