From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id 744E13858C20; Tue, 5 Sep 2023 16:24:56 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 744E13858C20 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1693931096; bh=9OVzSHwN0o5u2kjYJk+rlBFQQMeMe4XhEap64winZDw=; h=From:To:Subject:Date:In-Reply-To:References:From; b=NU6FYUSdjdcvrLkNe2PwIjqctHQsjR88jlG40QJB+f6lg2TbZAI5r78aEcSbN6Zrc 0MMXI7spcCea6mlU+9jAd/HFzroLS18mlH3U4SvHmLZ2CVWehHnbey2z9x3SuBxSeV GjNPXBHWczKyBHjDPefQaAgJY3739UQibxvGLw4s= From: "malat at debian dot org" To: gcc-bugs@gcc.gnu.org Subject: [Bug target/111231] armhf: Miscompilation at O2 level (O1 is working) Date: Tue, 05 Sep 2023 16:24:56 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: changed X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: target X-Bugzilla-Version: 13.2.0 X-Bugzilla-Keywords: wrong-code X-Bugzilla-Severity: normal X-Bugzilla-Who: malat at debian dot org X-Bugzilla-Status: UNCONFIRMED X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: Message-ID: In-Reply-To: References: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 List-Id: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D111231 --- Comment #5 from Mathieu Malaterre --- Result from cvise (+manual editing): c++ -Wfatal-errors -Wall -Wextra -Werror -o works -O1 -march=3Darmv7-a -mfpu=3Dneon-vfpv4 -mfloat-abi=3Dhard -mfp16-format=3Dieee step7.cc -> exit(0) c++ -Wfatal-errors -Wall -Wextra -Werror -o fails -O2 -march=3Darmv7-a -mfpu=3Dneon-vfpv4 -mfloat-abi=3Dhard -mfp16-format=3Dieee step7.cc -> exit(42) Where: % cat step7.cc=20 #include #include #define HWY_MIN(a, b) a < b template void CopyBytes(From from, To to) { __builtin_memcpy(to, from, kBytes); } template void CopySameSize(From from, To to) { CopyBytes(from, to); } struct Relations { using Wide =3D int16_t; }; template using MakeWide =3D Relations::Wide; #if 0 #else #define HWY_LANES(T) (sizeof(T)) #endif namespace detail { constexpr size_t ScaleByPower(size_t N, int pow2) { return pow2 >=3D 0 ? N = : 0; } } // namespace detail template struct Simd { using T =3D Lane; static constexpr size_t kWhole =3D N; static constexpr int kFrac =3D 0; static constexpr size_t kPrivateLanes =3D detail::ScaleByPower(kWhole, kF= rac); constexpr size_t MaxBytes() { return kPrivateLanes; } template static constexpr size_t RepartitionLanes() { return sizeof(int); } template static constexpr size_t WholeN() { return detail::ScaleByPower(kNewMaxLanes, kNewPow2); } template static constexpr size_t NewN= () { return WholeN(); } template using Rebind =3D Simd; template using Repartition =3D Simd()>(), kPow2>; }; namespace detail { template struct ClampNAndPow2 { using type =3D Simd; }; template struct CappedTagChecker { static constexpr size_t N =3D HWY_LANES(T); using type =3D typename ClampNAndPow2::type; }; } // namespace detail template using CappedTag =3D typename detail::CappedTagChecker::ty= pe; template using TFromD =3D typename D::T; #define HWY_MAX_LANES_D(D) D::kPrivateLanes template size_t MaxLanes(D) { return HWY_MAX_LANES_D(D); } template size_t Lanes(D) { return HWY_MAX_LANES_D(D); } template using Rebind =3D typename D::Rebind; template using RebindToUnsigned =3D Rebind; template using Repartition =3D typename D::Repartition; template using RepartitionToWide =3D Repartition, D>; #include #include #include namespace hwy { template struct Vec128 { T raw[16 / sizeof(T)]; }; template Vec128, HWY_MAX_LANES_D(D)> Zero(D); template using VFromD =3D decltype(Zero(D())); template VFromD BitCast(D, VFrom v) { VFromD to; CopySameSize(&v, &to); return to; } template VFromD Set(D d, T2 t) { VFromD v; for (size_t i =3D 0; i < MaxLanes(d); ++i) v.raw[i] =3D t; return v; } template Vec128 SaturatedAdd(Vec128 b) { Vec128 a; using TW =3D T; for (size_t i =3D 0; i < N; ++i) a.raw[i] =3D HWY_MIN(TW() + b.raw[i], T()); return a; } template void Store(VFromD v, D d, TFromD *aligned) { CopyBytes(v.raw, aligned); } template using Vec =3D decltype(Zero(D())); template Vec SatWidenMulPairwiseAdd(DI16 di16, VU8 a, VI8 b) { RebindToUnsigned du16; auto a1 =3D BitCast(du16, a), b1(BitCast(di16, b)); return SaturatedAdd(a1); } using AllocPtr =3D void *(void *, size_t); using FreePtr =3D void (*)(void *, void *); void *AllocateAlignedBytes(size_t size, AllocPtr, void *) { return aligned_alloc(4096, size); } void FreeAlignedBytes(const void *ptr, FreePtr, void *) { free((void*)ptr); } AllocPtr *AllocateAlignedItems_alloc_ptr; template T *AllocateAlignedItems(size_t items) { size_t size =3D sizeof(T); bool is_pow2 =3D 0; size_t bits(size); size_t bytes =3D is_pow2 ? bits : items; return static_cast( AllocateAlignedBytes(bytes, AllocateAlignedItems_alloc_ptr, 0)); } struct AlignedFreer { AlignedFreer(FreePtr free_ptr, void *opaque_ptr) : free_(free_ptr), opaque_ptr_(opaque_ptr) {} template void operator()(T aligned_pointer) { FreeAlignedBytes(aligned_pointer, free_, opaque_ptr_); } FreePtr free_; void *opaque_ptr_; }; template using AlignedFreeUniquePtr =3D std::unique_ptr; FreePtr AllocateAligned_free; template AlignedFreeUniquePtr AllocateAligned(size_t items, void *opaque) { T *__trans_tmp_1 =3D AllocateAlignedItems(items); return AlignedFreeUniquePtr(__trans_tmp_1, AlignedFreer(AllocateAligned_free, opaqu= e)); } template AlignedFreeUniquePtr AllocateAligned(size_t item= s) { return AllocateAligned(items, nullptr); } void AssertArrayEqual2(void *a, void *b, size_t c, const char *, int) { if (memcmp(a, b, c)) exit(42); } template > void p(n d, Vec expected, Vec actual, const char *ab, int ac) { size_t q =3D Lanes(d); auto r =3D AllocateAligned(q), s =3D AllocateAligned(q); Store(expected, d, r.get()); Store(actual, d, s.get()); AssertArrayEqual2(r.get(), s.get(), q, ab, ac); } #define t(d, expected, actual) p(d, expected, actual, __FILE__, __LINE__) template struct w { static void x(size_t ae, size_t af) { CappedTag d; size_t ag =3D Lanes(d); if (ag < ae) return; if (af) v()(o(), d); } }; struct y { template void operator()(b, c d) { RepartitionToWide aa; size_t g =3D Lanes(d); RebindToUnsigned h; auto m =3D 0, j =3D Set(h, b()), k =3D Set(aa, 0); for (size_t i =3D 0; i < g; i++) t(aa, k, SatWidenMulPairwiseAdd(aa, j, m)); } }; template struct ai { template void operator()(o) { size_t aj =3D ah; constexpr size_t ak HWY_LANES(o); size_t af =3D ak; w::x(aj, af); } }; void l() { ai()(int8_t()); } } // namespace hwy int main() { hwy::l(); }=