public inbox for gcc-bugs@sourceware.org help / color / mirror / Atom feed
From: "malat at debian dot org" <gcc-bugzilla@gcc.gnu.org> To: gcc-bugs@gcc.gnu.org Subject: [Bug target/111231] armhf: Miscompilation at O2 level (O1 is working) Date: Tue, 05 Sep 2023 16:24:56 +0000 [thread overview] Message-ID: <bug-111231-4-kEYT7mgRHW@http.gcc.gnu.org/bugzilla/> (raw) In-Reply-To: <bug-111231-4@http.gcc.gnu.org/bugzilla/> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111231 --- Comment #5 from Mathieu Malaterre <malat at debian dot org> --- Result from cvise (+manual editing): c++ -Wfatal-errors -Wall -Wextra -Werror -o works -O1 -march=armv7-a -mfpu=neon-vfpv4 -mfloat-abi=hard -mfp16-format=ieee step7.cc -> exit(0) c++ -Wfatal-errors -Wall -Wextra -Werror -o fails -O2 -march=armv7-a -mfpu=neon-vfpv4 -mfloat-abi=hard -mfp16-format=ieee step7.cc -> exit(42) Where: % cat step7.cc #include <inttypes.h> #include <stddef.h> #define HWY_MIN(a, b) a < b template <size_t kBytes, typename From, typename To> void CopyBytes(From from, To to) { __builtin_memcpy(to, from, kBytes); } template <typename From, typename To> void CopySameSize(From from, To to) { CopyBytes<sizeof(int)>(from, to); } struct Relations { using Wide = int16_t; }; template <typename> using MakeWide = Relations::Wide; #if 0 #else #define HWY_LANES(T) (sizeof(T)) #endif namespace detail { constexpr size_t ScaleByPower(size_t N, int pow2) { return pow2 >= 0 ? 
N : 0; } } // namespace detail template <typename Lane, size_t N, int kPow2> struct Simd { using T = Lane; static constexpr size_t kWhole = N; static constexpr int kFrac = 0; static constexpr size_t kPrivateLanes = detail::ScaleByPower(kWhole, kFrac); constexpr size_t MaxBytes() { return kPrivateLanes; } template <typename> static constexpr size_t RepartitionLanes() { return sizeof(int); } template <int kNewPow2, size_t kNewMaxLanes> static constexpr size_t WholeN() { return detail::ScaleByPower(kNewMaxLanes, kNewPow2); } template <int kNewPow2, size_t kNewMaxLanes> static constexpr size_t NewN() { return WholeN<kNewPow2, kNewMaxLanes>(); } template <typename> using Rebind = Simd; template <typename NewT> using Repartition = Simd<NewT, NewN<kPow2, RepartitionLanes<NewT>()>(), kPow2>; }; namespace detail { template <typename T, size_t N, int> struct ClampNAndPow2 { using type = Simd<T, HWY_MIN(N, 6), 3>; }; template <typename T, size_t, int kPow2> struct CappedTagChecker { static constexpr size_t N = HWY_LANES(T); using type = typename ClampNAndPow2<T, N, kPow2>::type; }; } // namespace detail template <typename T, size_t kLimit, int kPow2 = 0> using CappedTag = typename detail::CappedTagChecker<T, kLimit, kPow2>::type; template <class D> using TFromD = typename D::T; #define HWY_MAX_LANES_D(D) D::kPrivateLanes template <class D> size_t MaxLanes(D) { return HWY_MAX_LANES_D(D); } template <class D> size_t Lanes(D) { return HWY_MAX_LANES_D(D); } template <class T, class D> using Rebind = typename D::Rebind<T>; template <class D> using RebindToUnsigned = Rebind<D, D>; template <class T, class D> using Repartition = typename D::Repartition<T>; template <class D> using RepartitionToWide = Repartition<MakeWide<D>, D>; #include <cmath> #include <cstring> #include <memory> namespace hwy { template <typename T, size_t> struct Vec128 { T raw[16 / sizeof(T)]; }; template <class D> Vec128<TFromD<D>, HWY_MAX_LANES_D(D)> Zero(D); template <class D> using VFromD = 
decltype(Zero(D())); template <class D, class VFrom> VFromD<D> BitCast(D, VFrom v) { VFromD<D> to; CopySameSize(&v, &to); return to; } template <class D, typename T2> VFromD<D> Set(D d, T2 t) { VFromD<D> v; for (size_t i = 0; i < MaxLanes(d); ++i) v.raw[i] = t; return v; } template <typename T, size_t N> Vec128<T, N> SaturatedAdd(Vec128<T, N> b) { Vec128<T, N> a; using TW = T; for (size_t i = 0; i < N; ++i) a.raw[i] = HWY_MIN(TW() + b.raw[i], T()); return a; } template <class D> void Store(VFromD<D> v, D d, TFromD<D> *aligned) { CopyBytes<d.MaxBytes()>(v.raw, aligned); } template <class D> using Vec = decltype(Zero(D())); template <class DI16, class VU8, class VI8> Vec<DI16> SatWidenMulPairwiseAdd(DI16 di16, VU8 a, VI8 b) { RebindToUnsigned<decltype(di16)> du16; auto a1 = BitCast(du16, a), b1(BitCast(di16, b)); return SaturatedAdd(a1); } using AllocPtr = void *(void *, size_t); using FreePtr = void (*)(void *, void *); void *AllocateAlignedBytes(size_t size, AllocPtr, void *) { return aligned_alloc(4096, size); } void FreeAlignedBytes(const void *ptr, FreePtr, void *) { free((void*)ptr); } AllocPtr *AllocateAlignedItems_alloc_ptr; template <typename T> T *AllocateAlignedItems(size_t items) { size_t size = sizeof(T); bool is_pow2 = 0; size_t bits(size); size_t bytes = is_pow2 ? 
bits : items; return static_cast<T *>( AllocateAlignedBytes(bytes, AllocateAlignedItems_alloc_ptr, 0)); } struct AlignedFreer { AlignedFreer(FreePtr free_ptr, void *opaque_ptr) : free_(free_ptr), opaque_ptr_(opaque_ptr) {} template <typename T> void operator()(T aligned_pointer) { FreeAlignedBytes(aligned_pointer, free_, opaque_ptr_); } FreePtr free_; void *opaque_ptr_; }; template <typename T> using AlignedFreeUniquePtr = std::unique_ptr<T, AlignedFreer>; FreePtr AllocateAligned_free; template <typename T> AlignedFreeUniquePtr<T[]> AllocateAligned(size_t items, void *opaque) { T *__trans_tmp_1 = AllocateAlignedItems<T>(items); return AlignedFreeUniquePtr<T[]>(__trans_tmp_1, AlignedFreer(AllocateAligned_free, opaque)); } template <typename T> AlignedFreeUniquePtr<T[]> AllocateAligned(size_t items) { return AllocateAligned<T>(items, nullptr); } void AssertArrayEqual2(void *a, void *b, size_t c, const char *, int) { if (memcmp(a, b, c)) exit(42); } template <class n, typename o = TFromD<n>> void p(n d, Vec<n> expected, Vec<n> actual, const char *ab, int ac) { size_t q = Lanes(d); auto r = AllocateAligned<o>(q), s = AllocateAligned<o>(q); Store(expected, d, r.get()); Store(actual, d, s.get()); AssertArrayEqual2(r.get(), s.get(), q, ab, ac); } #define t(d, expected, actual) p(d, expected, actual, __FILE__, __LINE__) template <typename o, size_t u, class v> struct w { static void x(size_t ae, size_t af) { CappedTag<o, u> d; size_t ag = Lanes(d); if (ag < ae) return; if (af) v()(o(), d); } }; struct y { template <typename b, class c> void operator()(b, c d) { RepartitionToWide<c> aa; size_t g = Lanes(d); RebindToUnsigned<decltype(d)> h; auto m = 0, j = Set(h, b()), k = Set(aa, 0); for (size_t i = 0; i < g; i++) t(aa, k, SatWidenMulPairwiseAdd(aa, j, m)); } }; template <int ah = 1> struct ai { template <typename o> void operator()(o) { size_t aj = ah; constexpr size_t ak HWY_LANES(o); size_t af = ak; w<o, ak, y>::x(aj, af); } }; void l() { ai()(int8_t()); } } // namespace 
hwy int main() { hwy::l(); }
next prev parent reply other threads:[~2023-09-05 16:24 UTC|newest] Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top 2023-08-30 6:47 [Bug target/111231] New: armhf: Miscompilation at O2 level malat at debian dot org 2023-08-30 6:49 ` [Bug target/111231] " malat at debian dot org 2023-08-30 6:51 ` malat at debian dot org 2023-08-30 7:18 ` [Bug target/111231] armhf: Miscompilation at O2 level (O1 is working) malat at debian dot org 2023-08-31 6:49 ` malat at debian dot org 2023-09-05 16:24 ` malat at debian dot org [this message] 2023-09-14 13:52 ` [Bug target/111231] [13/14 Regression] " malat at debian dot org 2023-09-15 8:06 ` [Bug target/111231] armhf: Miscompilation with -O2/-fno-exceptions level (-O2 " malat at debian dot org 2023-09-26 6:29 ` malat at debian dot org 2023-09-26 6:29 ` malat at debian dot org 2023-09-26 6:31 ` malat at debian dot org 2023-09-26 6:32 ` malat at debian dot org 2023-09-26 9:00 ` xry111 at gcc dot gnu.org 2023-10-06 6:16 ` [Bug target/111231] armhf: Miscompilation with -O2/-fno-exceptions level (-fno-tree-vectorize " malat at debian dot org 2023-10-06 6:21 ` malat at debian dot org 2023-10-06 6:47 ` malat at debian dot org 2023-12-15 7:33 ` malat at debian dot org 2024-03-17 2:44 ` [Bug target/111231] [12/13/14 regression] " sjames at gcc dot gnu.org 2024-03-17 2:46 ` sjames at gcc dot gnu.org 2024-03-22 13:39 ` law at gcc dot gnu.org 2024-03-22 18:02 ` rearnsha at gcc dot gnu.org 2024-03-25 12:46 ` rguenth at gcc dot gnu.org 2024-04-11 14:13 ` rearnsha at gcc dot gnu.org 2024-04-11 14:28 ` rearnsha at gcc dot gnu.org 2024-04-11 14:29 ` rearnsha at gcc dot gnu.org 2024-04-11 14:41 ` rearnsha at gcc dot gnu.org 2024-04-11 18:25 ` pinskia at gcc dot gnu.org 2024-04-12 6:17 ` rguenth at gcc dot gnu.org 2024-04-12 10:08 ` rearnsha at gcc dot gnu.org 2024-04-12 10:40 ` rearnsha at gcc dot gnu.org 2024-04-12 10:51 ` rguenth at gcc dot gnu.org 2024-04-12 13:10 ` rearnsha at gcc dot gnu.org 2024-04-15 6:46 ` rguenth 
at gcc dot gnu.org 2024-04-15 14:47 ` rearnsha at gcc dot gnu.org 2024-04-16 6:46 ` rguenth at gcc dot gnu.org 2024-04-16 6:57 ` rguenth at gcc dot gnu.org 2024-04-16 9:59 ` rearnsha at gcc dot gnu.org 2024-04-16 10:34 ` rguenther at suse dot de 2024-06-20 9:13 ` [Bug target/111231] [12/13/14/15 " rguenth at gcc dot gnu.org
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=bug-111231-4-kEYT7mgRHW@http.gcc.gnu.org/bugzilla/ \ --to=gcc-bugzilla@gcc.gnu.org \ --cc=gcc-bugs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).