public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
From: "malat at debian dot org" <gcc-bugzilla@gcc.gnu.org>
To: gcc-bugs@gcc.gnu.org
Subject: [Bug target/111231] armhf: Miscompilation at O2 level (O1 is working)
Date: Tue, 05 Sep 2023 16:24:56 +0000	[thread overview]
Message-ID: <bug-111231-4-kEYT7mgRHW@http.gcc.gnu.org/bugzilla/> (raw)
In-Reply-To: <bug-111231-4@http.gcc.gnu.org/bugzilla/>

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111231

--- Comment #5 from Mathieu Malaterre <malat at debian dot org> ---
Result from cvise (+manual editing):

c++ -Wfatal-errors -Wall -Wextra -Werror -o works -O1 -march=armv7-a
-mfpu=neon-vfpv4 -mfloat-abi=hard -mfp16-format=ieee step7.cc
-> exit(0)

c++ -Wfatal-errors -Wall -Wextra -Werror -o fails -O2 -march=armv7-a
-mfpu=neon-vfpv4 -mfloat-abi=hard -mfp16-format=ieee step7.cc
-> exit(42)

Where:

% cat step7.cc 
#include <inttypes.h>
#include <stddef.h>
// Reduced by cvise: despite its name this expands to the bare comparison
// `a < b` (a boolean), not a minimum. Neither the arguments nor the result
// are parenthesized.
#define HWY_MIN(a, b) a < b
// Copies exactly kBytes bytes from `from` to `to` with __builtin_memcpy.
// kBytes is a compile-time constant; `from` and `to` are forwarded unchanged
// as the memcpy source and destination.
template <size_t kBytes, typename From, typename To>
void CopyBytes(From from, To to) {
  __builtin_memcpy(to, from, kBytes);
}
// Forwards to CopyBytes with a fixed count of sizeof(int) bytes. The sizes of
// the objects behind `from` and `to` are not consulted.
template <typename From, typename To> void CopySameSize(From from, To to) {
  CopyBytes<sizeof(int)>(from, to);
}
// Holds the single "wide" lane type used by this test case.
struct Relations {
  using Wide = int16_t;
};
// Ignores its template argument: MakeWide<X> is always Relations::Wide
// (int16_t).
template <typename> using MakeWide = Relations::Wide;
// The `#if 0` branch is empty, so the definition in the `#else` branch is
// always the active one.
#if 0
#else
// Expands to sizeof(T): a byte size is used wherever a lane count is asked for.
#define HWY_LANES(T) (sizeof(T))
#endif
namespace detail {
// Returns N unchanged when pow2 is non-negative and 0 otherwise; no actual
// scaling by a power of two is performed.
constexpr size_t ScaleByPower(size_t N, int pow2) { return pow2 >= 0 ? N : 0; }
} // namespace detail
// Tag type with no data members describing a vector: lane type `Lane`, lane
// count `N`, and a `kPow2` value that is only used when forming Repartition<>.
template <typename Lane, size_t N, int kPow2> struct Simd {
  using T = Lane;
  static constexpr size_t kWhole = N;
  // Fixed at 0, so ScaleByPower below always takes its `pow2 >= 0` branch.
  static constexpr int kFrac = 0;
  // Evaluates to kWhole (i.e. N) because kFrac is 0.
  static constexpr size_t kPrivateLanes = detail::ScaleByPower(kWhole, kFrac);
  // Returns kPrivateLanes, a lane count; it is not multiplied by sizeof(T).
  constexpr size_t MaxBytes() { return kPrivateLanes; }
  // Ignores its template argument and always returns sizeof(int).
  template <typename> static constexpr size_t RepartitionLanes() {
    return sizeof(int);
  }
  // Returns kNewMaxLanes when kNewPow2 >= 0, otherwise 0 (via ScaleByPower).
  template <int kNewPow2, size_t kNewMaxLanes>
  static constexpr size_t WholeN() {
    return detail::ScaleByPower(kNewMaxLanes, kNewPow2);
  }
  // Thin wrapper that forwards to WholeN with the same arguments.
  template <int kNewPow2, size_t kNewMaxLanes> static constexpr size_t NewN() {
    return WholeN<kNewPow2, kNewMaxLanes>();
  }
  // Ignores its argument: Rebind<X> names this same Simd specialization.
  template <typename> using Rebind = Simd;
  // Tag with lane type NewT, lane count NewN<kPow2, RepartitionLanes<NewT>()>()
  // and the same kPow2.
  template <typename NewT>
  using Repartition =
      Simd<NewT, NewN<kPow2, RepartitionLanes<NewT>()>(), kPow2>;
};
namespace detail {
// Maps (T, N) to a Simd tag. After HWY_MIN expands, the lane-count argument
// is the comparison `N < 6`, so it converts to 1 or 0 rather than min(N, 6).
// The third (unnamed) parameter is ignored and the tag's kPow2 is fixed at 3.
template <typename T, size_t N, int> struct ClampNAndPow2 {
  using type = Simd<T, HWY_MIN(N, 6), 3>;
};
// Computes N as HWY_LANES(T), i.e. sizeof(T), and forwards to ClampNAndPow2.
// The second (unnamed) parameter, which CappedTag fills with its limit, is
// unused.
template <typename T, size_t, int kPow2> struct CappedTagChecker {
  static constexpr size_t N = HWY_LANES(T);
  using type = typename ClampNAndPow2<T, N, kPow2>::type;
};
} // namespace detail
// Shorthand for detail::CappedTagChecker<T, kLimit, kPow2>::type; kPow2
// defaults to 0.
template <typename T, size_t kLimit, int kPow2 = 0>
using CappedTag = typename detail::CappedTagChecker<T, kLimit, kPow2>::type;
// Lane type of tag D, read from its member alias T.
template <class D> using TFromD = typename D::T;
// Lane count of tag type D, read from its static kPrivateLanes member.
#define HWY_MAX_LANES_D(D) D::kPrivateLanes
// MaxLanes and Lanes are identical here: both return D::kPrivateLanes and
// ignore the value of their (unnamed) argument.
template <class D> size_t MaxLanes(D) { return HWY_MAX_LANES_D(D); }
template <class D> size_t Lanes(D) { return HWY_MAX_LANES_D(D); }
// Rebind<T, D> is D's member alias template Rebind applied to T.
template <class T, class D> using Rebind = typename D::Rebind<T>;
// Passes D as both arguments of Rebind; no signed-to-unsigned type mapping
// happens in this alias itself.
template <class D> using RebindToUnsigned = Rebind<D, D>;
// Repartition<T, D> is D's member alias template Repartition applied to T.
template <class T, class D> using Repartition = typename D::Repartition<T>;
// Repartitions D to the lane type MakeWide<D>.
template <class D> using RepartitionToWide = Repartition<MakeWide<D>, D>;
#include <cmath>
#include <cstring>
#include <memory>
namespace hwy {
// Plain vector stand-in: `raw` holds 16 / sizeof(T) lanes of T. The second
// (unnamed) template parameter only distinguishes types; it does not affect
// the layout.
template <typename T, size_t> struct Vec128 {
  T raw[16 / sizeof(T)];
};
// Declaration only (no definition is visible in this file); it is used inside
// decltype to map a tag D to its vector type.
template <class D> Vec128<TFromD<D>, HWY_MAX_LANES_D(D)> Zero(D);
// Vector type associated with tag D: the return type of Zero(D()).
template <class D> using VFromD = decltype(Zero(D()));
// Reinterprets the leading bytes of `v` as a VFromD<D>: CopySameSize copies
// sizeof(int) bytes from `v` into the default-initialized `to`, which is then
// returned. Bytes of `to` beyond that count are never written here.
template <class D, class VFrom> VFromD<D> BitCast(D, VFrom v) {
  VFromD<D> to;
  CopySameSize(&v, &to);
  return to;
}
// Returns a vector whose first MaxLanes(d) lanes are assigned `t` (converted
// to the lane type on assignment). Any remaining lanes of `raw` are left
// uninitialized.
template <class D, typename T2> VFromD<D> Set(D d, T2 t) {
  VFromD<D> v;
  for (size_t i = 0; i < MaxLanes(d); ++i)
    v.raw[i] = t;
  return v;
}
// Single-operand remnant of a saturated add. For each of the first N lanes,
// HWY_MIN expands so that the lane receives the result of the comparison
// `TW() + b.raw[i] < T()` (0 or 1), not a sum. TW is just an alias for T.
template <typename T, size_t N> Vec128<T, N> SaturatedAdd(Vec128<T, N> b) {
  Vec128<T, N> a;
  using TW = T;
  for (size_t i = 0; i < N; ++i)
    a.raw[i] = HWY_MIN(TW() + b.raw[i], T());
  return a;
}
// Copies d.MaxBytes() bytes from v.raw to `aligned`. The count is supplied as
// a template argument to CopyBytes, so it has to be a compile-time constant.
template <class D> void Store(VFromD<D> v, D d, TFromD<D> *aligned) {
  CopyBytes<d.MaxBytes()>(v.raw, aligned);
}
// Same mapping as VFromD: the vector type associated with tag D.
template <class D> using Vec = decltype(Zero(D()));
// Reduced form: bit-casts `a` to the vector type of tag du16 and `b` to the
// vector type of tag di16, then returns SaturatedAdd(a1). `b1` is not used
// afterwards, and no widening, multiplication or pairwise addition remains in
// this version.
template <class DI16, class VU8, class VI8>
Vec<DI16> SatWidenMulPairwiseAdd(DI16 di16, VU8 a, VI8 b) {
  RebindToUnsigned<decltype(di16)> du16;
  auto a1 = BitCast(du16, a), b1(BitCast(di16, b));
  return SaturatedAdd(a1);
}
// AllocPtr is a function type (it is adjusted to a pointer when used as a
// parameter); FreePtr is a pointer-to-function type.
using AllocPtr = void *(void *, size_t);
using FreePtr = void (*)(void *, void *);
// Returns `size` bytes from aligned_alloc with 4096-byte alignment. The
// allocator callback and opaque pointer parameters are unnamed and ignored.
// NOTE(review): `size` is passed through without rounding up to a multiple of
// the alignment, which some aligned_alloc specifications require - confirm
// this is acceptable on the target libc.
void *AllocateAlignedBytes(size_t size, AllocPtr, void *) {
        return aligned_alloc(4096, size);

}
// Releases `ptr` with free() after casting away const. The FreePtr and opaque
// pointer parameters are unnamed and ignored.
void FreeAlignedBytes(const void *ptr, FreePtr, void *) {
        free((void*)ptr);
}
// Namespace-scope pointer to an AllocPtr; nothing in this file assigns it.
AllocPtr *AllocateAlignedItems_alloc_ptr;
// Allocates storage through AllocateAlignedBytes and returns it as T*.
// Because is_pow2 is hard-coded to 0, `bytes` is always `items`: the request
// is `items` bytes, not items * sizeof(T).
template <typename T> T *AllocateAlignedItems(size_t items) {
  size_t size = sizeof(T);
  bool is_pow2 = 0;
  size_t bits(size);
  // Always selects `items`; `bits` (== sizeof(T)) is never used.
  size_t bytes = is_pow2 ? bits : items;
  return static_cast<T *>(
      AllocateAlignedBytes(bytes, AllocateAlignedItems_alloc_ptr, 0));
}
// Deleter used with std::unique_ptr: stores a free callback and an opaque
// pointer and hands both to FreeAlignedBytes together with the pointer being
// released.
struct AlignedFreer {
  AlignedFreer(FreePtr free_ptr, void *opaque_ptr)
      : free_(free_ptr), opaque_ptr_(opaque_ptr) {}
  // Call operator used as the deleter entry point.
  template <typename T> void operator()(T aligned_pointer) {
    FreeAlignedBytes(aligned_pointer, free_, opaque_ptr_);
  }
  FreePtr free_;
  void *opaque_ptr_;
};
// unique_ptr that releases its pointer through AlignedFreer.
template <typename T>
using AlignedFreeUniquePtr = std::unique_ptr<T, AlignedFreer>;
// Free callback passed to the AlignedFreer built in AllocateAligned; nothing
// in this file assigns it.
FreePtr AllocateAligned_free;
// Allocates via AllocateAlignedItems<T>(items) and wraps the result in an
// AlignedFreeUniquePtr<T[]> whose deleter carries AllocateAligned_free and
// `opaque`.
template <typename T>
AlignedFreeUniquePtr<T[]> AllocateAligned(size_t items, void *opaque) {
  T *__trans_tmp_1 = AllocateAlignedItems<T>(items);
  return AlignedFreeUniquePtr<T[]>(__trans_tmp_1,
                                   AlignedFreer(AllocateAligned_free, opaque));
}
// Convenience overload: calls the two-argument overload with a null opaque
// pointer.
template <typename T> AlignedFreeUniquePtr<T[]> AllocateAligned(size_t items) {
  return AllocateAligned<T>(items, nullptr);
}
// Compares the first `c` bytes of `a` and `b` with memcmp and terminates the
// process with exit status 42 on any difference. The trailing `const char *`
// and `int` parameters are unnamed and ignored.
void AssertArrayEqual2(void *a, void *b, size_t c, const char *, int) {
  if (memcmp(a, b, c))
    exit(42);
}
// Stores `expected` and `actual` into two freshly allocated buffers and checks
// that they match via AssertArrayEqual2. `q`, the lane count Lanes(d), is used
// both as the item count for each allocation and as the length handed to
// AssertArrayEqual2. `ab` and `ac` are forwarded unchanged.
template <class n, typename o = TFromD<n>>
void p(n d, Vec<n> expected, Vec<n> actual, const char *ab, int ac) {
  size_t q = Lanes(d);
  auto r = AllocateAligned<o>(q), s = AllocateAligned<o>(q);
  Store(expected, d, r.get());
  Store(actual, d, s.get());
  AssertArrayEqual2(r.get(), s.get(), q, ab, ac);
}
// Calls p with the current source file name and line number appended.
#define t(d, expected, actual) p(d, expected, actual, __FILE__, __LINE__)
// Test driver: builds the tag CappedTag<o, u> and, unless it has fewer than
// `ae` lanes or `af` is zero, invokes a default-constructed `v` with a
// value-initialized `o` and the tag.
template <typename o, size_t u, class v> struct w {
  static void x(size_t ae, size_t af) {
    CappedTag<o, u> d;
    size_t ag = Lanes(d);
    // Skip when the tag provides fewer lanes than requested.
    if (ag < ae)
      return;
    if (af)
      v()(o(), d);
  }
};
// Test body, called with a lane value of type `b` and a tag `d`. It compares
// a zero-filled vector against the result of SatWidenMulPairwiseAdd.
struct y {
  template <typename b, class c> void operator()(b, c d) {
    RepartitionToWide<c> aa;
    size_t g = Lanes(d);
    RebindToUnsigned<decltype(d)> h;
    // m is an int zero, j a vector for tag h filled with b(), and k a vector
    // for tag aa filled with 0 (used as the expected value below).
    // NOTE(review): Set only writes MaxLanes(h) lanes of j, while the BitCast
    // inside SatWidenMulPairwiseAdd copies sizeof(int) bytes out of it -
    // confirm the reduction does not depend on indeterminate bytes.
    auto m = 0, j = Set(h, b()), k = Set(aa, 0);
    // Repeats the same check once per lane of d; the loop index is unused.
    for (size_t i = 0; i < g; i++)
      t(aa, k, SatWidenMulPairwiseAdd(aa, j, m));
  }
};
// Entry functor. operator() derives the arguments for w::x from the template
// parameter `ah` (default 1) and from sizeof(o).
template <int ah = 1> struct ai {
  template <typename o> void operator()(o) {
    size_t aj = ah;
    // HWY_LANES(o) expands to `(sizeof(o))`, so this is the direct
    // initialization `ak(sizeof(o))`.
    constexpr size_t ak HWY_LANES(o);
    size_t af = ak;
    w<o, ak, y>::x(aj, af);
  }
};
// Runs the test for int8_t using ai with its default template argument.
void l() { ai()(int8_t()); }
} // namespace hwy
// Runs the single test. A mismatch exits with status 42 from inside
// AssertArrayEqual2; otherwise main falls off the end and returns 0.
int main() { hwy::l(); }

  parent reply	other threads:[~2023-09-05 16:24 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-30  6:47 [Bug target/111231] New: armhf: Miscompilation at O2 level malat at debian dot org
2023-08-30  6:49 ` [Bug target/111231] " malat at debian dot org
2023-08-30  6:51 ` malat at debian dot org
2023-08-30  7:18 ` [Bug target/111231] armhf: Miscompilation at O2 level (O1 is working) malat at debian dot org
2023-08-31  6:49 ` malat at debian dot org
2023-09-05 16:24 ` malat at debian dot org [this message]
2023-09-14 13:52 ` [Bug target/111231] [13/14 Regression] " malat at debian dot org
2023-09-15  8:06 ` [Bug target/111231] armhf: Miscompilation with -O2/-fno-exceptions level (-O2 " malat at debian dot org
2023-09-26  6:29 ` malat at debian dot org
2023-09-26  6:29 ` malat at debian dot org
2023-09-26  6:31 ` malat at debian dot org
2023-09-26  6:32 ` malat at debian dot org
2023-09-26  9:00 ` xry111 at gcc dot gnu.org
2023-10-06  6:16 ` [Bug target/111231] armhf: Miscompilation with -O2/-fno-exceptions level (-fno-tree-vectorize " malat at debian dot org
2023-10-06  6:21 ` malat at debian dot org
2023-10-06  6:47 ` malat at debian dot org
2023-12-15  7:33 ` malat at debian dot org
2024-03-17  2:44 ` [Bug target/111231] [12/13/14 regression] " sjames at gcc dot gnu.org
2024-03-17  2:46 ` sjames at gcc dot gnu.org
2024-03-22 13:39 ` law at gcc dot gnu.org
2024-03-22 18:02 ` rearnsha at gcc dot gnu.org
2024-03-25 12:46 ` rguenth at gcc dot gnu.org
2024-04-11 14:13 ` rearnsha at gcc dot gnu.org
2024-04-11 14:28 ` rearnsha at gcc dot gnu.org
2024-04-11 14:29 ` rearnsha at gcc dot gnu.org
2024-04-11 14:41 ` rearnsha at gcc dot gnu.org
2024-04-11 18:25 ` pinskia at gcc dot gnu.org
2024-04-12  6:17 ` rguenth at gcc dot gnu.org
2024-04-12 10:08 ` rearnsha at gcc dot gnu.org
2024-04-12 10:40 ` rearnsha at gcc dot gnu.org
2024-04-12 10:51 ` rguenth at gcc dot gnu.org
2024-04-12 13:10 ` rearnsha at gcc dot gnu.org
2024-04-15  6:46 ` rguenth at gcc dot gnu.org
2024-04-15 14:47 ` rearnsha at gcc dot gnu.org
2024-04-16  6:46 ` rguenth at gcc dot gnu.org
2024-04-16  6:57 ` rguenth at gcc dot gnu.org
2024-04-16  9:59 ` rearnsha at gcc dot gnu.org
2024-04-16 10:34 ` rguenther at suse dot de
2024-06-20  9:13 ` [Bug target/111231] [12/13/14/15 " rguenth at gcc dot gnu.org

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bug-111231-4-kEYT7mgRHW@http.gcc.gnu.org/bugzilla/ \
    --to=gcc-bugzilla@gcc.gnu.org \
    --cc=gcc-bugs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).