This should address Jonathan's feedback; it also adds support for atomic_ref.

On Wed, Sep 29, 2021 at 5:14 AM Jonathan Wakely wrote:
> On Mon, 27 Sept 2021 at 15:11, Thomas Rodgers wrote:
> >
> > From: Thomas Rodgers
> >
> > Now with checks for __has_builtin(__builtin_clear_padding)
> >
> > This change implements P0528 which requires that padding bits not
> > participate in atomic compare exchange operations. All arguments to the
> > generic template are 'sanitized' by the __builtin_clear_padding intrinsic
> > before they are used in comparisons. This also requires that any stores
> > sanitize the incoming value.
> >
> > Signed-off-by: Thomas Rodgers
> >
> > libstdc++-v3/ChangeLog:
> >
> >         * include/std/atomic (atomic::atomic(_Tp)): Clear padding for
> >         __cplusplus > 201703L.
> >         (atomic::store()): Clear padding.
> >         (atomic::exchange()): Likewise.
> >         (atomic::compare_exchange_weak()): Likewise.
> >         (atomic::compare_exchange_strong()): Likewise.
>
> Don't we also need this for std::atomic_ref, i.e. for the
> __atomic_impl free functions in <bits/atomic_base.h>?
>
> There we don't have any distinction between atomic_ref specializations
> for types with padding and those without; they both use the same
> implementations. But I think that's OK, as I think the built-in is
> smart enough to be a no-op for types with no padding.
>
> >         * testsuite/29_atomics/atomic/compare_exchange_padding.cc: New
> >         test.
> > ---
> >  libstdc++-v3/include/std/atomic               | 41 +++++++++++++++++-
> >  .../atomic/compare_exchange_padding.cc        | 42 +++++++++++++++++++
> >  2 files changed, 81 insertions(+), 2 deletions(-)
> >  create mode 100644 libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc
> >
> > diff --git a/libstdc++-v3/include/std/atomic b/libstdc++-v3/include/std/atomic
> > index 936dd50ba1c..4ac9ccdc1ab 100644
> > --- a/libstdc++-v3/include/std/atomic
> > +++ b/libstdc++-v3/include/std/atomic
> > @@ -228,7 +228,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >      atomic& operator=(const atomic&) = delete;
> >      atomic& operator=(const atomic&) volatile = delete;
> >
> > -    constexpr atomic(_Tp __i) noexcept : _M_i(__i) { }
> > +#if __cplusplus > 201703L && __has_builtin(__builtin_clear_padding)
> > +    constexpr atomic(_Tp __i) noexcept : _M_i(__i)
> > +    { __builtin_clear_padding(std::__addressof(_M_i)); }
> > +#else
> > +    constexpr atomic(_Tp __i) noexcept : _M_i(__i)
> > +    { }
> > +#endif
>
> Please write this as a single function with the preprocessor
> conditions in the body:
>
>     constexpr atomic(_Tp __i) noexcept : _M_i(__i)
>     {
> #if __cplusplus > 201703L && __has_builtin(__builtin_clear_padding)
>       __builtin_clear_padding(std::__addressof(_M_i));
> #endif
>     }
>
> This not only avoids duplication of the identical parts, but it avoids
> warnings from ld.gold if you use --detect-odr-violations. Otherwise,
> the linker can see a definition of that constructor on two different
> lines (233 and 236), and so warns about possible ODR violations,
> something like "warning: while linking foo: symbol
> 'std::atomic<int>::atomic(int)' defined in multiple places (possible
> ODR violation): ...atomic:233 ... atomic:236"
>
> Can't we clear the padding for __cplusplus >= 201402L instead of only
> C++20? Only C++11 has a problem with the built-in in a constexpr
> function, right? So we can DTRT for C++14 upwards.
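For illustration, here is a minimal sketch of the merged constructor with
the C++14 cutoff suggested above folded in (untested, not the posted
patch):

    constexpr atomic(_Tp __i) noexcept : _M_i(__i)
    {
      // Clearing padding needs a non-empty constexpr constructor body,
      // which C++11 does not allow, hence the C++14 cutoff.
#if __cplusplus >= 201402L && __has_builtin(__builtin_clear_padding)
      __builtin_clear_padding(std::__addressof(_M_i));
#endif
    }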
> >
> >      operator _Tp() const noexcept
> >      { return load(); }
> > @@ -268,12 +274,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >       void
> >       store(_Tp __i, memory_order __m = memory_order_seq_cst) noexcept
> >       {
> > +#if __has_builtin(__builtin_clear_padding)
> > +       __builtin_clear_padding(std::__addressof(__i));
> > +#endif
>
> We repeat this *a lot*. When I started work on this I defined a
> non-member function in the __atomic_impl namespace:
>
>   template<typename _Tp>
>     _GLIBCXX_ALWAYS_INLINE void
>     __clear_padding(_Tp& __val) noexcept
>     {
> #if __has_builtin(__builtin_clear_padding)
>       __builtin_clear_padding(std::__addressof(__val));
> #endif
>     }
>
> Then you can just use that everywhere (except the constexpr
> constructor), without all the #if checks.
>
> >
> >         __atomic_store(std::__addressof(_M_i), std::__addressof(__i), int(__m));
> >       }
> >
> >       void
> >       store(_Tp __i, memory_order __m = memory_order_seq_cst) volatile noexcept
> >       {
> > +#if __has_builtin(__builtin_clear_padding)
> > +       __builtin_clear_padding(std::__addressof(__i));
> > +#endif
> >         __atomic_store(std::__addressof(_M_i), std::__addressof(__i), int(__m));
> >       }
> >
> > @@ -300,6 +312,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >       {
> >         alignas(_Tp) unsigned char __buf[sizeof(_Tp)];
> >         _Tp* __ptr = reinterpret_cast<_Tp*>(__buf);
> > +#if __has_builtin(__builtin_clear_padding)
> > +       __builtin_clear_padding(std::__addressof(__i));
> > +#endif
> >         __atomic_exchange(std::__addressof(_M_i), std::__addressof(__i),
> >                           __ptr, int(__m));
> >         return *__ptr;
> > @@ -311,6 +326,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >       {
> >         alignas(_Tp) unsigned char __buf[sizeof(_Tp)];
> >         _Tp* __ptr = reinterpret_cast<_Tp*>(__buf);
> > +#if __has_builtin(__builtin_clear_padding)
> > +       __builtin_clear_padding(std::__addressof(__i));
> > +#endif
> >         __atomic_exchange(std::__addressof(_M_i), std::__addressof(__i),
> >                           __ptr, int(__m));
> >         return *__ptr;
> > @@ -322,6 +340,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >       {
> >         __glibcxx_assert(__is_valid_cmpexch_failure_order(__f));
> >
> > +#if __has_builtin(__builtin_clear_padding)
> > +       __builtin_clear_padding(std::__addressof(__e));
>
> This unconditionally clears the padding of __e, which I don't think is
> allowed. It potentially introduces a data race if another thread is
> doing the CAS at the same time, and the program assumes that only the
> CAS that fails will update expected.
>
> See the thread I started at
> https://lists.isocpp.org/parallel/2020/12/3443.php
> ("atomic compare_exchange and padding bits", 2020-12-03)
>
> The conclusion was that writing to __e is not allowed in the failure
> case, so you need to make a copy of it (into a buffer, using memcpy),
> then clear the padding in the copy, then try the
> __atomic_compare_exchange and if it fails, copy back from the buffer
> to __e. If all that extra work doesn't get inlined then we want to
> only do it for types which might have padding bits, so I had
> __atomic_impl::__maybe_has_padding in my unfinished patch:
>
>   template<typename _Tp>
>     constexpr bool
>     __maybe_has_padding()
>     {
> #if __has_builtin(__has_unique_object_representations)
>       return !__has_unique_object_representations(_Tp);
> #else
>       return true;
> #endif
>     }
>
> The MSVC implementation uses !__has_unique_object_representations(_Tp)
> && !is_floating_point<_Tp>::value here, which is better than mine
> above (FP types don't have unique object reps, but also don't have
> padding bits).
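For illustration, a sketch of the predicate with the MSVC-style
floating-point exclusion folded in (untested, not part of the posted
patch):

    template<typename _Tp>
      constexpr bool
      __maybe_has_padding()
      {
#if __has_builtin(__has_unique_object_representations)
        // FP types lack unique object representations but have no
        // padding bits, so exclude them as MSVC does.
        return !__has_unique_object_representations(_Tp)
          && !is_floating_point<_Tp>::value;
#else
        return true;
#endif
      }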
>
> And then do something like this in compare_exchange_weak:
>
> +      {
> +#if __has_builtin(__builtin_clear_padding)
> +       if _GLIBCXX_CONSTEXPR17 (__maybe_has_padding<_Tp>())
> +         {
> +           _Val<_Tp> __expected0 = __expected; // XXX should use memcpy
> +           auto* __exp = __atomic_impl::__clear_padding(__expected0);
> +           auto* __des = __atomic_impl::__clear_padding(__desired);
> +           if (__atomic_compare_exchange(__ptr, __exp, __des, true,
> +                                         int(__success), int(__failure)))
> +             return true;
> +           __builtin_memcpy(std::__addressof(__expected), __exp, sizeof(_Tp));
> +           return false;
> +         }
> +#endif
>        return __atomic_compare_exchange(__ptr, std::__addressof(__expected),
>
> And similarly for compare_exchange_strong (or refactor them into one
> function that takes a bool for weak/strong).
>
> If you do all that in __atomic_impl::compare_exchange_weak (making it
> take a bool for weak/strong) then you can reuse it from
> __atomic_impl::compare_exchange_strong, and then change the generic
> atomic::compare_exchange_{weak,strong} to use that as well.
>
> >
> > diff --git a/libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc b/libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc
> > new file mode 100644
> > index 00000000000..0875f168097
> > --- /dev/null
> > +++ b/libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc
> > @@ -0,0 +1,42 @@
> > +// { dg-options "-std=gnu++2a" }
> > +// { dg-do run { target c++2a } }
>
> We can (and should) use "20" not "2a".
>
> Does it need to be C++20 though, aren't all the clearings that are
> being tested going to happen unconditionally? (well ... as long as the
> builtin exists, which is true for GCC).
>
> > +// { dg-add-options libatomic }
> > +
> > +#include <atomic>
> > +
> > +#include <testsuite_hooks.h>
> > +
> > +struct S { char c; short s; };
> > +
> > +void __attribute__((noinline,noipa))
> > +fill_struct(S& s)
> > +{ __builtin_memset(&s, 0xff, sizeof(S)); }
> > +
> > +bool
> > +compare_struct(const S& a, const S& b)
> > +{ return __builtin_memcmp(&a, &b, sizeof(S)) == 0; }
> > +
> > +int
> > +main ()
> > +{
> > +  S s;
> > +  fill_struct(s);
> > +  s.c = 'a';
> > +  s.s = 42;
> > +
> > +  std::atomic<S> as{ s };
> > +  auto ts = as.load();
> > +  VERIFY( !compare_struct(s, ts) ); // padding cleared on construction
> > +  as.exchange(s);
> > +  auto es = as.load();
> > +  VERIFY( compare_struct(ts, es) ); // padding cleared on exchange
> > +
> > +  S n;
> > +  fill_struct(n);
> > +  n.c = 'b';
> > +  n.s = 71;
> > +  // padding cleared on compexchg
> > +  VERIFY( as.compare_exchange_weak(s, n) );
>
> Is it safe to assume this won't fail spuriously? There is only one
> thread doing the RMW operation, is that enough to avoid spurious
> failures?
>
> > +  VERIFY( as.compare_exchange_strong(n, s) );
> > +  return 0;
> > +}
> > --
> > 2.31.1
> >
> >
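For reference, a rough, untested sketch of what a single __atomic_impl
helper taking a bool for weak/strong could look like, along the lines
described above. The names __clear_padding and __maybe_has_padding are
taken from the snippets quoted above; the function name, signature and
placement are illustrative only, not the final patch:

    template<typename _Tp>
      _GLIBCXX_ALWAYS_INLINE bool
      __compare_exchange(_Tp* __ptr, _Tp& __expected, _Tp __desired,
                         bool __weak, memory_order __s,
                         memory_order __f) noexcept
      {
        __glibcxx_assert(__is_valid_cmpexch_failure_order(__f));

        // __desired is a local copy, so clearing its padding is safe.
        __atomic_impl::__clear_padding(__desired);

        if (__atomic_impl::__maybe_has_padding<_Tp>())
          {
            // Work on a copy of the expected value, so a failing CAS
            // never writes cleared padding back into the caller's object.
            alignas(_Tp) unsigned char __buf[sizeof(_Tp)];
            __builtin_memcpy(__buf, std::__addressof(__expected), sizeof(_Tp));
            _Tp* __exp = reinterpret_cast<_Tp*>(__buf);
            __atomic_impl::__clear_padding(*__exp);
            if (__atomic_compare_exchange(__ptr, __exp,
                                          std::__addressof(__desired),
                                          __weak, int(__s), int(__f)))
              return true;
            // On failure, copy the value the CAS observed back to the
            // caller's expected object.
            __builtin_memcpy(std::__addressof(__expected), __exp, sizeof(_Tp));
            return false;
          }

        return __atomic_compare_exchange(__ptr, std::__addressof(__expected),
                                         std::__addressof(__desired),
                                         __weak, int(__s), int(__f));
      }

Both atomic<T>::compare_exchange_{weak,strong} and the __atomic_impl free
functions used by atomic_ref could then forward to something of this shape.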