[PATCH 1/5] std::latch: reduce internal implementation from ptrdiff

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int
       [not found] <1968544.UC5HiB4uFJ@tjmaciei-mobl1>
@ 2021-02-26 15:59 ` Thiago Macieira
  2021-02-26 15:59   ` [PATCH 2/5] Atomic __platform_wait: accept any 32-bit type, not just int Thiago Macieira
                     ` (5 more replies)
  0 siblings, 6 replies; 40+ messages in thread
From: Thiago Macieira @ 2021-02-26 15:59 UTC (permalink / raw)
  To: libstdc++; +Cc: gcc-patches

ints can be used as futex on Linux. ptrdiff_t on 64-bit Linux can't.
---
 libstdc++-v3/include/std/latch | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/std/latch b/libstdc++-v3/include/std/latch
index ef8c301e5e9..156aea5c5e5 100644
--- a/libstdc++-v3/include/std/latch
+++ b/libstdc++-v3/include/std/latch
@@ -48,7 +48,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   public:
     static constexpr ptrdiff_t
     max() noexcept
-    { return __gnu_cxx::__int_traits<ptrdiff_t>::__max; }
+    { return __gnu_cxx::__int_traits<int>::__max; }
 
     constexpr explicit latch(ptrdiff_t __expected) noexcept
       : _M_a(__expected) { }
@@ -85,7 +85,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 
   private:
-    alignas(__alignof__(ptrdiff_t)) ptrdiff_t _M_a;
+    alignas(__alignof__(int)) int _M_a;
   };
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace
-- 
2.30.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 2/5] Atomic __platform_wait: accept any 32-bit type, not just int
  2021-02-26 15:59 ` [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int Thiago Macieira
@ 2021-02-26 15:59   ` Thiago Macieira
  2021-03-03 14:34     ` Jonathan Wakely
  2021-02-26 15:59   ` [PATCH 3/5] std::__atomic_wait: don't use __detail::__waiter with futex Thiago Macieira
                     ` (4 subsequent siblings)
  5 siblings, 1 reply; 40+ messages in thread
From: Thiago Macieira @ 2021-02-26 15:59 UTC (permalink / raw)
  To: libstdc++; +Cc: gcc-patches

The kernel doesn't care what we store in those 32 bits, only that they are
comparable. This commit adds:
 * pointers and long on 32-bit architectures
 * unsigned
 * untyped enums and typed enums on int & unsigned int
 * float

We're not using FUTEX_OP anywhere today. The kernel reserves 4 bits for
this field but has only used 6 values so far, so it can be extended to
unsigned compares in the future, if needed.
---
 libstdc++-v3/include/bits/atomic_wait.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/bits/atomic_wait.h b/libstdc++-v3/include/bits/atomic_wait.h
index 424fccbe4c5..1c6bda2e2b6 100644
--- a/libstdc++-v3/include/bits/atomic_wait.h
+++ b/libstdc++-v3/include/bits/atomic_wait.h
@@ -65,7 +65,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     template<typename _Tp>
       inline constexpr bool __platform_wait_uses_type
 #ifdef _GLIBCXX_HAVE_LINUX_FUTEX
-	= is_same_v<remove_cv_t<_Tp>, __platform_wait_t>;
+	= is_scalar_v<remove_cv_t<_Tp>> && sizeof(_Tp) == sizeof(__platform_wait_t)
+	  && alignof(_Tp) >= alignof(__platform_wait_t);
 #else
 	= false;
 #endif
@@ -91,13 +92,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
     template<typename _Tp>
       void
-      __platform_wait(const _Tp* __addr, __platform_wait_t __val) noexcept
+      __platform_wait(const _Tp* __addr, _Tp __val) noexcept
       {
 	for(;;)
 	  {
 	    auto __e = syscall (SYS_futex, static_cast<const void*>(__addr),
 				  static_cast<int>(__futex_wait_flags::__wait_private),
-				    __val, nullptr);
+				  static_cast<__platform_wait_t>(__val), nullptr);
 	    if (!__e || errno == EAGAIN)
 	      break;
 	    else if (errno != EINTR)
-- 
2.30.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 3/5] std::__atomic_wait: don't use __detail::__waiter with futex
  2021-02-26 15:59 ` [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int Thiago Macieira
  2021-02-26 15:59   ` [PATCH 2/5] Atomic __platform_wait: accept any 32-bit type, not just int Thiago Macieira
@ 2021-02-26 15:59   ` Thiago Macieira
  2021-02-26 15:59   ` [PATCH 4/5] barrier: use int instead of unsigned char for the phase state Thiago Macieira
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 40+ messages in thread
From: Thiago Macieira @ 2021-02-26 15:59 UTC (permalink / raw)
  To: libstdc++; +Cc: gcc-patches

That violates "don't pay for what you don't need" rule of C++. Users of
std::atomic<T>::wait will often have some bit in their atomic indicate
whether the value is contended or not, so we don't need libstdc++ to do
double book-keeping for us.
---
 libstdc++-v3/include/bits/atomic_wait.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/libstdc++-v3/include/bits/atomic_wait.h b/libstdc++-v3/include/bits/atomic_wait.h
index 1c6bda2e2b6..4d240f44faf 100644
--- a/libstdc++-v3/include/bits/atomic_wait.h
+++ b/libstdc++-v3/include/bits/atomic_wait.h
@@ -258,14 +258,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (std::__atomic_spin(__pred))
 	return;
 
-      __waiter __w(__addr);
-      while (!__pred())
+      if constexpr (__platform_wait_uses_type<_Tp>)
 	{
-	  if constexpr (__platform_wait_uses_type<_Tp>)
-	    {
-	      __platform_wait(__addr, __old);
-	    }
-	  else
+	  __platform_wait(__addr, __old);
+	}
+      else
+	{
+	  __waiter __w(__addr);
+	  while (!__pred())
 	    {
 	      // TODO support timed backoff when this can be moved into the lib
 	      __w._M_do_wait();
@@ -274,13 +274,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 
   template<typename _Tp>
-    void
+    inline void
     __atomic_notify(const _Tp* __addr, bool __all) noexcept
     {
       using namespace __detail;
-      auto& __w = __waiters::_S_for((void*)__addr);
-      if (!__w._M_waiting())
-	return;
 
 #ifdef _GLIBCXX_HAVE_LINUX_FUTEX
       if constexpr (__platform_wait_uses_type<_Tp>)
@@ -290,6 +287,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       else
 #endif
 	{
+	  auto& __w = __waiters::_S_for((void*)__addr);
+	  if (!__w._M_waiting())
+	    return;
 	  __w._M_notify(__all);
 	}
     }
-- 
2.30.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 4/5] barrier: use int instead of unsigned char for the phase state
  2021-02-26 15:59 ` [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int Thiago Macieira
  2021-02-26 15:59   ` [PATCH 2/5] Atomic __platform_wait: accept any 32-bit type, not just int Thiago Macieira
  2021-02-26 15:59   ` [PATCH 3/5] std::__atomic_wait: don't use __detail::__waiter with futex Thiago Macieira
@ 2021-02-26 15:59   ` Thiago Macieira
  2021-02-28 15:05     ` Hans-Peter Nilsson
  2021-02-26 15:59   ` [PATCH 5/5] barrier: optimise by not having the hasher in a loop Thiago Macieira
                     ` (2 subsequent siblings)
  5 siblings, 1 reply; 40+ messages in thread
From: Thiago Macieira @ 2021-02-26 15:59 UTC (permalink / raw)
  To: libstdc++; +Cc: gcc-patches

ints can be used in futexes. chars can't.
---
 libstdc++-v3/include/std/barrier | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/libstdc++-v3/include/std/barrier b/libstdc++-v3/include/std/barrier
index e09212dfcb9..ae058bd3dc3 100644
--- a/libstdc++-v3/include/std/barrier
+++ b/libstdc++-v3/include/std/barrier
@@ -70,7 +70,7 @@ It looks different from literature pseudocode for two main reasons:
 
 */
 
-  enum class __barrier_phase_t : unsigned char { };
+  enum class __barrier_phase_t : int { };
 
   template<typename _CompletionF>
     class __tree_barrier
@@ -93,20 +93,24 @@ It looks different from literature pseudocode for two main reasons:
 
       alignas(__phase_alignment) __barrier_phase_t  _M_phase;
 
+      static __barrier_phase_t
+      _S_add_to_phase(__barrier_phase_t __phase, unsigned char __n)
+      {
+        __n += static_cast<unsigned char>(__phase);
+        return static_cast<__barrier_phase_t>(__n);
+      }
+
       bool
       _M_arrive(__barrier_phase_t __old_phase)
       {
-	const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
-	const auto __half_step =
-			   static_cast<__barrier_phase_t>(__old_phase_val + 1);
-	const auto __full_step =
-			   static_cast<__barrier_phase_t>(__old_phase_val + 2);
-
 	size_t __current_expected = _M_expected;
 	std::hash<std::thread::id> __hasher;
 	size_t __current = __hasher(std::this_thread::get_id())
 					  % ((_M_expected + 1) >> 1);
 
+	const auto __half_step = _S_add_to_phase(__old_phase, 1);
+	const auto __full_step = _S_add_to_phase(__old_phase, 2);
+
 	for (int __round = 0; ; ++__round)
 	  {
 	    if (__current_expected <= 1)
@@ -165,7 +169,6 @@ It looks different from literature pseudocode for two main reasons:
       {
 	__atomic_phase_ref_t __phase(_M_phase);
 	const auto __old_phase = __phase.load(memory_order_relaxed);
-	const auto __cur = static_cast<unsigned char>(__old_phase);
 	for(; __update; --__update)
 	  {
 	    if(_M_arrive(__old_phase))
@@ -173,7 +176,7 @@ It looks different from literature pseudocode for two main reasons:
 		_M_completion();
 		_M_expected += _M_expected_adjustment.load(memory_order_relaxed);
 		_M_expected_adjustment.store(0, memory_order_relaxed);
-		auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2);
+		auto __new_phase = _S_add_to_phase(__old_phase, 2);
 		__phase.store(__new_phase, memory_order_release);
 		__phase.notify_all();
 	      }
-- 
2.30.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 5/5] barrier: optimise by not having the hasher in a loop
  2021-02-26 15:59 ` [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int Thiago Macieira
                     ` (2 preceding siblings ...)
  2021-02-26 15:59   ` [PATCH 4/5] barrier: use int instead of unsigned char for the phase state Thiago Macieira
@ 2021-02-26 15:59   ` Thiago Macieira
  2021-03-03 14:36     ` Jonathan Wakely
  2021-02-26 18:14   ` [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int Andreas Schwab
  2021-03-03 14:34   ` Jonathan Wakely
  5 siblings, 1 reply; 40+ messages in thread
From: Thiago Macieira @ 2021-02-26 15:59 UTC (permalink / raw)
  To: libstdc++; +Cc: gcc-patches

Our thread's ID does not change so we don't have to get it every time
and hash it every time.
---
 libstdc++-v3/include/std/barrier | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libstdc++-v3/include/std/barrier b/libstdc++-v3/include/std/barrier
index ae058bd3dc3..eb31a89b175 100644
--- a/libstdc++-v3/include/std/barrier
+++ b/libstdc++-v3/include/std/barrier
@@ -101,12 +101,10 @@ It looks different from literature pseudocode for two main reasons:
       }
 
       bool
-      _M_arrive(__barrier_phase_t __old_phase)
+      _M_arrive(__barrier_phase_t __old_phase, size_t __current)
       {
 	size_t __current_expected = _M_expected;
-	std::hash<std::thread::id> __hasher;
-	size_t __current = __hasher(std::this_thread::get_id())
-					  % ((_M_expected + 1) >> 1);
+        __current %= ((__current_expected + 1) >> 1);
 
 	const auto __half_step = _S_add_to_phase(__old_phase, 1);
 	const auto __full_step = _S_add_to_phase(__old_phase, 2);
@@ -167,11 +165,13 @@ It looks different from literature pseudocode for two main reasons:
       [[nodiscard]] arrival_token
       arrive(ptrdiff_t __update)
       {
+	std::hash<std::thread::id> __hasher;
+	size_t __current = __hasher(std::this_thread::get_id());
 	__atomic_phase_ref_t __phase(_M_phase);
 	const auto __old_phase = __phase.load(memory_order_relaxed);
 	for(; __update; --__update)
 	  {
-	    if(_M_arrive(__old_phase))
+	    if(_M_arrive(__old_phase, __current))
 	      {
 		_M_completion();
 		_M_expected += _M_expected_adjustment.load(memory_order_relaxed);
-- 
2.30.1


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int
  2021-02-26 15:59 ` [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int Thiago Macieira
                     ` (3 preceding siblings ...)
  2021-02-26 15:59   ` [PATCH 5/5] barrier: optimise by not having the hasher in a loop Thiago Macieira
@ 2021-02-26 18:14   ` Andreas Schwab
  2021-02-26 19:08     ` Thiago Macieira
  2021-03-03 14:34   ` Jonathan Wakely
  5 siblings, 1 reply; 40+ messages in thread
From: Andreas Schwab @ 2021-02-26 18:14 UTC (permalink / raw)
  To: Thiago Macieira via Gcc-patches; +Cc: libstdc++, Thiago Macieira

On Feb 26 2021, Thiago Macieira via Gcc-patches wrote:

> @@ -85,7 +85,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>      }
>  
>    private:
> -    alignas(__alignof__(ptrdiff_t)) ptrdiff_t _M_a;
> +    alignas(__alignof__(int)) int _M_a;

Futexes must be aligned to 4 bytes.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int
  2021-02-26 18:14   ` [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int Andreas Schwab
@ 2021-02-26 19:08     ` Thiago Macieira
  2021-02-26 19:31       ` Andreas Schwab
  0 siblings, 1 reply; 40+ messages in thread
From: Thiago Macieira @ 2021-02-26 19:08 UTC (permalink / raw)
  To: Thiago Macieira via Gcc-patches, Andreas Schwab; +Cc: libstdc++

On Friday, 26 February 2021 10:14:42 PST Andreas Schwab wrote:
> On Feb 26 2021, Thiago Macieira via Gcc-patches wrote:
> > @@ -85,7 +85,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> > }
> > 
> > private:
> > -    alignas(__alignof__(ptrdiff_t)) ptrdiff_t _M_a;
> > +    alignas(__alignof__(int)) int _M_a;
> 
> Futexes must be aligned to 4 bytes.

Agreed, but doesn't this accomplish that?

-- 
Thiago Macieira - thiago.macieira (AT) intel.com
  Software Architect - Intel DPG Cloud Engineering




^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int
  2021-02-26 19:08     ` Thiago Macieira
@ 2021-02-26 19:31       ` Andreas Schwab
  2021-02-27  0:13         ` Thiago Macieira
  0 siblings, 1 reply; 40+ messages in thread
From: Andreas Schwab @ 2021-02-26 19:31 UTC (permalink / raw)
  To: Thiago Macieira; +Cc: Thiago Macieira via Gcc-patches, libstdc++

On Feb 26 2021, Thiago Macieira wrote:

> On Friday, 26 February 2021 10:14:42 PST Andreas Schwab wrote:
>> On Feb 26 2021, Thiago Macieira via Gcc-patches wrote:
>> > @@ -85,7 +85,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> > }
>> > 
>> > private:
>> > -    alignas(__alignof__(ptrdiff_t)) ptrdiff_t _M_a;
>> > +    alignas(__alignof__(int)) int _M_a;
>> 
>> Futexes must be aligned to 4 bytes.
>
> Agreed, but doesn't this accomplish that?

No.  It uses whatever alignment the type already has, and is an
elaborate no-op.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int
  2021-02-26 19:31       ` Andreas Schwab
@ 2021-02-27  0:13         ` Thiago Macieira
  2021-02-28 21:31           ` Hans-Peter Nilsson
  0 siblings, 1 reply; 40+ messages in thread
From: Thiago Macieira @ 2021-02-27  0:13 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: gcc-patches, libstdc++

On Friday, 26 February 2021 11:31:00 PST Andreas Schwab wrote:
> On Feb 26 2021, Thiago Macieira wrote:
> > On Friday, 26 February 2021 10:14:42 PST Andreas Schwab wrote:
> >> On Feb 26 2021, Thiago Macieira via Gcc-patches wrote:
> >> > -    alignas(__alignof__(ptrdiff_t)) ptrdiff_t _M_a;
> >> > +    alignas(__alignof__(int)) int _M_a;
> >> 
> >> Futexes must be aligned to 4 bytes.
> > 
> > Agreed, but doesn't this accomplish that?
> 
> No.  It uses whatever alignment the type already has, and is an
> elaborate no-op.

I thought so too when I read the original line. But I expected it was written 
like that for a reason, especially since the same pattern appears in other 
places.

I can change to "alignas(4)" (which is a GCC extension, I believe). Is that 
the correct solution?


-- 
Thiago Macieira - thiago.macieira (AT) intel.com
  Software Architect - Intel DPG Cloud Engineering




^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/5] barrier: use int instead of unsigned char for the phase state
  2021-02-26 15:59   ` [PATCH 4/5] barrier: use int instead of unsigned char for the phase state Thiago Macieira
@ 2021-02-28 15:05     ` Hans-Peter Nilsson
  2021-03-01 16:28       ` Thomas Rodgers
  2021-03-01 17:24       ` Thiago Macieira
  0 siblings, 2 replies; 40+ messages in thread
From: Hans-Peter Nilsson @ 2021-02-28 15:05 UTC (permalink / raw)
  To: Thiago Macieira; +Cc: libstdc++, gcc-patches

On Fri, 26 Feb 2021, Thiago Macieira via Gcc-patches wrote:
> ints can be used in futexes. chars can't.

Shouldn't that be an atomic type instead of a bare int then?

> ---
>  libstdc++-v3/include/std/barrier | 21 ++++++++++++---------
>  1 file changed, 12 insertions(+), 9 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/barrier b/libstdc++-v3/include/std/barrier
> index e09212dfcb9..ae058bd3dc3 100644
> --- a/libstdc++-v3/include/std/barrier
> +++ b/libstdc++-v3/include/std/barrier
> @@ -70,7 +70,7 @@ It looks different from literature pseudocode for two main reasons:
>
>  */
>
> -  enum class __barrier_phase_t : unsigned char { };
> +  enum class __barrier_phase_t : int { };

brgds, H-P

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int
  2021-02-27  0:13         ` Thiago Macieira
@ 2021-02-28 21:31           ` Hans-Peter Nilsson
  2021-03-01  8:56             ` Richard Biener
  2021-03-01 16:32             ` Thomas Rodgers
  0 siblings, 2 replies; 40+ messages in thread
From: Hans-Peter Nilsson @ 2021-02-28 21:31 UTC (permalink / raw)
  To: Thiago Macieira; +Cc: Andreas Schwab, libstdc++, gcc-patches



On Fri, 26 Feb 2021, Thiago Macieira via Gcc-patches wrote:

> On Friday, 26 February 2021 11:31:00 PST Andreas Schwab wrote:
> > On Feb 26 2021, Thiago Macieira wrote:
> > > On Friday, 26 February 2021 10:14:42 PST Andreas Schwab wrote:
> > >> On Feb 26 2021, Thiago Macieira via Gcc-patches wrote:
> > >> > -    alignas(__alignof__(ptrdiff_t)) ptrdiff_t _M_a;
> > >> > +    alignas(__alignof__(int)) int _M_a;
> > >>
> > >> Futexes must be aligned to 4 bytes.
> > >
> > > Agreed, but doesn't this accomplish that?
> >
> > No.  It uses whatever alignment the type already has, and is an
> > elaborate no-op.
>
> I thought so too when I read the original line. But I expected it was written
> like that for a reason, especially since the same pattern appears in other
> places.
>
> I can change to "alignas(4)" (which is a GCC extension, I believe). Is that
> the correct solution?

IMNSHO make use of the corresponding atomic type.  Then there'd
be no need for separate what's-the-right-align-curse games.

brgds, H-P

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int
  2021-02-28 21:31           ` Hans-Peter Nilsson
@ 2021-03-01  8:56             ` Richard Biener
  2021-03-03 14:56               ` Jonathan Wakely
  2021-03-01 16:32             ` Thomas Rodgers
  1 sibling, 1 reply; 40+ messages in thread
From: Richard Biener @ 2021-03-01  8:56 UTC (permalink / raw)
  To: Hans-Peter Nilsson
  Cc: Thiago Macieira, GCC Patches, libstdc++, Andreas Schwab

On Sun, Feb 28, 2021 at 10:53 PM Hans-Peter Nilsson <hp@bitrange.com> wrote:
>
>
>
> On Fri, 26 Feb 2021, Thiago Macieira via Gcc-patches wrote:
>
> > On Friday, 26 February 2021 11:31:00 PST Andreas Schwab wrote:
> > > On Feb 26 2021, Thiago Macieira wrote:
> > > > On Friday, 26 February 2021 10:14:42 PST Andreas Schwab wrote:
> > > >> On Feb 26 2021, Thiago Macieira via Gcc-patches wrote:
> > > >> > -    alignas(__alignof__(ptrdiff_t)) ptrdiff_t _M_a;
> > > >> > +    alignas(__alignof__(int)) int _M_a;
> > > >>
> > > >> Futexes must be aligned to 4 bytes.
> > > >
> > > > Agreed, but doesn't this accomplish that?
> > >
> > > No.  It uses whatever alignment the type already has, and is an
> > > elaborate no-op.
> >
> > I thought so too when I read the original line. But I expected it was written
> > like that for a reason, especially since the same pattern appears in other
> > places.
> >
> > I can change to "alignas(4)" (which is a GCC extension, I believe). Is that
> > the correct solution?
>
> IMNSHO make use of the corresponding atomic type.  Then there'd
> be no need for separate what's-the-right-align-curse games.

Or align as the corresponding atomic type (in case using an actual
std::atomic<int> is undesirable).  OTOH the proposed code isn't
any more bogus than the previous ...

Richard.

> brgds, H-P

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/5] barrier: use int instead of unsigned char for the phase state
  2021-02-28 15:05     ` Hans-Peter Nilsson
@ 2021-03-01 16:28       ` Thomas Rodgers
  2021-03-01 17:24       ` Thiago Macieira
  1 sibling, 0 replies; 40+ messages in thread
From: Thomas Rodgers @ 2021-03-01 16:28 UTC (permalink / raw)
  To: Hans-Peter Nilsson; +Cc: libstdc++, gcc-patches

On 2021-02-28 07:05, Hans-Peter Nilsson wrote:

> On Fri, 26 Feb 2021, Thiago Macieira via Gcc-patches wrote:
> 
>> ints can be used in futexes. chars can't.
> 
> Shouldn't that be an atomic type instead of a bare int then?
> 

It's an atomic_ref.

>> ---
>> libstdc++-v3/include/std/barrier | 21 ++++++++++++---------
>> 1 file changed, 12 insertions(+), 9 deletions(-)
>> 
>> diff --git a/libstdc++-v3/include/std/barrier 
>> b/libstdc++-v3/include/std/barrier
>> index e09212dfcb9..ae058bd3dc3 100644
>> --- a/libstdc++-v3/include/std/barrier
>> +++ b/libstdc++-v3/include/std/barrier
>> @@ -70,7 +70,7 @@ It looks different from literature pseudocode for 
>> two main reasons:
>> 
>> */
>> 
>> -  enum class __barrier_phase_t : unsigned char { };
>> +  enum class __barrier_phase_t : int { };
> 
> brgds, H-P

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int
  2021-02-28 21:31           ` Hans-Peter Nilsson
  2021-03-01  8:56             ` Richard Biener
@ 2021-03-01 16:32             ` Thomas Rodgers
  1 sibling, 0 replies; 40+ messages in thread
From: Thomas Rodgers @ 2021-03-01 16:32 UTC (permalink / raw)
  To: Hans-Peter Nilsson
  Cc: Thiago Macieira, gcc-patches, libstdc++, Andreas Schwab

On 2021-02-28 13:31, Hans-Peter Nilsson wrote:

>> On Fri, 26 Feb 2021, Thiago Macieira via Gcc-patches wrote:
>> 
>> On Friday, 26 February 2021 11:31:00 PST Andreas Schwab wrote: On Feb 
>> 26 2021, Thiago Macieira wrote: On Friday, 26 February 2021 10:14:42 
>> PST Andreas Schwab wrote: On Feb 26 2021, Thiago Macieira via 
>> Gcc-patches wrote: -    alignas(__alignof__(ptrdiff_t)) ptrdiff_t 
>> _M_a;
>> +    alignas(__alignof__(int)) int _M_a;
>> Futexes must be aligned to 4 bytes.
>> 
>> Agreed, but doesn't this accomplish that?
> No.  It uses whatever alignment the type already has, and is an
> elaborate no-op.
> I thought so too when I read the original line. But I expected it was 
> written
> like that for a reason, especially since the same pattern appears in 
> other
> places.

> I can change to "alignas(4)" (which is a GCC extension, I believe). Is 
> that
> the correct solution?
> IMNSHO make use of the corresponding atomic type.  Then there'd
> be no need for separate what's-the-right-align-curse games.

There is no predicate wait on atomic<T>.

> brgds, H-P

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/5] barrier: use int instead of unsigned char for the phase state
  2021-02-28 15:05     ` Hans-Peter Nilsson
  2021-03-01 16:28       ` Thomas Rodgers
@ 2021-03-01 17:24       ` Thiago Macieira
  2021-03-01 17:38         ` Thomas Rodgers
  2021-03-01 18:12         ` Ville Voutilainen
  1 sibling, 2 replies; 40+ messages in thread
From: Thiago Macieira @ 2021-03-01 17:24 UTC (permalink / raw)
  To: Hans-Peter Nilsson; +Cc: libstdc++, gcc-patches

On Sunday, 28 February 2021 07:05:47 PST Hans-Peter Nilsson wrote:
> On Fri, 26 Feb 2021, Thiago Macieira via Gcc-patches wrote:
> > ints can be used in futexes. chars can't.
> 
> Shouldn't that be an atomic type instead of a bare int then?

There are a couple of atomic_refs:

      using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
      using __atomic_phase_const_ref_t = std::__atomic_ref<const 
__barrier_phase_t>;

And _M_phase, despite being non-atomic, is never accessed without the 
atomic_ref, aside from the constructor. Both arrive() and wait() start off by 
creating the atomic_ref.

But I confess I don't understand this code sufficiently to say it is correct. 
I'm simply saying that waiting on unsigned chars will not use a futex, at 
least until https://lkml.org/lkml/2019/12/4/1373 is merged into the kernel.

-- 
Thiago Macieira - thiago.macieira (AT) intel.com
  Software Architect - Intel DPG Cloud Engineering




^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/5] barrier: use int instead of unsigned char for the phase state
  2021-03-01 17:24       ` Thiago Macieira
@ 2021-03-01 17:38         ` Thomas Rodgers
  2021-03-01 17:40           ` Thomas Rodgers
  2021-03-01 18:06           ` Thiago Macieira
  2021-03-01 18:12         ` Ville Voutilainen
  1 sibling, 2 replies; 40+ messages in thread
From: Thomas Rodgers @ 2021-03-01 17:38 UTC (permalink / raw)
  To: Thiago Macieira; +Cc: Hans-Peter Nilsson, libstdc++, gcc-patches

On 2021-03-01 09:24, Thiago Macieira via Libstdc++ wrote:

> On Sunday, 28 February 2021 07:05:47 PST Hans-Peter Nilsson wrote: On 
> Fri, 26 Feb 2021, Thiago Macieira via Gcc-patches wrote: ints can be 
> used in futexes. chars can't.
> Shouldn't that be an atomic type instead of a bare int then?

> There are a couple of atomic_refs:
> 
>      using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
>      using __atomic_phase_const_ref_t = std::__atomic_ref<const
> __barrier_phase_t>;
> 
> And _M_phase, despite being non-atomic, is never accessed without the
> atomic_ref, aside from the constructor. Both arrive() and wait() start 
> off by
> creating the atomic_ref.

If it's non-atomic, then how is wait() supposed to wait on it, 
atomically?

> But I confess I don't understand this code sufficiently to say it is 
> correct.
> I'm simply saying that waiting on unsigned chars will not use a futex, 
> at
> least until https://lkml.org/lkml/2019/12/4/1373 is merged into the 
> kernel.

And I am not disagreeing with that. I am, however saying, that I know 
this particular implementation (well the upstream one it is based on) 
has been extensively tested by the original author (ogiroux) including 
time on Summit. If we are going to start changing his design decisions 
(beyond the largely cosmetic, not algorithmic, ones that I have made as 
per Jonathan's request), they should be motivated by more than a 'well 
we feel int's would be better here because Futex' justification.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/5] barrier: use int instead of unsigned char for the phase state
  2021-03-01 17:38         ` Thomas Rodgers
@ 2021-03-01 17:40           ` Thomas Rodgers
  2021-03-01 18:06           ` Thiago Macieira
  1 sibling, 0 replies; 40+ messages in thread
From: Thomas Rodgers @ 2021-03-01 17:40 UTC (permalink / raw)
  To: libstdc++, gcc-patches

Botched replying to the list, sending again

On 2021-03-01 09:38, Thomas Rodgers wrote:

> On 2021-03-01 09:24, Thiago Macieira via Libstdc++ wrote:
> 
>> On Sunday, 28 February 2021 07:05:47 PST Hans-Peter Nilsson wrote: On 
>> Fri, 26 Feb 2021, Thiago Macieira via Gcc-patches wrote: ints can be 
>> used in futexes. chars can't.
>> Shouldn't that be an atomic type instead of a bare int then?
> 
>> There are a couple of atomic_refs:
>> 
>> using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
>> using __atomic_phase_const_ref_t = std::__atomic_ref<const
>> __barrier_phase_t>;
>> 
>> And _M_phase, despite being non-atomic, is never accessed without the
>> atomic_ref, aside from the constructor. Both arrive() and wait() start 
>> off by
>> creating the atomic_ref.
> 
> If it's non-atomic, then how is wait() supposed to wait on it, 
> atomically?
> 
>> But I confess I don't understand this code sufficiently to say it is 
>> correct.
>> I'm simply saying that waiting on unsigned chars will not use a futex, 
>> at
>> least until https://lkml.org/lkml/2019/12/4/1373 is merged into the 
>> kernel.
> 
> And I am not disagreeing with that. I am, however saying, that I know 
> this particular implementation (well the upstream one it is based on) 
> has been extensively tested by the original author (ogiroux) including 
> time on Summit. If we are going to start changing his design decisions 
> (beyond the largely cosmetic, not algorithmic, ones that I have made as 
> per Jonathan's request), they should be motivated by more than a 'well 
> we feel int's would be better here because Futex' justification.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/5] barrier: use int instead of unsigned char for the phase state
  2021-03-01 17:38         ` Thomas Rodgers
  2021-03-01 17:40           ` Thomas Rodgers
@ 2021-03-01 18:06           ` Thiago Macieira
  2021-03-01 19:08             ` Thomas Rodgers
  1 sibling, 1 reply; 40+ messages in thread
From: Thiago Macieira @ 2021-03-01 18:06 UTC (permalink / raw)
  To: Thomas Rodgers; +Cc: Hans-Peter Nilsson, libstdc++, gcc-patches

On Monday, 1 March 2021 09:38:58 PST Thomas Rodgers wrote:
> > And _M_phase, despite being non-atomic, is never accessed without the
> > atomic_ref, aside from the constructor. Both arrive() and wait() start
> > off by
> > creating the atomic_ref.
> 
> If it's non-atomic, then how is wait() supposed to wait on it,
> atomically?

Hey, it's your code :-)

It is using atomics to operate on the value. It's just that the type on the 
structure isn't an atomic by itself.

Why, I don't know. Olivier's original code did use atomics 
<https://github.com/ogiroux/atomic_wait/blob/master/include/barrier#L55-L56>:
    std::atomic<ptrdiff_t>             expected_adjustment;
    std::atomic<__phase_t>             phase;


> And I am not disagreeing with that. I am, however saying, that I know
> this particular implementation (well the upstream one it is based on)
> has been extensively tested by the original author (ogiroux) including
> time on Summit. If we are going to start changing his design decisions
> (beyond the largely cosmetic, not algorithmic, ones that I have made as
> per Jonathan's request), they should be motivated by more than a 'well
> we feel int's would be better here because Futex' justification.

That's a reasonable request.

I'd like to see the benchmark results of using a directly-futexable type vs 
using unsigned char. But even the timing results need to be weighed against 
the increased memory use for std::barrier, which means it's not a directly-
objective conclusion. And given we *may* get 8-bit futexes, it might be worth 
keeping them this way and just tell people with large contentions to upgrade.

That of course rests on the contended atomic_wait being out-of-line.

-- 
Thiago Macieira - thiago.macieira (AT) intel.com
  Software Architect - Intel DPG Cloud Engineering




^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/5] barrier: use int instead of unsigned char for the phase state
  2021-03-01 17:24       ` Thiago Macieira
  2021-03-01 17:38         ` Thomas Rodgers
@ 2021-03-01 18:12         ` Ville Voutilainen
  2021-03-01 19:44           ` Thiago Macieira
  1 sibling, 1 reply; 40+ messages in thread
From: Ville Voutilainen @ 2021-03-01 18:12 UTC (permalink / raw)
  To: Thiago Macieira; +Cc: Hans-Peter Nilsson, libstdc++, gcc-patches List

On Mon, 1 Mar 2021 at 20:09, Thiago Macieira via Libstdc++
<libstdc++@gcc.gnu.org> wrote:
>
> On Sunday, 28 February 2021 07:05:47 PST Hans-Peter Nilsson wrote:
> > On Fri, 26 Feb 2021, Thiago Macieira via Gcc-patches wrote:
> > > ints can be used in futexes. chars can't.
> >
> > Shouldn't that be an atomic type instead of a bare int then?
>
> There are a couple of atomic_refs:
>
>       using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
>       using __atomic_phase_const_ref_t = std::__atomic_ref<const
> __barrier_phase_t>;
>
> And _M_phase, despite being non-atomic, is never accessed without the
> atomic_ref, aside from the constructor. Both arrive() and wait() start off by
> creating the atomic_ref.
>
> But I confess I don't understand this code sufficiently to say it is correct.
> I'm simply saying that waiting on unsigned chars will not use a futex, at
> least until https://lkml.org/lkml/2019/12/4/1373 is merged into the kernel.

I do have a question about the intent/concern here, regardless of what
your patch technically
does. The ABI break _is_ your concern, and the "heisenbugs" you were
worried about would
in fact be caused by the ABI break? So if you embed these things in
your QAtomicThing, the ABI
break may mess it up(*)? Is that a correct understanding of the concern here?

(*) And inlining might not help because users might compile a
different std::atomic_thing in their
program than what your DSO has been compiled with, and you may end up
using mixtures.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/5] barrier: use int instead of unsigned char for the phase state
  2021-03-01 18:06           ` Thiago Macieira
@ 2021-03-01 19:08             ` Thomas Rodgers
  0 siblings, 0 replies; 40+ messages in thread
From: Thomas Rodgers @ 2021-03-01 19:08 UTC (permalink / raw)
  To: Thiago Macieira; +Cc: Hans-Peter Nilsson, libstdc++, gcc-patches

On 2021-03-01 10:06, Thiago Macieira wrote:

> On Monday, 1 March 2021 09:38:58 PST Thomas Rodgers wrote: And 
> _M_phase, despite being non-atomic, is never accessed without the
> atomic_ref, aside from the constructor. Both arrive() and wait() start
> off by
> creating the atomic_ref.
> If it's non-atomic, then how is wait() supposed to wait on it,
> atomically?

> Hey, it's your code :-)
> 
> It is using atomics to operate on the value. It's just that the type on 
> the
> structure isn't an atomic by itself.

> Why, I don't know. Olivier's original code did use atomics
> <https://github.com/ogiroux/atomic_wait/blob/master/include/barrier#L55-L56>:
>    std::atomic<ptrdiff_t>             expected_adjustment;
>    std::atomic<__phase_t>             phase;

It is atomic, in that it is always an atomic_ref. This change was made 
at
Jonathan's request.

> And I am not disagreeing with that. I am, however saying, that I know
> this particular implementation (well the upstream one it is based on)
> has been extensively tested by the original author (ogiroux) including
> time on Summit. If we are going to start changing his design decisions
> (beyond the largely cosmetic, not algorithmic, ones that I have made as
> per Jonathan's request), they should be motivated by more than a 'well
> we feel int's would be better here because Futex' justification.

> That's a reasonable request.
> 
> I'd like to see the benchmark results of using a directly-futexable 
> type vs
> using unsigned char. But even the timing results need to be weighed 
> against
> the increased memory use for std::barrier, which means it's not a 
> directly-
> objective conclusion. And given we *may* get 8-bit futexes, it might be 
> worth
> keeping them this way and just tell people with large contentions to 
> upgrade.

We may also want to introduce a central barrier type for memory 
constrained environments. I specifically
removed that from this patch because it -

1) wasn't clear how we'd go about making that decision
2) this support in GCC11 is experimental

> That of course rests on the contended atomic_wait being out-of-line.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/5] barrier: use int instead of unsigned char for the phase state
  2021-03-01 18:12         ` Ville Voutilainen
@ 2021-03-01 19:44           ` Thiago Macieira
  2021-03-01 20:35             ` Ville Voutilainen
  0 siblings, 1 reply; 40+ messages in thread
From: Thiago Macieira @ 2021-03-01 19:44 UTC (permalink / raw)
  To: Ville Voutilainen, libstdc++; +Cc: gcc-patches List

On Monday, 1 March 2021 10:12:35 PST Ville Voutilainen wrote:
> I do have a question about the intent/concern here, regardless of what
> your patch technically
> does. The ABI break _is_ your concern, and the "heisenbugs" you were
> worried about would
> in fact be caused by the ABI break? So if you embed these things in
> your QAtomicThing, the ABI
> break may mess it up(*)? Is that a correct understanding of the concern
> here?

Let me clarify. I am stating that the current implementation is inefficient 
because Linux currently only implements 32-bit futexes. With that in mind, I 
am arguing we need to change the implementation in a way that will break ABI 
in a new release.

The concern is that such a change, despite being in experimental code for 
which ABI stability has never been promised, is still troublesome. The problem 
there is that even people who have read the documentation and waited until 
2024 to write code using the feature may still be affected. This isn't about 
Qt because I have no plans to use wait and notify in inline API.

But you can see someone doing:

#if __cplusplus >= 202002L && __has_include(<latch>)
#  include <latch>
#else
#  error "Please upgrade your compiler & standard library"
#endif

and using <latch> in their inline code. And as you say, if they then mix DSOs 
compiled with different versions of GCC, one of them might be the old, 
experimental and binary-incompatible code. Remember that before April 2024, 
the most recent Ubuntu LTS will be 22.04, which will be released before GCC 
12, which means it will contain GCC 11.

So, wholly aside from how we fix the inefficiencies, we must decide how to 
deal with the ABI break. We can:

a) not have an ABI break in the first place, by having the above code not 
   compile with GCC until we promise ABI compatibility
b) cause a non-silent ABI break
c) accept the silent ABI break

I advocate for (a) and vehemently disagree with (c). Meanwhile, (b) can be 
something like (untested!):

 _GLIBCXX_BEGIN_NAMESPACE_VERSION
   namespace __detail
   {
+    struct __waiters;
+    inline namespace __experimental
+    {
+      // from atomic_wait.cc
+      __waiters &__get_waiter(const void *);
+    }
     using __platform_wait_t = int;

     constexpr auto __atomic_spin_count_1 = 16;
@@ -187,11 +193,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       static __waiters&
       _S_for(const void* __t)
       {
-       const unsigned char __mask = 0xf;
-       static __waiters __w[__mask + 1];
-
-       auto __key = _Hash_impl::hash(__t) & __mask;
-       return __w[__key];
+       return __get_waiter(__t);
       }
     };

That way, any DSO compiled using GCC 11 will fail to load when using GCC 12's 
libstdc++. And probably put "std::__detail::__experimental" in the 
GLIBCXX_EXPERIMENTAL ELF version so it's even clearer that it's experimental, 
thus helping Linux distributions and other binary artefact distributors 
realise they've made a mistake.

I still don't like (b) because it pushes the problem to the wrong people: the 
packagers. But it's far better than (c).

-- 
Thiago Macieira - thiago.macieira (AT) intel.com
  Software Architect - Intel DPG Cloud Engineering

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/5] barrier: use int instead of unsigned char for the phase state
  2021-03-01 19:44           ` Thiago Macieira
@ 2021-03-01 20:35             ` Ville Voutilainen
  2021-03-01 21:54               ` Thiago Macieira
  0 siblings, 1 reply; 40+ messages in thread
From: Ville Voutilainen @ 2021-03-01 20:35 UTC (permalink / raw)
  To: Thiago Macieira; +Cc: libstdc++, gcc-patches List

On Mon, 1 Mar 2021 at 21:44, Thiago Macieira <thiago.macieira@intel.com> wrote:
> But you can see someone doing:
>
> #if __cplusplus >= 202002L && __has_include(<latch>)
> #  include <latch>
> #else
> #  error "Please upgrade your compiler & standard library"
> #endif
>
> and using <latch> in their inline code. And as you say, if they then mix DSOs
> compiled with different versions of GCC, one of them might be the old,
> experimental and binary-incompatible code. Remember that before April 2024,
> the most recent Ubuntu LTS will be 22.04, which will be released before GCC
> 12, which means it will contain GCC 11.
>
> So, wholly aside from how we fix the inefficiencies, we must decide how to
> deal with the ABI break. We can:
>
> a) not have an ABI break in the first place, by having the above code not
>    compile with GCC until we promise ABI compatibility
> b) cause a non-silent ABI break
> c) accept the silent ABI break
>
> I advocate for (a) and vehemently disagree with (c). Meanwhile, (b) can be
> something like (untested!):

I can't make the above code work, in any reasonable manner, because
it's doing feature detection incorrectly. :)
What I can imagine doing, however, is this:

1) a published IS always bumps feature-macro values (this won't help
now, it's a future fix, we don't currently do this in WG21)
2) an implementation uses the pre-IS draft values before it deems itself stable
3) users who want stability should require the feature-testing macro
to have at least the IS value
4) adventurous users can either use the macro without checking its
value, or use at least the value that gives them
whatever shiny toy they're interested in

With that, we can keep allowing adventurous users to opt in early, and
you have a portable way to detect that
features are stable, for an implementation-defined definition of "stable".

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/5] barrier: use int instead of unsigned char for the phase state
  2021-03-01 20:35             ` Ville Voutilainen
@ 2021-03-01 21:54               ` Thiago Macieira
  2021-03-01 22:04                 ` Ville Voutilainen
  0 siblings, 1 reply; 40+ messages in thread
From: Thiago Macieira @ 2021-03-01 21:54 UTC (permalink / raw)
  To: Ville Voutilainen; +Cc: libstdc++, gcc-patches List

On Monday, 1 March 2021 12:35:05 PST Ville Voutilainen wrote:
> I can't make the above code work, in any reasonable manner, because
> it's doing feature detection incorrectly. :)
> What I can imagine doing, however, is this:
> 
> 1) a published IS always bumps feature-macro values (this won't help
> now, it's a future fix, we don't currently do this in WG21)

This is mostly already the case. I haven't seen any need to bump the values.

> 2) an implementation uses the pre-IS draft values before it deems itself
> stable 

Before the IS is stable? From what I've seen so far, the macros are always 
around the month of the latest draft of a given feature. So if an 
implementation implemented an earlier draft, the macro version would increase 
in a new draft coming before the IS.

I think we just need to be careful of updates done after the latest draft is 
adopted, usually by NBs. Those have to bump the macro too. I think you're in 
good speaking terms with the Finland NB :-)

> 3) users who want stability should require the feature-testing macro
> to have at least the IS value

True. That's not the stability I was talking about, but it could be done.

> 4) adventurous users can either use the macro without checking its
> value, or use at least the value that gives them
> whatever shiny toy they're interested in

Fair enough.

Please note I am not talking about the stability of the feature as described 
in the standard or a proposal or draft. I am talking about the stability of 
the implementation. C++20 is out and the atomic-wait feature is stable, as 
defined by the standard. What isn't stable is GCC's implementation of it.

But your suggestion does work. We don't need to apply them to all macros, only 
those that are new in a given version, like __cpp_lib_atomic_wait or 
__cpp_lib_latch in this case. Alternatively, implementations can set the macro 
to a given value below the standard-required one (incl. 1) to indicate that 
they haven't achieved stability.

That would make my check:

#if __cplusplus >= 202002L && __has_include(<latch>)
#  include <latch>
#endif
#if __cpp_lib_latch < 201907L
#  error "Please upgrade your Standard Library"
#endif

-- 
Thiago Macieira - thiago.macieira (AT) intel.com
  Software Architect - Intel DPG Cloud Engineering

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/5] barrier: use int instead of unsigned char for the phase state
  2021-03-01 21:54               ` Thiago Macieira
@ 2021-03-01 22:04                 ` Ville Voutilainen
  2021-03-01 22:21                   ` Thiago Macieira
  0 siblings, 1 reply; 40+ messages in thread
From: Ville Voutilainen @ 2021-03-01 22:04 UTC (permalink / raw)
  To: Thiago Macieira; +Cc: libstdc++, gcc-patches List

On Mon, 1 Mar 2021 at 23:54, Thiago Macieira <thiago.macieira@intel.com> wrote:
> But your suggestion does work. We don't need to apply them to all macros, only
> those that are new in a given version, like __cpp_lib_atomic_wait or
> __cpp_lib_latch in this case. Alternatively, implementations can set the macro
> to a given value below the standard-required one (incl. 1) to indicate that
> they haven't achieved stability.
>
> That would make my check:
>
> #if __cplusplus >= 202002L && __has_include(<latch>)
> #  include <latch>
> #endif
> #if __cpp_lib_latch < 201907L
> #  error "Please upgrade your Standard Library"
> #endif

Well, this would be different. What I'm suggesting is not quite that;
for any *new* facility, we'd make sure
that its draft macro and the final IS macro are different, but the
minimum value is the first draft version,
not anything below it. I don't care too much, that approach and yours
would work the same way. Things that already
had an IS value for a macro and haven't changed since don't need to be
changed. And we don't
need to bump all values of existing facilities either, just for those
that got changes, so some existing macros
would be considered lost causes. Like the ones we're talking about,
because the cats are already out of the
bag.

I'll write a paper. That won't help you now, but it gives us tools in
the future.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/5] barrier: use int instead of unsigned char for the phase state
  2021-03-01 22:04                 ` Ville Voutilainen
@ 2021-03-01 22:21                   ` Thiago Macieira
  2021-03-01 22:31                     ` Ville Voutilainen
  0 siblings, 1 reply; 40+ messages in thread
From: Thiago Macieira @ 2021-03-01 22:21 UTC (permalink / raw)
  To: Ville Voutilainen; +Cc: libstdc++, gcc-patches List

On Monday, 1 March 2021 14:04:34 PST Ville Voutilainen wrote:
> Well, this would be different. What I'm suggesting is not quite that;
> for any *new* facility, we'd make sure
> that its draft macro and the final IS macro are different, but the
> minimum value is the first draft version,
> not anything below it. I don't care too much, that approach and yours
> would work the same way. Things that already
> had an IS value for a macro and haven't changed since don't need to be
> changed. And we don't
> need to bump all values of existing facilities either, just for those
> that got changes, so some existing macros
> would be considered lost causes. Like the ones we're talking about,
> because the cats are already out of the
> bag.

But the code I posted, if people are careful to use write like I did, would 
allow us to have the experimental "we're not sure this is right" 
implementation of atomic waits, latches, barriers and semaphores right now.

It would simply require that we decrement the macros by 1 in the libstdc++ 
headers.

-- 
Thiago Macieira - thiago.macieira (AT) intel.com
  Software Architect - Intel DPG Cloud Engineering




^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/5] barrier: use int instead of unsigned char for the phase state
  2021-03-01 22:21                   ` Thiago Macieira
@ 2021-03-01 22:31                     ` Ville Voutilainen
  2021-03-01 22:40                       ` Thiago Macieira
  0 siblings, 1 reply; 40+ messages in thread
From: Ville Voutilainen @ 2021-03-01 22:31 UTC (permalink / raw)
  To: Thiago Macieira; +Cc: libstdc++, gcc-patches List

On Tue, 2 Mar 2021 at 00:21, Thiago Macieira <thiago.macieira@intel.com> wrote:

> But the code I posted, if people are careful to use write like I did, would
> allow us to have the experimental "we're not sure this is right"
> implementation of atomic waits, latches, barriers and semaphores right now.

The code assumes that as soon as __cplusplus bumps and a header is
present, things
are stable. I don't think that's a safe assumption to make.
Furthermore, the second  #if
tries to check a feature-testing macro without including either the
corresponding header
or <version>. That doesn't work. You need to either include <version>
and check a macro,
or check that <latch> exists, then include <latch> and then check the macro.

But other than that, sure, as QoI, vendors could already provide the
standard macros with
numbers that are lower than the standard ever specified. Going
forward, if existing facilities
are changed, this stops working because now you'd have to track the
WPs to see which
values are "vendor-bogus". I find it better to just change the macros
whose facilities changed
during a standard cycle, and in those cases bump the IS macro, that'll
then work going forward.
In the meanwhile, when vendors can, they could use the technique you
describe, but that's
barely worth proposing as an SG10 guideline because they would've
needed to do it already, but didn't. :P

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/5] barrier: use int instead of unsigned char for the phase state
  2021-03-01 22:31                     ` Ville Voutilainen
@ 2021-03-01 22:40                       ` Thiago Macieira
  0 siblings, 0 replies; 40+ messages in thread
From: Thiago Macieira @ 2021-03-01 22:40 UTC (permalink / raw)
  To: Ville Voutilainen; +Cc: libstdc++, gcc-patches List

On Monday, 1 March 2021 14:31:09 PST Ville Voutilainen wrote:
> On Tue, 2 Mar 2021 at 00:21, Thiago Macieira <thiago.macieira@intel.com> 
wrote:
> > But the code I posted, if people are careful to use write like I did,
> > would
> > allow us to have the experimental "we're not sure this is right"
> > implementation of atomic waits, latches, barriers and semaphores right
> > now.
> 
> The code assumes that as soon as __cplusplus bumps and a header is
> present, things
> are stable.

Not exactly. Re-posting the code for reference:

#if __cplusplus >= 202002L && __has_include(<latch>)
#  include <latch>
#endif
#if __cpp_lib_latch < 201907L
#  error "Please upgrade your Standard Library"
#endif

The first section simply assumes that <latch> can be #included. The 
__cplusplus check is necessary because MSVC STL's headers are present but 
can't be #include'd otherwise (they cause #error).

It's the second check that is authoritative.

> I don't think that's a safe assumption to make.
> Furthermore, the second  #if
> tries to check a feature-testing macro without including either the
> corresponding header
> or <version>. That doesn't work. You need to either include <version>
> and check a macro,
> or check that <latch> exists, then include <latch> and then check the macro.

<version> would work, but why can't you check the macro anyway? It may trigger 
a warning with GCC's -Wundef, but it's just a warning. The preprocessor is 
defined to replace any undefined identifier with 0. If you want to avoid the 
warning, you could write:

#if defined(__cpp_lib_latch) && __cpp_lib_latch < 201907L

Is there anything I'm not seeing?

> But other than that, sure, as QoI, vendors could already provide the
> standard macros with
> numbers that are lower than the standard ever specified. Going
> forward, if existing facilities
> are changed, this stops working because now you'd have to track the
> WPs to see which
> values are "vendor-bogus". I find it better to just change the macros
> whose facilities changed
> during a standard cycle, and in those cases bump the IS macro, that'll
> then work going forward.
> In the meanwhile, when vendors can, they could use the technique you
> describe, but that's
> barely worth proposing as an SG10 guideline because they would've
> needed to do it already, but didn't. :P

I am not opposed to that. Your solution is better.

But this solution is less work on the standard and still works, even if it 
creates a little more work on the users of said features. It's not 
unsurmountable, since we often need to check which C++ standard edition it 
came with anyway. So it doesn't matter what value it assumes: we'll be 
consulting a table anyway.

-- 
Thiago Macieira - thiago.macieira (AT) intel.com
  Software Architect - Intel DPG Cloud Engineering




^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int
  2021-02-26 15:59 ` [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int Thiago Macieira
                     ` (4 preceding siblings ...)
  2021-02-26 18:14   ` [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int Andreas Schwab
@ 2021-03-03 14:34   ` Jonathan Wakely
  2021-03-03 17:14     ` Thiago Macieira
  5 siblings, 1 reply; 40+ messages in thread
From: Jonathan Wakely @ 2021-03-03 14:34 UTC (permalink / raw)
  To: Thiago Macieira; +Cc: libstdc++, gcc-patches

On 26/02/21 07:59 -0800, Thiago Macieira via Libstdc++ wrote:
>ints can be used as futex on Linux. ptrdiff_t on 64-bit Linux can't.

Yes, we should do this for GCC 11.


> libstdc++-v3/include/std/latch | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
>diff --git a/libstdc++-v3/include/std/latch b/libstdc++-v3/include/std/latch
>index ef8c301e5e9..156aea5c5e5 100644
>--- a/libstdc++-v3/include/std/latch
>+++ b/libstdc++-v3/include/std/latch
>@@ -48,7 +48,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>   public:
>     static constexpr ptrdiff_t
>     max() noexcept
>-    { return __gnu_cxx::__int_traits<ptrdiff_t>::__max; }
>+    { return __gnu_cxx::__int_traits<int>::__max; }
>
>     constexpr explicit latch(ptrdiff_t __expected) noexcept
>       : _M_a(__expected) { }
>@@ -85,7 +85,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>     }
>
>   private:
>-    alignas(__alignof__(ptrdiff_t)) ptrdiff_t _M_a;
>+    alignas(__alignof__(int)) int _M_a;
>   };
> _GLIBCXX_END_NAMESPACE_VERSION
> } // namespace
>-- 
>2.30.1
>


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/5] Atomic __platform_wait: accept any 32-bit type, not just int
  2021-02-26 15:59   ` [PATCH 2/5] Atomic __platform_wait: accept any 32-bit type, not just int Thiago Macieira
@ 2021-03-03 14:34     ` Jonathan Wakely
  2021-03-03 16:21       ` Jonathan Wakely
  0 siblings, 1 reply; 40+ messages in thread
From: Jonathan Wakely @ 2021-03-03 14:34 UTC (permalink / raw)
  To: Thiago Macieira; +Cc: libstdc++, gcc-patches

On 26/02/21 07:59 -0800, Thiago Macieira via Libstdc++ wrote:
>The kernel doesn't care what we store in those 32 bits, only that they are
>comparable. This commit adds:
> * pointers and long on 32-bit architectures
> * unsigned
> * untyped enums and typed enums on int & unsigned int
> * float
>
>We're not using FUTEX_OP anywhere today. The kernel reserves 4 bits for
>this field but has only used 6 values so far, so it can be extended to
>unsigned compares in the future, if needed.

And this one for GCC 11 too.

> libstdc++-v3/include/bits/atomic_wait.h | 7 ++++---
> 1 file changed, 4 insertions(+), 3 deletions(-)
>
>diff --git a/libstdc++-v3/include/bits/atomic_wait.h b/libstdc++-v3/include/bits/atomic_wait.h
>index 424fccbe4c5..1c6bda2e2b6 100644
>--- a/libstdc++-v3/include/bits/atomic_wait.h
>+++ b/libstdc++-v3/include/bits/atomic_wait.h
>@@ -65,7 +65,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>     template<typename _Tp>
>       inline constexpr bool __platform_wait_uses_type
> #ifdef _GLIBCXX_HAVE_LINUX_FUTEX
>-	= is_same_v<remove_cv_t<_Tp>, __platform_wait_t>;
>+	= is_scalar_v<remove_cv_t<_Tp>> && sizeof(_Tp) == sizeof(__platform_wait_t)
>+	  && alignof(_Tp) >= alignof(__platform_wait_t);
> #else
> 	= false;
> #endif
>@@ -91,13 +92,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>
>     template<typename _Tp>
>       void
>-      __platform_wait(const _Tp* __addr, __platform_wait_t __val) noexcept
>+      __platform_wait(const _Tp* __addr, _Tp __val) noexcept
>       {
> 	for(;;)
> 	  {
> 	    auto __e = syscall (SYS_futex, static_cast<const void*>(__addr),
> 				  static_cast<int>(__futex_wait_flags::__wait_private),
>-				    __val, nullptr);
>+				  static_cast<__platform_wait_t>(__val), nullptr);
> 	    if (!__e || errno == EAGAIN)
> 	      break;
> 	    else if (errno != EINTR)
>-- 
>2.30.1
>


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 5/5] barrier: optimise by not having the hasher in a loop
  2021-02-26 15:59   ` [PATCH 5/5] barrier: optimise by not having the hasher in a loop Thiago Macieira
@ 2021-03-03 14:36     ` Jonathan Wakely
  0 siblings, 0 replies; 40+ messages in thread
From: Jonathan Wakely @ 2021-03-03 14:36 UTC (permalink / raw)
  To: Thiago Macieira; +Cc: libstdc++, gcc-patches

On 26/02/21 07:59 -0800, Thiago Macieira via Libstdc++ wrote:
>Our thread's ID does not change so we don't have to get it every time
>and hash it every time.

This looks good for GCC 11.

(This one wouldn't be an ABI break to change later, but we might as
well do it now, as we have the patch now).


> libstdc++-v3/include/std/barrier | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
>diff --git a/libstdc++-v3/include/std/barrier b/libstdc++-v3/include/std/barrier
>index ae058bd3dc3..eb31a89b175 100644
>--- a/libstdc++-v3/include/std/barrier
>+++ b/libstdc++-v3/include/std/barrier
>@@ -101,12 +101,10 @@ It looks different from literature pseudocode for two main reasons:
>       }
>
>       bool
>-      _M_arrive(__barrier_phase_t __old_phase)
>+      _M_arrive(__barrier_phase_t __old_phase, size_t __current)
>       {
> 	size_t __current_expected = _M_expected;
>-	std::hash<std::thread::id> __hasher;
>-	size_t __current = __hasher(std::this_thread::get_id())
>-					  % ((_M_expected + 1) >> 1);
>+        __current %= ((__current_expected + 1) >> 1);
>
> 	const auto __half_step = _S_add_to_phase(__old_phase, 1);
> 	const auto __full_step = _S_add_to_phase(__old_phase, 2);
>@@ -167,11 +165,13 @@ It looks different from literature pseudocode for two main reasons:
>       [[nodiscard]] arrival_token
>       arrive(ptrdiff_t __update)
>       {
>+	std::hash<std::thread::id> __hasher;
>+	size_t __current = __hasher(std::this_thread::get_id());
> 	__atomic_phase_ref_t __phase(_M_phase);
> 	const auto __old_phase = __phase.load(memory_order_relaxed);
> 	for(; __update; --__update)
> 	  {
>-	    if(_M_arrive(__old_phase))
>+	    if(_M_arrive(__old_phase, __current))
> 	      {
> 		_M_completion();
> 		_M_expected += _M_expected_adjustment.load(memory_order_relaxed);
>-- 
>2.30.1
>


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int
  2021-03-01  8:56             ` Richard Biener
@ 2021-03-03 14:56               ` Jonathan Wakely
  2021-03-03 15:02                 ` Andreas Schwab
                                   ` (2 more replies)
  0 siblings, 3 replies; 40+ messages in thread
From: Jonathan Wakely @ 2021-03-03 14:56 UTC (permalink / raw)
  To: Richard Biener
  Cc: Hans-Peter Nilsson, Thiago Macieira, Andreas Schwab, libstdc++,
	GCC Patches

On 01/03/21 09:56 +0100, Richard Biener via Libstdc++ wrote:
>On Sun, Feb 28, 2021 at 10:53 PM Hans-Peter Nilsson <hp@bitrange.com> wrote:
>>
>>
>>
>> On Fri, 26 Feb 2021, Thiago Macieira via Gcc-patches wrote:
>>
>> > On Friday, 26 February 2021 11:31:00 PST Andreas Schwab wrote:
>> > > On Feb 26 2021, Thiago Macieira wrote:
>> > > > On Friday, 26 February 2021 10:14:42 PST Andreas Schwab wrote:
>> > > >> On Feb 26 2021, Thiago Macieira via Gcc-patches wrote:
>> > > >> > -    alignas(__alignof__(ptrdiff_t)) ptrdiff_t _M_a;
>> > > >> > +    alignas(__alignof__(int)) int _M_a;
>> > > >>
>> > > >> Futexes must be aligned to 4 bytes.
>> > > >
>> > > > Agreed, but doesn't this accomplish that?
>> > >
>> > > No.  It uses whatever alignment the type already has, and is an
>> > > elaborate no-op.
>> >
>> > I thought so too when I read the original line. But I expected it was written
>> > like that for a reason, especially since the same pattern appears in other
>> > places.
>> >
>> > I can change to "alignas(4)" (which is a GCC extension, I believe). Is that
>> > the correct solution?
>>
>> IMNSHO make use of the corresponding atomic type.  Then there'd
>> be no need for separate what's-the-right-align-curse games.

That won't work though, because we need direct access to the integer
object, not to a std::atomic<int> which contains it.

>Or align as the corresponding atomic type (in case using an actual
>std::atomic<int> is undesirable).  OTOH the proposed code isn't
>any more bogus than the previous ...

The previous code accounts for the fact that ptrdiff_t is a typedef
for an unspecified type, and that some ABIs allow struct members to have
weaker alignment than they would have otherwise.

e.g. __alignof__(long long) != alignof(long long) on x86.

Yes, I know ptrdiff_t isn't long long on x86, but since it's a typedef
for some target-specific type, it's still possible that
alignof != __alignof__. So alignas(__alignof__(T)) is not necessarily
a no-op. So not bogus.

For int, there shouldn't be any need to force the alignment. I don't
think any ABI supported by GCC allows int members to be aligned to
less than __alignof__(int). Users could break it by compiling with
#pragma pack, but I have no sympathy for such silliness.


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int
  2021-03-03 14:56               ` Jonathan Wakely
@ 2021-03-03 15:02                 ` Andreas Schwab
  2021-03-03 15:10                 ` Jonathan Wakely
  2021-03-03 15:37                 ` Hans-Peter Nilsson
  2 siblings, 0 replies; 40+ messages in thread
From: Andreas Schwab @ 2021-03-03 15:02 UTC (permalink / raw)
  To: Jonathan Wakely
  Cc: Richard Biener, Hans-Peter Nilsson, Thiago Macieira, libstdc++,
	GCC Patches

On Mär 03 2021, Jonathan Wakely wrote:

> For int, there shouldn't be any need to force the alignment. I don't
> think any ABI supported by GCC allows int members to be aligned to
> less than __alignof__(int).

There is no requirement that __alignof__(int) is big enough.

Andreas.

-- 
Andreas Schwab, schwab@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int
  2021-03-03 14:56               ` Jonathan Wakely
  2021-03-03 15:02                 ` Andreas Schwab
@ 2021-03-03 15:10                 ` Jonathan Wakely
  2021-03-03 15:37                 ` Hans-Peter Nilsson
  2 siblings, 0 replies; 40+ messages in thread
From: Jonathan Wakely @ 2021-03-03 15:10 UTC (permalink / raw)
  To: Richard Biener
  Cc: Hans-Peter Nilsson, Thiago Macieira, Andreas Schwab, libstdc++,
	GCC Patches

On 03/03/21 14:56 +0000, Jonathan Wakely wrote:
>On 01/03/21 09:56 +0100, Richard Biener via Libstdc++ wrote:
>>On Sun, Feb 28, 2021 at 10:53 PM Hans-Peter Nilsson <hp@bitrange.com> wrote:
>>>
>>>
>>>
>>>On Fri, 26 Feb 2021, Thiago Macieira via Gcc-patches wrote:
>>>
>>>> On Friday, 26 February 2021 11:31:00 PST Andreas Schwab wrote:
>>>> > On Feb 26 2021, Thiago Macieira wrote:
>>>> > > On Friday, 26 February 2021 10:14:42 PST Andreas Schwab wrote:
>>>> > >> On Feb 26 2021, Thiago Macieira via Gcc-patches wrote:
>>>> > >> > -    alignas(__alignof__(ptrdiff_t)) ptrdiff_t _M_a;
>>>> > >> > +    alignas(__alignof__(int)) int _M_a;
>>>> > >>
>>>> > >> Futexes must be aligned to 4 bytes.
>>>> > >
>>>> > > Agreed, but doesn't this accomplish that?
>>>> >
>>>> > No.  It uses whatever alignment the type already has, and is an
>>>> > elaborate no-op.
>>>>
>>>> I thought so too when I read the original line. But I expected it was written
>>>> like that for a reason, especially since the same pattern appears in other
>>>> places.
>>>>
>>>> I can change to "alignas(4)" (which is a GCC extension, I believe). Is that
>>>> the correct solution?

It's not a GCC extensions. You're thinking of alignas(obj) which is a
GCC extension.

>>>IMNSHO make use of the corresponding atomic type.  Then there'd
>>>be no need for separate what's-the-right-align-curse games.
>
>That won't work though, because we need direct access to the integer
>object, not to a std::atomic<int> which contains it.
>
>>Or align as the corresponding atomic type (in case using an actual
>>std::atomic<int> is undesirable).  OTOH the proposed code isn't
>>any more bogus than the previous ...
>
>The previous code accounts for the fact that ptrdiff_t is a typedef
>for an unspecified type, and that some ABIs allow struct members to have
>weaker alignment than they would have otherwise.
>
>e.g. __alignof__(long long) != alignof(long long) on x86.
>
>Yes, I know ptrdiff_t isn't long long on x86, but since it's a typedef
>for some target-specific type, it's still possible that
>alignof != __alignof__. So alignas(__alignof__(T)) is not necessarily
>a no-op. So not bogus.
>
>For int, there shouldn't be any need to force the alignment. I don't
>think any ABI supported by GCC allows int members to be aligned to
>less than __alignof__(int). Users could break it by compiling with
>#pragma pack, but I have no sympathy for such silliness.

Jakub said on IRC that m68k might have alignof(int) == 2, so we'd need
to increase that alignment to use it as a futex.

N.B. std::atomic and std::atomic_ref don't use alignas(__alignof__(T))
they do this instead:

       static_assert(is_trivially_copyable_v<_Tp>);

       // 1/2/4/8/16-byte types must be aligned to at least their size.
       static constexpr int _S_min_alignment
	= (sizeof(_Tp) & (sizeof(_Tp) - 1)) || sizeof(_Tp) > 16
	? 0 : sizeof(_Tp);

     public:

       static constexpr size_t required_alignment
	= _S_min_alignment > alignof(_Tp) ? _S_min_alignment : alignof(_Tp);

Using std::atomic_ref<T>::required_alignment would give that value.

For something being used as a futex we should just use alignas(4)
though, since that's what the kernel requires.


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int
  2021-03-03 14:56               ` Jonathan Wakely
  2021-03-03 15:02                 ` Andreas Schwab
  2021-03-03 15:10                 ` Jonathan Wakely
@ 2021-03-03 15:37                 ` Hans-Peter Nilsson
  2 siblings, 0 replies; 40+ messages in thread
From: Hans-Peter Nilsson @ 2021-03-03 15:37 UTC (permalink / raw)
  To: Jonathan Wakely
  Cc: Richard Biener, Thiago Macieira, Andreas Schwab, libstdc++, GCC Patches

On Wed, 3 Mar 2021, Jonathan Wakely wrote:
> For int, there shouldn't be any need to force the alignment. I don't
> think any ABI supported by GCC allows int members to be aligned to
> less than __alignof__(int).

(sizeof(int) last)

The CRIS ABI does as in default packed, and ISTR there was some
other gcc port too, but I don't recall whether that (too) had no
(current) Linux port.

brgds, H-P

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/5] Atomic __platform_wait: accept any 32-bit type, not just int
  2021-03-03 14:34     ` Jonathan Wakely
@ 2021-03-03 16:21       ` Jonathan Wakely
  2021-03-03 17:27         ` Thiago Macieira
  2021-03-03 17:34         ` Ville Voutilainen
  0 siblings, 2 replies; 40+ messages in thread
From: Jonathan Wakely @ 2021-03-03 16:21 UTC (permalink / raw)
  To: Thiago Macieira; +Cc: libstdc++, gcc-patches

On 03/03/21 14:34 +0000, Jonathan Wakely wrote:
>On 26/02/21 07:59 -0800, Thiago Macieira via Libstdc++ wrote:
>>The kernel doesn't care what we store in those 32 bits, only that they are
>>comparable. This commit adds:
>>* pointers and long on 32-bit architectures
>>* unsigned
>>* untyped enums and typed enums on int & unsigned int
>>* float
>>
>>We're not using FUTEX_OP anywhere today. The kernel reserves 4 bits for
>>this field but has only used 6 values so far, so it can be extended to
>>unsigned compares in the future, if needed.
>
>And this one for GCC 11 too.
>
>>libstdc++-v3/include/bits/atomic_wait.h | 7 ++++---
>>1 file changed, 4 insertions(+), 3 deletions(-)
>>
>>diff --git a/libstdc++-v3/include/bits/atomic_wait.h b/libstdc++-v3/include/bits/atomic_wait.h
>>index 424fccbe4c5..1c6bda2e2b6 100644
>>--- a/libstdc++-v3/include/bits/atomic_wait.h
>>+++ b/libstdc++-v3/include/bits/atomic_wait.h
>>@@ -65,7 +65,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>    template<typename _Tp>
>>      inline constexpr bool __platform_wait_uses_type
>>#ifdef _GLIBCXX_HAVE_LINUX_FUTEX
>>-	= is_same_v<remove_cv_t<_Tp>, __platform_wait_t>;
>>+	= is_scalar_v<remove_cv_t<_Tp>> && sizeof(_Tp) == sizeof(__platform_wait_t)

Oh, except that is_scalar is surprisingly expensive to instantiate
(its defined in a really expensive way) and since we control all uses
of this constant, I don't think we need it. It's only ever used from
atomic waiting functions which are only defined for scalar types.

>>+	  && alignof(_Tp) >= alignof(__platform_wait_t);
>>#else
>>	= false;
>>#endif
>>@@ -91,13 +92,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>
>>    template<typename _Tp>
>>      void
>>-      __platform_wait(const _Tp* __addr, __platform_wait_t __val) noexcept
>>+      __platform_wait(const _Tp* __addr, _Tp __val) noexcept
>>      {
>>	for(;;)
>>	  {
>>	    auto __e = syscall (SYS_futex, static_cast<const void*>(__addr),
>>				  static_cast<int>(__futex_wait_flags::__wait_private),
>>-				    __val, nullptr);
>>+				  static_cast<__platform_wait_t>(__val), nullptr);
>>	    if (!__e || errno == EAGAIN)
>>	      break;
>>	    else if (errno != EINTR)
>>-- 
>>2.30.1
>>


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int
  2021-03-03 14:34   ` Jonathan Wakely
@ 2021-03-03 17:14     ` Thiago Macieira
  2021-03-03 17:18       ` Jonathan Wakely
  0 siblings, 1 reply; 40+ messages in thread
From: Thiago Macieira @ 2021-03-03 17:14 UTC (permalink / raw)
  To: Jonathan Wakely; +Cc: libstdc++, gcc-patches

On Wednesday, 3 March 2021 06:34:26 PST Jonathan Wakely wrote:
> On 26/02/21 07:59 -0800, Thiago Macieira via Libstdc++ wrote:
> >ints can be used as futex on Linux. ptrdiff_t on 64-bit Linux can't.
> 
> Yes, we should do this for GCC 11.

Want me to re-submit this one alone, with the "alignas(4)" with a commend 
indicating it's what the kernel requires?

-- 
Thiago Macieira - thiago.macieira (AT) intel.com
  Software Architect - Intel DPG Cloud Engineering




^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int
  2021-03-03 17:14     ` Thiago Macieira
@ 2021-03-03 17:18       ` Jonathan Wakely
  0 siblings, 0 replies; 40+ messages in thread
From: Jonathan Wakely @ 2021-03-03 17:18 UTC (permalink / raw)
  To: Thiago Macieira; +Cc: libstdc++, gcc-patches

On 03/03/21 09:14 -0800, Thiago Macieira via Libstdc++ wrote:
>On Wednesday, 3 March 2021 06:34:26 PST Jonathan Wakely wrote:
>> On 26/02/21 07:59 -0800, Thiago Macieira via Libstdc++ wrote:
>> >ints can be used as futex on Linux. ptrdiff_t on 64-bit Linux can't.
>>
>> Yes, we should do this for GCC 11.
>
>Want me to re-submit this one alone, with the "alignas(4)" with a commend
>indicating it's what the kernel requires?

Yes please.


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/5] Atomic __platform_wait: accept any 32-bit type, not just int
  2021-03-03 16:21       ` Jonathan Wakely
@ 2021-03-03 17:27         ` Thiago Macieira
  2021-03-03 17:34         ` Ville Voutilainen
  1 sibling, 0 replies; 40+ messages in thread
From: Thiago Macieira @ 2021-03-03 17:27 UTC (permalink / raw)
  To: Jonathan Wakely; +Cc: libstdc++, gcc-patches

On Wednesday, 3 March 2021 08:21:51 PST Jonathan Wakely wrote:
> >>-       = is_same_v<remove_cv_t<_Tp>, __platform_wait_t>;
> >>+       = is_scalar_v<remove_cv_t<_Tp>> && sizeof(_Tp) ==
> >>sizeof(__platform_wait_t)
> Oh, except that is_scalar is surprisingly expensive to instantiate
> (its defined in a really expensive way) and since we control all uses
> of this constant, I don't think we need it. It's only ever used from
> atomic waiting functions which are only defined for scalar types.

Thanks. Will update and re-submit.

-- 
Thiago Macieira - thiago.macieira (AT) intel.com
  Software Architect - Intel DPG Cloud Engineering




^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/5] Atomic __platform_wait: accept any 32-bit type, not just int
  2021-03-03 16:21       ` Jonathan Wakely
  2021-03-03 17:27         ` Thiago Macieira
@ 2021-03-03 17:34         ` Ville Voutilainen
  2021-03-03 17:41           ` Jonathan Wakely
  1 sibling, 1 reply; 40+ messages in thread
From: Ville Voutilainen @ 2021-03-03 17:34 UTC (permalink / raw)
  To: Jonathan Wakely; +Cc: Thiago Macieira, libstdc++, gcc-patches List

On Wed, 3 Mar 2021 at 19:25, Jonathan Wakely via Libstdc++
<libstdc++@gcc.gnu.org> wrote:
> Oh, except that is_scalar is surprisingly expensive to instantiate
> (its defined in a really expensive way) and since we control all uses

I'll be more than happy to write you an __is_scalar for GCC 12. :P

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/5] Atomic __platform_wait: accept any 32-bit type, not just int
  2021-03-03 17:34         ` Ville Voutilainen
@ 2021-03-03 17:41           ` Jonathan Wakely
  0 siblings, 0 replies; 40+ messages in thread
From: Jonathan Wakely @ 2021-03-03 17:41 UTC (permalink / raw)
  To: Ville Voutilainen; +Cc: Thiago Macieira, libstdc++, gcc-patches List

On 03/03/21 19:34 +0200, Ville Voutilainen wrote:
>On Wed, 3 Mar 2021 at 19:25, Jonathan Wakely via Libstdc++
><libstdc++@gcc.gnu.org> wrote:
>> Oh, except that is_scalar is surprisingly expensive to instantiate
>> (its defined in a really expensive way) and since we control all uses
>
>I'll be more than happy to write you an __is_scalar for GCC 12. :P

That would help with https://gcc.gnu.org/PR96710 too. The current
implementation is inefficient and arguably wrong.



^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2021-03-03 17:41 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1968544.UC5HiB4uFJ@tjmaciei-mobl1>
2021-02-26 15:59 ` [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int Thiago Macieira
2021-02-26 15:59   ` [PATCH 2/5] Atomic __platform_wait: accept any 32-bit type, not just int Thiago Macieira
2021-03-03 14:34     ` Jonathan Wakely
2021-03-03 16:21       ` Jonathan Wakely
2021-03-03 17:27         ` Thiago Macieira
2021-03-03 17:34         ` Ville Voutilainen
2021-03-03 17:41           ` Jonathan Wakely
2021-02-26 15:59   ` [PATCH 3/5] std::__atomic_wait: don't use __detail::__waiter with futex Thiago Macieira
2021-02-26 15:59   ` [PATCH 4/5] barrier: use int instead of unsigned char for the phase state Thiago Macieira
2021-02-28 15:05     ` Hans-Peter Nilsson
2021-03-01 16:28       ` Thomas Rodgers
2021-03-01 17:24       ` Thiago Macieira
2021-03-01 17:38         ` Thomas Rodgers
2021-03-01 17:40           ` Thomas Rodgers
2021-03-01 18:06           ` Thiago Macieira
2021-03-01 19:08             ` Thomas Rodgers
2021-03-01 18:12         ` Ville Voutilainen
2021-03-01 19:44           ` Thiago Macieira
2021-03-01 20:35             ` Ville Voutilainen
2021-03-01 21:54               ` Thiago Macieira
2021-03-01 22:04                 ` Ville Voutilainen
2021-03-01 22:21                   ` Thiago Macieira
2021-03-01 22:31                     ` Ville Voutilainen
2021-03-01 22:40                       ` Thiago Macieira
2021-02-26 15:59   ` [PATCH 5/5] barrier: optimise by not having the hasher in a loop Thiago Macieira
2021-03-03 14:36     ` Jonathan Wakely
2021-02-26 18:14   ` [PATCH 1/5] std::latch: reduce internal implementation from ptrdiff_t to int Andreas Schwab
2021-02-26 19:08     ` Thiago Macieira
2021-02-26 19:31       ` Andreas Schwab
2021-02-27  0:13         ` Thiago Macieira
2021-02-28 21:31           ` Hans-Peter Nilsson
2021-03-01  8:56             ` Richard Biener
2021-03-03 14:56               ` Jonathan Wakely
2021-03-03 15:02                 ` Andreas Schwab
2021-03-03 15:10                 ` Jonathan Wakely
2021-03-03 15:37                 ` Hans-Peter Nilsson
2021-03-01 16:32             ` Thomas Rodgers
2021-03-03 14:34   ` Jonathan Wakely
2021-03-03 17:14     ` Thiago Macieira
2021-03-03 17:18       ` Jonathan Wakely

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).