public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v1] nptl: Add single-threaded optimization to ADAPTIVE_NP mutex
@ 2023-04-19  1:17 Noah Goldstein
  2023-04-19 15:41 ` H.J. Lu
  0 siblings, 1 reply; 3+ messages in thread
From: Noah Goldstein @ 2023-04-19  1:17 UTC (permalink / raw)
  To: libc-alpha; +Cc: goldstein.w.n, hjl.tools, carlos

ADAPTIVED_NP mutex is generally the most performant mutex option and
is commonly used so it seems preferable for it to also benefit from
the optimization.

make check passes on linux-x86_64
---
 nptl/pthread_mutex_cond_lock.c |  1 +
 nptl/pthread_mutex_lock.c      | 40 ++++++++++++++++++++++++++--------
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/nptl/pthread_mutex_cond_lock.c b/nptl/pthread_mutex_cond_lock.c
index f3af514305..fcc90bb16c 100644
--- a/nptl/pthread_mutex_cond_lock.c
+++ b/nptl/pthread_mutex_cond_lock.c
@@ -3,6 +3,7 @@
 #define LLL_MUTEX_LOCK(mutex) \
   lll_cond_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex))
 #define LLL_MUTEX_LOCK_OPTIMIZED(mutex) LLL_MUTEX_LOCK (mutex)
+#define LLL_MUTEX_TRYLOCK_OPTIMIZED(mutex) LLL_MUTEX_TRYLOCK (mutex)
 
 /* Not actually elided so far. Needed? */
 #define LLL_MUTEX_LOCK_ELISION(mutex)  \
diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
index d4f96c70ef..011bd7488d 100644
--- a/nptl/pthread_mutex_lock.c
+++ b/nptl/pthread_mutex_lock.c
@@ -30,9 +30,9 @@
 /* Some of the following definitions differ when pthread_mutex_cond_lock.c
    includes this file.  */
 #ifndef LLL_MUTEX_LOCK
-/* lll_lock with single-thread optimization.  */
-static inline void
-lll_mutex_lock_optimized (pthread_mutex_t *mutex)
+
+static inline int
+lll_mutex_try_singlethreaded_opt (pthread_mutex_t *mutex, int private)
 {
   /* The single-threaded optimization is only valid for private
      mutexes.  For process-shared mutexes, the mutex could be in a
@@ -41,16 +41,38 @@ lll_mutex_lock_optimized (pthread_mutex_t *mutex)
      acquired, POSIX requires that pthread_mutex_lock deadlocks for
      normal mutexes, so skip the optimization in that case as
      well.  */
-  int private = PTHREAD_MUTEX_PSHARED (mutex);
   if (private == LLL_PRIVATE && SINGLE_THREAD_P && mutex->__data.__lock == 0)
-    mutex->__data.__lock = 1;
-  else
+    {
+      mutex->__data.__lock = 1;
+      return 0;
+    }
+  return 1;
+}
+
+/* lll_trylock with single-thread optimization.  */
+static inline int
+lll_mutex_trylock_optimized (pthread_mutex_t *mutex)
+{
+  if (lll_mutex_try_singlethreaded_opt (mutex, PTHREAD_MUTEX_PSHARED (mutex))
+      == 0)
+    return 0;
+  return lll_trylock (mutex->__data.__lock);
+}
+
+/* lll_lock with single-thread optimization.  */
+static inline void
+lll_mutex_lock_optimized (pthread_mutex_t *mutex)
+{
+  int private = PTHREAD_MUTEX_PSHARED (mutex);
+  if (lll_mutex_try_singlethreaded_opt (mutex, private))
     lll_lock (mutex->__data.__lock, private);
 }
 
 # define LLL_MUTEX_LOCK(mutex)						\
   lll_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex))
 # define LLL_MUTEX_LOCK_OPTIMIZED(mutex) lll_mutex_lock_optimized (mutex)
+# define LLL_MUTEX_TRYLOCK_OPTIMIZED(mutex) \
+  lll_mutex_trylock_optimized (mutex)
 # define LLL_MUTEX_TRYLOCK(mutex) \
   lll_trylock ((mutex)->__data.__lock)
 # define LLL_ROBUST_MUTEX_LOCK_MODIFIER 0
@@ -133,11 +155,11 @@ PTHREAD_MUTEX_LOCK (pthread_mutex_t *mutex)
   else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex)
 			  == PTHREAD_MUTEX_ADAPTIVE_NP, 1))
     {
-      if (LLL_MUTEX_TRYLOCK (mutex) != 0)
+      if (LLL_MUTEX_TRYLOCK_OPTIMIZED (mutex) != 0)
 	{
 	  int cnt = 0;
-	  int max_cnt = MIN (max_adaptive_count (),
-			     mutex->__data.__spins * 2 + 10);
+	  int max_cnt
+	      = MIN (max_adaptive_count (), mutex->__data.__spins * 2 + 10);
 	  int spin_count, exp_backoff = 1;
 	  unsigned int jitter = get_jitter ();
 	  do
-- 
2.34.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v1] nptl: Add single-threaded optimization to ADAPTIVE_NP mutex
  2023-04-19  1:17 [PATCH v1] nptl: Add single-threaded optimization to ADAPTIVE_NP mutex Noah Goldstein
@ 2023-04-19 15:41 ` H.J. Lu
  2023-04-19 16:57   ` Noah Goldstein
  0 siblings, 1 reply; 3+ messages in thread
From: H.J. Lu @ 2023-04-19 15:41 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: libc-alpha, carlos

On Tue, Apr 18, 2023 at 6:17 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> ADAPTIVED_NP mutex is generally the most performant mutex option and
> is commonly used so it seems preferable for it to also benefit from
> the optimization.

Adaptive mutex works better than normal mutex when futex syscall is the main
bottleneck.   For single threaded applications or machines with fewer cores,
adaptive mutex may not improve performance.   If it is the case, shouldn't we
focus on improving performance when futex syscall is the bottleneck?

> make check passes on linux-x86_64
> ---
>  nptl/pthread_mutex_cond_lock.c |  1 +
>  nptl/pthread_mutex_lock.c      | 40 ++++++++++++++++++++++++++--------
>  2 files changed, 32 insertions(+), 9 deletions(-)
>
> diff --git a/nptl/pthread_mutex_cond_lock.c b/nptl/pthread_mutex_cond_lock.c
> index f3af514305..fcc90bb16c 100644
> --- a/nptl/pthread_mutex_cond_lock.c
> +++ b/nptl/pthread_mutex_cond_lock.c
> @@ -3,6 +3,7 @@
>  #define LLL_MUTEX_LOCK(mutex) \
>    lll_cond_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex))
>  #define LLL_MUTEX_LOCK_OPTIMIZED(mutex) LLL_MUTEX_LOCK (mutex)
> +#define LLL_MUTEX_TRYLOCK_OPTIMIZED(mutex) LLL_MUTEX_TRYLOCK (mutex)
>
>  /* Not actually elided so far. Needed? */
>  #define LLL_MUTEX_LOCK_ELISION(mutex)  \
> diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
> index d4f96c70ef..011bd7488d 100644
> --- a/nptl/pthread_mutex_lock.c
> +++ b/nptl/pthread_mutex_lock.c
> @@ -30,9 +30,9 @@
>  /* Some of the following definitions differ when pthread_mutex_cond_lock.c
>     includes this file.  */
>  #ifndef LLL_MUTEX_LOCK
> -/* lll_lock with single-thread optimization.  */
> -static inline void
> -lll_mutex_lock_optimized (pthread_mutex_t *mutex)
> +
> +static inline int
> +lll_mutex_try_singlethreaded_opt (pthread_mutex_t *mutex, int private)
>  {
>    /* The single-threaded optimization is only valid for private
>       mutexes.  For process-shared mutexes, the mutex could be in a
> @@ -41,16 +41,38 @@ lll_mutex_lock_optimized (pthread_mutex_t *mutex)
>       acquired, POSIX requires that pthread_mutex_lock deadlocks for
>       normal mutexes, so skip the optimization in that case as
>       well.  */
> -  int private = PTHREAD_MUTEX_PSHARED (mutex);
>    if (private == LLL_PRIVATE && SINGLE_THREAD_P && mutex->__data.__lock == 0)
> -    mutex->__data.__lock = 1;
> -  else
> +    {
> +      mutex->__data.__lock = 1;
> +      return 0;
> +    }
> +  return 1;
> +}
> +
> +/* lll_trylock with single-thread optimization.  */
> +static inline int
> +lll_mutex_trylock_optimized (pthread_mutex_t *mutex)
> +{
> +  if (lll_mutex_try_singlethreaded_opt (mutex, PTHREAD_MUTEX_PSHARED (mutex))
> +      == 0)
> +    return 0;
> +  return lll_trylock (mutex->__data.__lock);
> +}
> +
> +/* lll_lock with single-thread optimization.  */
> +static inline void
> +lll_mutex_lock_optimized (pthread_mutex_t *mutex)
> +{
> +  int private = PTHREAD_MUTEX_PSHARED (mutex);
> +  if (lll_mutex_try_singlethreaded_opt (mutex, private))
>      lll_lock (mutex->__data.__lock, private);
>  }
>
>  # define LLL_MUTEX_LOCK(mutex)                                         \
>    lll_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex))
>  # define LLL_MUTEX_LOCK_OPTIMIZED(mutex) lll_mutex_lock_optimized (mutex)
> +# define LLL_MUTEX_TRYLOCK_OPTIMIZED(mutex) \
> +  lll_mutex_trylock_optimized (mutex)
>  # define LLL_MUTEX_TRYLOCK(mutex) \
>    lll_trylock ((mutex)->__data.__lock)
>  # define LLL_ROBUST_MUTEX_LOCK_MODIFIER 0
> @@ -133,11 +155,11 @@ PTHREAD_MUTEX_LOCK (pthread_mutex_t *mutex)
>    else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex)
>                           == PTHREAD_MUTEX_ADAPTIVE_NP, 1))
>      {
> -      if (LLL_MUTEX_TRYLOCK (mutex) != 0)
> +      if (LLL_MUTEX_TRYLOCK_OPTIMIZED (mutex) != 0)
>         {
>           int cnt = 0;
> -         int max_cnt = MIN (max_adaptive_count (),
> -                            mutex->__data.__spins * 2 + 10);
> +         int max_cnt
> +             = MIN (max_adaptive_count (), mutex->__data.__spins * 2 + 10);
>           int spin_count, exp_backoff = 1;
>           unsigned int jitter = get_jitter ();
>           do
> --
> 2.34.1
>


-- 
H.J.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v1] nptl: Add single-threaded optimization to ADAPTIVE_NP mutex
  2023-04-19 15:41 ` H.J. Lu
@ 2023-04-19 16:57   ` Noah Goldstein
  0 siblings, 0 replies; 3+ messages in thread
From: Noah Goldstein @ 2023-04-19 16:57 UTC (permalink / raw)
  To: H.J. Lu; +Cc: libc-alpha, carlos

On Wed, Apr 19, 2023 at 10:42 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Apr 18, 2023 at 6:17 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > ADAPTIVED_NP mutex is generally the most performant mutex option and
> > is commonly used so it seems preferable for it to also benefit from
> > the optimization.
>
> Adaptive mutex works better than normal mutex when futex syscall is the main
> bottleneck.   For single threaded applications or machines with fewer cores,
> adaptive mutex may not improve performance.   If it is the case, shouldn't we
> focus on improving performance when futex syscall is the bottleneck?

I think adaptive mutex is more general than just avoiding mutexes. It seems
to be the most general purpose lock (adaptive between spin/futex based on
its usage) and is being pitched as the true default for things like std::mutex.
The single-threaded optimization seems to fit with that mentality.

>
> > make check passes on linux-x86_64
> > ---
> >  nptl/pthread_mutex_cond_lock.c |  1 +
> >  nptl/pthread_mutex_lock.c      | 40 ++++++++++++++++++++++++++--------
> >  2 files changed, 32 insertions(+), 9 deletions(-)
> >
> > diff --git a/nptl/pthread_mutex_cond_lock.c b/nptl/pthread_mutex_cond_lock.c
> > index f3af514305..fcc90bb16c 100644
> > --- a/nptl/pthread_mutex_cond_lock.c
> > +++ b/nptl/pthread_mutex_cond_lock.c
> > @@ -3,6 +3,7 @@
> >  #define LLL_MUTEX_LOCK(mutex) \
> >    lll_cond_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex))
> >  #define LLL_MUTEX_LOCK_OPTIMIZED(mutex) LLL_MUTEX_LOCK (mutex)
> > +#define LLL_MUTEX_TRYLOCK_OPTIMIZED(mutex) LLL_MUTEX_TRYLOCK (mutex)
> >
> >  /* Not actually elided so far. Needed? */
> >  #define LLL_MUTEX_LOCK_ELISION(mutex)  \
> > diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
> > index d4f96c70ef..011bd7488d 100644
> > --- a/nptl/pthread_mutex_lock.c
> > +++ b/nptl/pthread_mutex_lock.c
> > @@ -30,9 +30,9 @@
> >  /* Some of the following definitions differ when pthread_mutex_cond_lock.c
> >     includes this file.  */
> >  #ifndef LLL_MUTEX_LOCK
> > -/* lll_lock with single-thread optimization.  */
> > -static inline void
> > -lll_mutex_lock_optimized (pthread_mutex_t *mutex)
> > +
> > +static inline int
> > +lll_mutex_try_singlethreaded_opt (pthread_mutex_t *mutex, int private)
> >  {
> >    /* The single-threaded optimization is only valid for private
> >       mutexes.  For process-shared mutexes, the mutex could be in a
> > @@ -41,16 +41,38 @@ lll_mutex_lock_optimized (pthread_mutex_t *mutex)
> >       acquired, POSIX requires that pthread_mutex_lock deadlocks for
> >       normal mutexes, so skip the optimization in that case as
> >       well.  */
> > -  int private = PTHREAD_MUTEX_PSHARED (mutex);
> >    if (private == LLL_PRIVATE && SINGLE_THREAD_P && mutex->__data.__lock == 0)
> > -    mutex->__data.__lock = 1;
> > -  else
> > +    {
> > +      mutex->__data.__lock = 1;
> > +      return 0;
> > +    }
> > +  return 1;
> > +}
> > +
> > +/* lll_trylock with single-thread optimization.  */
> > +static inline int
> > +lll_mutex_trylock_optimized (pthread_mutex_t *mutex)
> > +{
> > +  if (lll_mutex_try_singlethreaded_opt (mutex, PTHREAD_MUTEX_PSHARED (mutex))
> > +      == 0)
> > +    return 0;
> > +  return lll_trylock (mutex->__data.__lock);
> > +}
> > +
> > +/* lll_lock with single-thread optimization.  */
> > +static inline void
> > +lll_mutex_lock_optimized (pthread_mutex_t *mutex)
> > +{
> > +  int private = PTHREAD_MUTEX_PSHARED (mutex);
> > +  if (lll_mutex_try_singlethreaded_opt (mutex, private))
> >      lll_lock (mutex->__data.__lock, private);
> >  }
> >
> >  # define LLL_MUTEX_LOCK(mutex)                                         \
> >    lll_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex))
> >  # define LLL_MUTEX_LOCK_OPTIMIZED(mutex) lll_mutex_lock_optimized (mutex)
> > +# define LLL_MUTEX_TRYLOCK_OPTIMIZED(mutex) \
> > +  lll_mutex_trylock_optimized (mutex)
> >  # define LLL_MUTEX_TRYLOCK(mutex) \
> >    lll_trylock ((mutex)->__data.__lock)
> >  # define LLL_ROBUST_MUTEX_LOCK_MODIFIER 0
> > @@ -133,11 +155,11 @@ PTHREAD_MUTEX_LOCK (pthread_mutex_t *mutex)
> >    else if (__builtin_expect (PTHREAD_MUTEX_TYPE (mutex)
> >                           == PTHREAD_MUTEX_ADAPTIVE_NP, 1))
> >      {
> > -      if (LLL_MUTEX_TRYLOCK (mutex) != 0)
> > +      if (LLL_MUTEX_TRYLOCK_OPTIMIZED (mutex) != 0)
> >         {
> >           int cnt = 0;
> > -         int max_cnt = MIN (max_adaptive_count (),
> > -                            mutex->__data.__spins * 2 + 10);
> > +         int max_cnt
> > +             = MIN (max_adaptive_count (), mutex->__data.__spins * 2 + 10);
> >           int spin_count, exp_backoff = 1;
> >           unsigned int jitter = get_jitter ();
> >           do
> > --
> > 2.34.1
> >
>
>
> --
> H.J.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-04-19 16:57 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-19  1:17 [PATCH v1] nptl: Add single-threaded optimization to ADAPTIVE_NP mutex Noah Goldstein
2023-04-19 15:41 ` H.J. Lu
2023-04-19 16:57   ` Noah Goldstein

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).