public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: Noah Goldstein <goldstein.w.n@gmail.com>
To: "H.J. Lu" <hjl.tools@gmail.com>
Cc: GNU C Library <libc-alpha@sourceware.org>,
	Florian Weimer <fweimer@redhat.com>,
	 Arjan van de Ven <arjan@linux.intel.com>,
	Andreas Schwab <schwab@linux-m68k.org>
Subject: Re: [PATCH v4 1/3] Reduce CAS in low level locks [BZ #28537]
Date: Tue, 9 Nov 2021 19:56:10 -0600	[thread overview]
Message-ID: <CAFUsyfKJtdr7T-eRmSbzdVYaX6iWRoArDmC=g61DuLXVQJ=17g@mail.gmail.com> (raw)
In-Reply-To: <20211110001614.2087610-2-hjl.tools@gmail.com>

On Tue, Nov 9, 2021 at 6:21 PM H.J. Lu via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> CAS instruction is expensive.  From the x86 CPU's point of view, getting
> a cache line for writing is more expensive than reading.  See Appendix
> A.2 Spinlock in:
>
> https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/xeon-lock-scaling-analysis-paper.pdf
>
> The full compare and swap will grab the cache line exclusive and cause
> excessive cache line bouncing.
>
> 1. Change low level locks to do an atomic load and skip CAS if compare
> may fail to reduce cache line bouncing on contended locks.
> 2. In __lll_lock, replace atomic_compare_and_exchange_bool_acq with
> atomic_compare_and_exchange_val_acq and pass down the result to
> __lll_lock_wait and __lll_lock_wait_private to avoid the redundant load
> there.
> 3. Drop __glibc_unlikely in __lll_trylock and lll_cond_trylock since we
> don't know if it's actually rare; in the contended case it is clearly not
> rare.

Think especially since you have a manual check for old vs new, the atomic
CAS failure should still be wrapped in a `glibc_unlikely`.

> ---
>  nptl/lowlevellock.c         | 12 ++++++------
>  sysdeps/nptl/lowlevellock.h | 29 ++++++++++++++++++++---------
>  2 files changed, 26 insertions(+), 15 deletions(-)
>
> diff --git a/nptl/lowlevellock.c b/nptl/lowlevellock.c
> index 8c740529c4..d1965c01ca 100644
> --- a/nptl/lowlevellock.c
> +++ b/nptl/lowlevellock.c
> @@ -22,30 +22,30 @@
>  #include <stap-probe.h>
>
>  void
> -__lll_lock_wait_private (int *futex)
> +__lll_lock_wait_private (int *futex, int futex_value)
>  {
> -  if (atomic_load_relaxed (futex) == 2)
> +  if (futex_value == 2)
>      goto futex;
>
>    while (atomic_exchange_acquire (futex, 2) != 0)
>      {
>      futex:
> -      LIBC_PROBE (lll_lock_wait_private, 1, futex);
> +      LIBC_PROBE (lll_lock_wait_private, 2, futex, futex_value);
>        futex_wait ((unsigned int *) futex, 2, LLL_PRIVATE); /* Wait if *futex == 2.  */
>      }
>  }
>  libc_hidden_def (__lll_lock_wait_private)
>
>  void
> -__lll_lock_wait (int *futex, int private)
> +__lll_lock_wait (int *futex, int futex_value, int private)
>  {
> -  if (atomic_load_relaxed (futex) == 2)
> +  if (futex_value == 2)
>      goto futex;
>
>    while (atomic_exchange_acquire (futex, 2) != 0)
>      {
>      futex:
> -      LIBC_PROBE (lll_lock_wait, 1, futex);
> +      LIBC_PROBE (lll_lock_wait, 2, futex, futex_value);
>        futex_wait ((unsigned int *) futex, 2, private); /* Wait if *futex == 2.  */
>      }
>  }
> diff --git a/sysdeps/nptl/lowlevellock.h b/sysdeps/nptl/lowlevellock.h
> index 4d95114ed3..4235d13de9 100644
> --- a/sysdeps/nptl/lowlevellock.h
> +++ b/sysdeps/nptl/lowlevellock.h
> @@ -66,7 +66,11 @@
>     0.  Otherwise leave lock unchanged and return non-zero to indicate that the
>     lock was not acquired.  */
>  #define __lll_trylock(lock)    \
> -  __glibc_unlikely (atomic_compare_and_exchange_bool_acq ((lock), 1, 0))
> +  (__extension__ ({                                                    \
> +    __typeof (*(lock)) __lock_value = atomic_load_relaxed (lock);      \
> +    (__lock_value != 0                                                 \
> +     || atomic_compare_and_exchange_bool_acq ((lock), 1, 0));          \
> +  }))
>  #define lll_trylock(lock)      \
>     __lll_trylock (&(lock))
>
> @@ -74,11 +78,15 @@
>     return 0.  Otherwise leave lock unchanged and return non-zero to indicate
>     that the lock was not acquired.  */
>  #define lll_cond_trylock(lock) \
> -  __glibc_unlikely (atomic_compare_and_exchange_bool_acq (&(lock), 2, 0))
> +  (__extension__ ({                                                    \
> +    __typeof (lock) __lock_value = atomic_load_relaxed (&(lock));      \
> +    (__lock_value != 0                                                 \
> +     || atomic_compare_and_exchange_bool_acq (&(lock), 2, 0));         \
> +  }))
>
> -extern void __lll_lock_wait_private (int *futex);
> +extern void __lll_lock_wait_private (int *futex, int futex_value);
>  libc_hidden_proto (__lll_lock_wait_private)
> -extern void __lll_lock_wait (int *futex, int private);
> +extern void __lll_lock_wait (int *futex, int futex_value, int private);
>  libc_hidden_proto (__lll_lock_wait)
>
>  /* This is an expression rather than a statement even though its value is
> @@ -95,13 +103,15 @@ libc_hidden_proto (__lll_lock_wait)
>    ((void)                                                               \
>     ({                                                                   \
>       int *__futex = (futex);                                            \
> -     if (__glibc_unlikely                                               \
> -         (atomic_compare_and_exchange_bool_acq (__futex, 1, 0)))        \
> +     int __futex_value = atomic_load_relaxed (futex);                   \
> +     if (__futex_value != 0                                             \
> +         || ((__futex_value = atomic_compare_and_exchange_val_acq       \
> +              (__futex, 1, 0) != 0)))                                   \
>         {                                                                \
>           if (__builtin_constant_p (private) && (private) == LLL_PRIVATE) \
> -           __lll_lock_wait_private (__futex);                           \
> +           __lll_lock_wait_private (futex, __futex_value);              \
>           else                                                           \
> -           __lll_lock_wait (__futex, private);                          \
> +           __lll_lock_wait (futex, __futex_value, private);             \
>         }                                                                \
>     }))
>  #define lll_lock(futex, private)       \
> @@ -120,7 +130,8 @@ libc_hidden_proto (__lll_lock_wait)
>     ({                                                                   \
>       int *__futex = (futex);                                            \
>       if (__glibc_unlikely (atomic_exchange_acq (__futex, 2) != 0))      \
> -       __lll_lock_wait (__futex, private);                              \
> +       __lll_lock_wait (__futex, atomic_load_relaxed (__futex),         \
> +                       private); \
>     }))
>  #define lll_cond_lock(futex, private) __lll_cond_lock (&(futex), private)
>
> --
> 2.33.1
>

  reply	other threads:[~2021-11-10  1:56 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-10  0:16 [PATCH v4 0/3] Optimize CAS " H.J. Lu
2021-11-10  0:16 ` [PATCH v4 1/3] Reduce CAS in low level locks " H.J. Lu
2021-11-10  1:56   ` Noah Goldstein [this message]
2021-11-10  0:16 ` [PATCH v4 2/3] Reduce CAS in __pthread_mutex_lock_full " H.J. Lu
2021-11-10  0:16 ` [PATCH v4 3/3] Optimize " H.J. Lu
2021-11-10 14:26 ` [PATCH v4 0/3] Optimize CAS " Paul E Murphy
2021-11-10 20:07   ` Paul A. Clarke
2021-11-10 21:33     ` H.J. Lu
2021-11-11  0:30       ` Paul A. Clarke
2021-11-10 23:34     ` H.J. Lu
2021-11-10 15:35 ` Paul A. Clarke
2021-11-10 15:42   ` H.J. Lu
2021-11-10 15:50     ` Paul A. Clarke
2021-11-10 15:52       ` H.J. Lu
2021-11-10 15:52   ` Florian Weimer
2021-11-10 16:03     ` H.J. Lu
2021-11-10 16:04     ` Paul A. Clarke
2021-11-10 16:14     ` Andreas Schwab

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAFUsyfKJtdr7T-eRmSbzdVYaX6iWRoArDmC=g61DuLXVQJ=17g@mail.gmail.com' \
    --to=goldstein.w.n@gmail.com \
    --cc=arjan@linux.intel.com \
    --cc=fweimer@redhat.com \
    --cc=hjl.tools@gmail.com \
    --cc=libc-alpha@sourceware.org \
    --cc=schwab@linux-m68k.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).