From: Noah Goldstein <goldstein.w.n@gmail.com>
To: "H.J. Lu" <hjl.tools@gmail.com>
Cc: GNU C Library <libc-alpha@sourceware.org>,
Florian Weimer <fweimer@redhat.com>,
Arjan van de Ven <arjan@linux.intel.com>,
Andreas Schwab <schwab@linux-m68k.org>
Subject: Re: [PATCH v4 1/3] Reduce CAS in low level locks [BZ #28537]
Date: Tue, 9 Nov 2021 19:56:10 -0600 [thread overview]
Message-ID: <CAFUsyfKJtdr7T-eRmSbzdVYaX6iWRoArDmC=g61DuLXVQJ=17g@mail.gmail.com> (raw)
In-Reply-To: <20211110001614.2087610-2-hjl.tools@gmail.com>
On Tue, Nov 9, 2021 at 6:21 PM H.J. Lu via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> CAS instruction is expensive. From the x86 CPU's point of view, getting
> a cache line for writing is more expensive than reading. See Appendix
> A.2 Spinlock in:
>
> https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/xeon-lock-scaling-analysis-paper.pdf
>
> The full compare and swap will grab the cache line exclusive and cause
> excessive cache line bouncing.
>
> 1. Change low level locks to do an atomic load and skip CAS if compare
> may fail to reduce cache line bouncing on contended locks.
> 2. In __lll_lock, replace atomic_compare_and_exchange_bool_acq with
> atomic_compare_and_exchange_val_acq and pass down the result to
> __lll_lock_wait and __lll_lock_wait_private to avoid the redundant load
> there.
> 3. Drop __glibc_unlikely in __lll_trylock and lll_cond_trylock since we
> don't know if it's actually rare; in the contended case it is clearly not
> rare.
Think especially since you have a manual check for old vs new, the atomic
CAS failure should still be wrapped in a `glibc_unlikely`.
> ---
> nptl/lowlevellock.c | 12 ++++++------
> sysdeps/nptl/lowlevellock.h | 29 ++++++++++++++++++++---------
> 2 files changed, 26 insertions(+), 15 deletions(-)
>
> diff --git a/nptl/lowlevellock.c b/nptl/lowlevellock.c
> index 8c740529c4..d1965c01ca 100644
> --- a/nptl/lowlevellock.c
> +++ b/nptl/lowlevellock.c
> @@ -22,30 +22,30 @@
> #include <stap-probe.h>
>
> void
> -__lll_lock_wait_private (int *futex)
> +__lll_lock_wait_private (int *futex, int futex_value)
> {
> - if (atomic_load_relaxed (futex) == 2)
> + if (futex_value == 2)
> goto futex;
>
> while (atomic_exchange_acquire (futex, 2) != 0)
> {
> futex:
> - LIBC_PROBE (lll_lock_wait_private, 1, futex);
> + LIBC_PROBE (lll_lock_wait_private, 2, futex, futex_value);
> futex_wait ((unsigned int *) futex, 2, LLL_PRIVATE); /* Wait if *futex == 2. */
> }
> }
> libc_hidden_def (__lll_lock_wait_private)
>
> void
> -__lll_lock_wait (int *futex, int private)
> +__lll_lock_wait (int *futex, int futex_value, int private)
> {
> - if (atomic_load_relaxed (futex) == 2)
> + if (futex_value == 2)
> goto futex;
>
> while (atomic_exchange_acquire (futex, 2) != 0)
> {
> futex:
> - LIBC_PROBE (lll_lock_wait, 1, futex);
> + LIBC_PROBE (lll_lock_wait, 2, futex, futex_value);
> futex_wait ((unsigned int *) futex, 2, private); /* Wait if *futex == 2. */
> }
> }
> diff --git a/sysdeps/nptl/lowlevellock.h b/sysdeps/nptl/lowlevellock.h
> index 4d95114ed3..4235d13de9 100644
> --- a/sysdeps/nptl/lowlevellock.h
> +++ b/sysdeps/nptl/lowlevellock.h
> @@ -66,7 +66,11 @@
> 0. Otherwise leave lock unchanged and return non-zero to indicate that the
> lock was not acquired. */
> #define __lll_trylock(lock) \
> - __glibc_unlikely (atomic_compare_and_exchange_bool_acq ((lock), 1, 0))
> + (__extension__ ({ \
> + __typeof (*(lock)) __lock_value = atomic_load_relaxed (lock); \
> + (__lock_value != 0 \
> + || atomic_compare_and_exchange_bool_acq ((lock), 1, 0)); \
> + }))
> #define lll_trylock(lock) \
> __lll_trylock (&(lock))
>
> @@ -74,11 +78,15 @@
> return 0. Otherwise leave lock unchanged and return non-zero to indicate
> that the lock was not acquired. */
> #define lll_cond_trylock(lock) \
> - __glibc_unlikely (atomic_compare_and_exchange_bool_acq (&(lock), 2, 0))
> + (__extension__ ({ \
> + __typeof (lock) __lock_value = atomic_load_relaxed (&(lock)); \
> + (__lock_value != 0 \
> + || atomic_compare_and_exchange_bool_acq (&(lock), 2, 0)); \
> + }))
>
> -extern void __lll_lock_wait_private (int *futex);
> +extern void __lll_lock_wait_private (int *futex, int futex_value);
> libc_hidden_proto (__lll_lock_wait_private)
> -extern void __lll_lock_wait (int *futex, int private);
> +extern void __lll_lock_wait (int *futex, int futex_value, int private);
> libc_hidden_proto (__lll_lock_wait)
>
> /* This is an expression rather than a statement even though its value is
> @@ -95,13 +103,15 @@ libc_hidden_proto (__lll_lock_wait)
> ((void) \
> ({ \
> int *__futex = (futex); \
> - if (__glibc_unlikely \
> - (atomic_compare_and_exchange_bool_acq (__futex, 1, 0))) \
> + int __futex_value = atomic_load_relaxed (futex); \
> + if (__futex_value != 0 \
> + || ((__futex_value = atomic_compare_and_exchange_val_acq \
> + (__futex, 1, 0) != 0))) \
> { \
> if (__builtin_constant_p (private) && (private) == LLL_PRIVATE) \
> - __lll_lock_wait_private (__futex); \
> + __lll_lock_wait_private (futex, __futex_value); \
> else \
> - __lll_lock_wait (__futex, private); \
> + __lll_lock_wait (futex, __futex_value, private); \
> } \
> }))
> #define lll_lock(futex, private) \
> @@ -120,7 +130,8 @@ libc_hidden_proto (__lll_lock_wait)
> ({ \
> int *__futex = (futex); \
> if (__glibc_unlikely (atomic_exchange_acq (__futex, 2) != 0)) \
> - __lll_lock_wait (__futex, private); \
> + __lll_lock_wait (__futex, atomic_load_relaxed (__futex), \
> + private); \
> }))
> #define lll_cond_lock(futex, private) __lll_cond_lock (&(futex), private)
>
> --
> 2.33.1
>
next prev parent reply other threads:[~2021-11-10 1:56 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-11-10 0:16 [PATCH v4 0/3] Optimize CAS " H.J. Lu
2021-11-10 0:16 ` [PATCH v4 1/3] Reduce CAS in low level locks " H.J. Lu
2021-11-10 1:56 ` Noah Goldstein [this message]
2021-11-10 0:16 ` [PATCH v4 2/3] Reduce CAS in __pthread_mutex_lock_full " H.J. Lu
2021-11-10 0:16 ` [PATCH v4 3/3] Optimize " H.J. Lu
2021-11-10 14:26 ` [PATCH v4 0/3] Optimize CAS " Paul E Murphy
2021-11-10 20:07 ` Paul A. Clarke
2021-11-10 21:33 ` H.J. Lu
2021-11-11 0:30 ` Paul A. Clarke
2021-11-10 23:34 ` H.J. Lu
2021-11-10 15:35 ` Paul A. Clarke
2021-11-10 15:42 ` H.J. Lu
2021-11-10 15:50 ` Paul A. Clarke
2021-11-10 15:52 ` H.J. Lu
2021-11-10 15:52 ` Florian Weimer
2021-11-10 16:03 ` H.J. Lu
2021-11-10 16:04 ` Paul A. Clarke
2021-11-10 16:14 ` Andreas Schwab
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAFUsyfKJtdr7T-eRmSbzdVYaX6iWRoArDmC=g61DuLXVQJ=17g@mail.gmail.com' \
--to=goldstein.w.n@gmail.com \
--cc=arjan@linux.intel.com \
--cc=fweimer@redhat.com \
--cc=hjl.tools@gmail.com \
--cc=libc-alpha@sourceware.org \
--cc=schwab@linux-m68k.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).