From: "H.J. Lu" <hjl.tools@gmail.com>
To: libc-alpha@sourceware.org
Cc: Florian Weimer <fweimer@redhat.com>,
Oleh Derevenko <oleh.derevenko@gmail.com>,
Arjan van de Ven <arjan@linux.intel.com>,
Andreas Schwab <schwab@linux-m68k.org>,
"Paul A . Clarke" <pc@us.ibm.com>,
Noah Goldstein <goldstein.w.n@gmail.com>
Subject: [PATCH v5 1/3] Reduce CAS in low level locks [BZ #28537]
Date: Wed, 10 Nov 2021 10:41:51 -0800 [thread overview]
Message-ID: <20211110184153.2269857-2-hjl.tools@gmail.com> (raw)
In-Reply-To: <20211110184153.2269857-1-hjl.tools@gmail.com>
CAS instruction is expensive. From the x86 CPU's point of view, getting
a cache line for writing is more expensive than reading. See Appendix
A.2 Spinlock in:
https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/xeon-lock-scaling-analysis-paper.pdf
The full compare and swap will grab the cache line exclusive and cause
excessive cache line bouncing.
1. Change low level locks to do an atomic load and skip CAS if compare
may fail to reduce cache line bouncing on contended locks.
2. In __lll_lock, replace atomic_compare_and_exchange_bool_acq with
atomic_compare_and_exchange_val_acq and pass down the result to
__lll_lock_wait and __lll_lock_wait_private to avoid the redundant load
there.
---
nptl/lowlevellock.c | 12 ++++++------
sysdeps/nptl/lowlevellock.h | 33 +++++++++++++++++++++++----------
2 files changed, 29 insertions(+), 16 deletions(-)
diff --git a/nptl/lowlevellock.c b/nptl/lowlevellock.c
index 8c740529c4..d1965c01ca 100644
--- a/nptl/lowlevellock.c
+++ b/nptl/lowlevellock.c
@@ -22,30 +22,30 @@
#include <stap-probe.h>
void
-__lll_lock_wait_private (int *futex)
+__lll_lock_wait_private (int *futex, int futex_value)
{
- if (atomic_load_relaxed (futex) == 2)
+ if (futex_value == 2)
goto futex;
while (atomic_exchange_acquire (futex, 2) != 0)
{
futex:
- LIBC_PROBE (lll_lock_wait_private, 1, futex);
+ LIBC_PROBE (lll_lock_wait_private, 2, futex, futex_value);
futex_wait ((unsigned int *) futex, 2, LLL_PRIVATE); /* Wait if *futex == 2. */
}
}
libc_hidden_def (__lll_lock_wait_private)
void
-__lll_lock_wait (int *futex, int private)
+__lll_lock_wait (int *futex, int futex_value, int private)
{
- if (atomic_load_relaxed (futex) == 2)
+ if (futex_value == 2)
goto futex;
while (atomic_exchange_acquire (futex, 2) != 0)
{
futex:
- LIBC_PROBE (lll_lock_wait, 1, futex);
+ LIBC_PROBE (lll_lock_wait, 2, futex, futex_value);
futex_wait ((unsigned int *) futex, 2, private); /* Wait if *futex == 2. */
}
}
diff --git a/sysdeps/nptl/lowlevellock.h b/sysdeps/nptl/lowlevellock.h
index 4d95114ed3..05260eb706 100644
--- a/sysdeps/nptl/lowlevellock.h
+++ b/sysdeps/nptl/lowlevellock.h
@@ -66,7 +66,12 @@
0. Otherwise leave lock unchanged and return non-zero to indicate that the
lock was not acquired. */
#define __lll_trylock(lock) \
- __glibc_unlikely (atomic_compare_and_exchange_bool_acq ((lock), 1, 0))
+ (__extension__ ({ \
+ __typeof (*(lock)) __lock_value = atomic_load_relaxed (lock); \
+ (__lock_value != 0 \
+ || __glibc_unlikely (atomic_compare_and_exchange_bool_acq ((lock), \
+ 1, 0)));\
+ }))
#define lll_trylock(lock) \
__lll_trylock (&(lock))
@@ -74,11 +79,16 @@
return 0. Otherwise leave lock unchanged and return non-zero to indicate
that the lock was not acquired. */
#define lll_cond_trylock(lock) \
- __glibc_unlikely (atomic_compare_and_exchange_bool_acq (&(lock), 2, 0))
-
-extern void __lll_lock_wait_private (int *futex);
+ (__extension__ ({ \
+ __typeof (lock) __lock_value = atomic_load_relaxed (&(lock)); \
+ (__lock_value != 0 \
+ || __glibc_unlikely (atomic_compare_and_exchange_bool_acq (&(lock),\
+ 2, 0)));\
+ }))
+
+extern void __lll_lock_wait_private (int *futex, int futex_value);
libc_hidden_proto (__lll_lock_wait_private)
-extern void __lll_lock_wait (int *futex, int private);
+extern void __lll_lock_wait (int *futex, int futex_value, int private);
libc_hidden_proto (__lll_lock_wait)
/* This is an expression rather than a statement even though its value is
@@ -95,13 +105,15 @@ libc_hidden_proto (__lll_lock_wait)
((void) \
({ \
int *__futex = (futex); \
- if (__glibc_unlikely \
- (atomic_compare_and_exchange_bool_acq (__futex, 1, 0))) \
+ int __futex_value = atomic_load_relaxed (futex); \
+ if (__futex_value != 0 \
+ || ((__futex_value = atomic_compare_and_exchange_val_acq \
+ (__futex, 1, 0) != 0))) \
{ \
if (__builtin_constant_p (private) && (private) == LLL_PRIVATE) \
- __lll_lock_wait_private (__futex); \
+ __lll_lock_wait_private (futex, __futex_value); \
else \
- __lll_lock_wait (__futex, private); \
+ __lll_lock_wait (futex, __futex_value, private); \
} \
}))
#define lll_lock(futex, private) \
@@ -120,7 +132,8 @@ libc_hidden_proto (__lll_lock_wait)
({ \
int *__futex = (futex); \
if (__glibc_unlikely (atomic_exchange_acq (__futex, 2) != 0)) \
- __lll_lock_wait (__futex, private); \
+ __lll_lock_wait (__futex, atomic_load_relaxed (__futex), \
+ private); \
}))
#define lll_cond_lock(futex, private) __lll_cond_lock (&(futex), private)
--
2.33.1
next prev parent reply other threads:[~2021-11-10 18:41 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-11-10 18:41 [PATCH v5 0/3] Optimize CAS " H.J. Lu
2021-11-10 18:41 ` H.J. Lu [this message]
2021-11-10 18:41 ` [PATCH v5 2/3] Reduce CAS in __pthread_mutex_lock_full " H.J. Lu
2021-11-10 18:41 ` [PATCH v5 3/3] Optimize " H.J. Lu
2021-11-10 20:23 ` [PATCH v5 0/3] Optimize CAS " Paul A. Clarke
2021-11-10 20:53 ` H.J. Lu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211110184153.2269857-2-hjl.tools@gmail.com \
--to=hjl.tools@gmail.com \
--cc=arjan@linux.intel.com \
--cc=fweimer@redhat.com \
--cc=goldstein.w.n@gmail.com \
--cc=libc-alpha@sourceware.org \
--cc=oleh.derevenko@gmail.com \
--cc=pc@us.ibm.com \
--cc=schwab@linux-m68k.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).