public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: "H.J. Lu" <hjl.tools@gmail.com>
To: libc-alpha@sourceware.org
Cc: Florian Weimer <fweimer@redhat.com>,
	Oleh Derevenko <oleh.derevenko@gmail.com>,
	Arjan van de Ven <arjan@linux.intel.com>,
	Andreas Schwab <schwab@linux-m68k.org>
Subject: [PATCH v4 1/3] Reduce CAS in low level locks [BZ #28537]
Date: Tue,  9 Nov 2021 16:16:12 -0800	[thread overview]
Message-ID: <20211110001614.2087610-2-hjl.tools@gmail.com> (raw)
In-Reply-To: <20211110001614.2087610-1-hjl.tools@gmail.com>

CAS instruction is expensive.  From the x86 CPU's point of view, getting
a cache line for writing is more expensive than reading.  See Appendix
A.2 Spinlock in:

https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/xeon-lock-scaling-analysis-paper.pdf

The full compare and swap will grab the cache line exclusive and cause
excessive cache line bouncing.

1. Change low level locks to do an atomic load and skip CAS if compare
may fail to reduce cache line bouncing on contended locks.
2. In __lll_lock, replace atomic_compare_and_exchange_bool_acq with
atomic_compare_and_exchange_val_acq and pass down the result to
__lll_lock_wait and __lll_lock_wait_private to avoid the redundant load
there.
3. Drop __glibc_unlikely in __lll_trylock and lll_cond_trylock since we
don't know if it's actually rare; in the contended case it is clearly not
rare.
---
 nptl/lowlevellock.c         | 12 ++++++------
 sysdeps/nptl/lowlevellock.h | 29 ++++++++++++++++++++---------
 2 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/nptl/lowlevellock.c b/nptl/lowlevellock.c
index 8c740529c4..d1965c01ca 100644
--- a/nptl/lowlevellock.c
+++ b/nptl/lowlevellock.c
@@ -22,30 +22,30 @@
 #include <stap-probe.h>
 
 void
-__lll_lock_wait_private (int *futex)
+__lll_lock_wait_private (int *futex, int futex_value)
 {
-  if (atomic_load_relaxed (futex) == 2)
+  if (futex_value == 2)
     goto futex;
 
   while (atomic_exchange_acquire (futex, 2) != 0)
     {
     futex:
-      LIBC_PROBE (lll_lock_wait_private, 1, futex);
+      LIBC_PROBE (lll_lock_wait_private, 2, futex, futex_value);
       futex_wait ((unsigned int *) futex, 2, LLL_PRIVATE); /* Wait if *futex == 2.  */
     }
 }
 libc_hidden_def (__lll_lock_wait_private)
 
 void
-__lll_lock_wait (int *futex, int private)
+__lll_lock_wait (int *futex, int futex_value, int private)
 {
-  if (atomic_load_relaxed (futex) == 2)
+  if (futex_value == 2)
     goto futex;
 
   while (atomic_exchange_acquire (futex, 2) != 0)
     {
     futex:
-      LIBC_PROBE (lll_lock_wait, 1, futex);
+      LIBC_PROBE (lll_lock_wait, 2, futex, futex_value);
       futex_wait ((unsigned int *) futex, 2, private); /* Wait if *futex == 2.  */
     }
 }
diff --git a/sysdeps/nptl/lowlevellock.h b/sysdeps/nptl/lowlevellock.h
index 4d95114ed3..4235d13de9 100644
--- a/sysdeps/nptl/lowlevellock.h
+++ b/sysdeps/nptl/lowlevellock.h
@@ -66,7 +66,11 @@
    0.  Otherwise leave lock unchanged and return non-zero to indicate that the
    lock was not acquired.  */
 #define __lll_trylock(lock)	\
-  __glibc_unlikely (atomic_compare_and_exchange_bool_acq ((lock), 1, 0))
+  (__extension__ ({							\
+    __typeof (*(lock)) __lock_value = atomic_load_relaxed (lock);	\
+    (__lock_value != 0							\
+     || atomic_compare_and_exchange_bool_acq ((lock), 1, 0));		\
+  }))
 #define lll_trylock(lock)	\
    __lll_trylock (&(lock))
 
@@ -74,11 +78,15 @@
    return 0.  Otherwise leave lock unchanged and return non-zero to indicate
    that the lock was not acquired.  */
 #define lll_cond_trylock(lock)	\
-  __glibc_unlikely (atomic_compare_and_exchange_bool_acq (&(lock), 2, 0))
+  (__extension__ ({							\
+    __typeof (lock) __lock_value = atomic_load_relaxed (&(lock));	\
+    (__lock_value != 0							\
+     || atomic_compare_and_exchange_bool_acq (&(lock), 2, 0));		\
+  }))
 
-extern void __lll_lock_wait_private (int *futex);
+extern void __lll_lock_wait_private (int *futex, int futex_value);
 libc_hidden_proto (__lll_lock_wait_private)
-extern void __lll_lock_wait (int *futex, int private);
+extern void __lll_lock_wait (int *futex, int futex_value, int private);
 libc_hidden_proto (__lll_lock_wait)
 
 /* This is an expression rather than a statement even though its value is
@@ -95,13 +103,15 @@ libc_hidden_proto (__lll_lock_wait)
   ((void)                                                               \
    ({                                                                   \
      int *__futex = (futex);                                            \
-     if (__glibc_unlikely                                               \
-         (atomic_compare_and_exchange_bool_acq (__futex, 1, 0)))        \
+     int __futex_value = atomic_load_relaxed (futex);                   \
+     if (__futex_value != 0                                             \
+         || ((__futex_value = atomic_compare_and_exchange_val_acq       \
+              (__futex, 1, 0) != 0)))                                   \
        {                                                                \
          if (__builtin_constant_p (private) && (private) == LLL_PRIVATE) \
-           __lll_lock_wait_private (__futex);                           \
+           __lll_lock_wait_private (futex, __futex_value);              \
          else                                                           \
-           __lll_lock_wait (__futex, private);                          \
+           __lll_lock_wait (futex, __futex_value, private);             \
        }                                                                \
    }))
 #define lll_lock(futex, private)	\
@@ -120,7 +130,8 @@ libc_hidden_proto (__lll_lock_wait)
    ({                                                                   \
      int *__futex = (futex);                                            \
      if (__glibc_unlikely (atomic_exchange_acq (__futex, 2) != 0))      \
-       __lll_lock_wait (__futex, private);                              \
+       __lll_lock_wait (__futex, atomic_load_relaxed (__futex),         \
+			private); \
    }))
 #define lll_cond_lock(futex, private) __lll_cond_lock (&(futex), private)
 
-- 
2.33.1


  reply	other threads:[~2021-11-10  0:16 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-10  0:16 [PATCH v4 0/3] Optimize CAS " H.J. Lu
2021-11-10  0:16 ` H.J. Lu [this message]
2021-11-10  1:56   ` [PATCH v4 1/3] Reduce CAS in low level locks " Noah Goldstein
2021-11-10  0:16 ` [PATCH v4 2/3] Reduce CAS in __pthread_mutex_lock_full " H.J. Lu
2021-11-10  0:16 ` [PATCH v4 3/3] Optimize " H.J. Lu
2021-11-10 14:26 ` [PATCH v4 0/3] Optimize CAS " Paul E Murphy
2021-11-10 20:07   ` Paul A. Clarke
2021-11-10 21:33     ` H.J. Lu
2021-11-11  0:30       ` Paul A. Clarke
2021-11-10 23:34     ` H.J. Lu
2021-11-10 15:35 ` Paul A. Clarke
2021-11-10 15:42   ` H.J. Lu
2021-11-10 15:50     ` Paul A. Clarke
2021-11-10 15:52       ` H.J. Lu
2021-11-10 15:52   ` Florian Weimer
2021-11-10 16:03     ` H.J. Lu
2021-11-10 16:04     ` Paul A. Clarke
2021-11-10 16:14     ` Andreas Schwab

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211110001614.2087610-2-hjl.tools@gmail.com \
    --to=hjl.tools@gmail.com \
    --cc=arjan@linux.intel.com \
    --cc=fweimer@redhat.com \
    --cc=libc-alpha@sourceware.org \
    --cc=oleh.derevenko@gmail.com \
    --cc=schwab@linux-m68k.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).