public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v1 1/4] Benchtests: Add benchtests for pthread_spin_lock and mutex_trylock
@ 2022-09-29  3:14 Noah Goldstein
  2022-09-29  3:14 ` [PATCH v1 2/4] nptl: Continue use arch prefered atomic exchange in spinlock loop Noah Goldstein
  0 siblings, 1 reply; 6+ messages in thread
From: Noah Goldstein @ 2022-09-29  3:14 UTC (permalink / raw)
  To: libc-alpha; +Cc: goldstein.w.n, hjl.tools, carlos

Reuses infrastructure from previous pthread_mutex_lock benchmarks to
test other performance sensitive functions.
---
 benchtests/Makefile                           |  8 +++-
 ...utex-locks.c => bench-pthread-lock-base.c} | 20 +++++-----
 benchtests/bench-pthread-mutex-lock.c         | 32 ++++++++++++++++
 benchtests/bench-pthread-mutex-trylock.c      | 37 +++++++++++++++++++
 benchtests/bench-pthread-spinlock.c           | 30 +++++++++++++++
 5 files changed, 115 insertions(+), 12 deletions(-)
 rename benchtests/{bench-pthread-mutex-locks.c => bench-pthread-lock-base.c} (93%)
 create mode 100644 benchtests/bench-pthread-mutex-lock.c
 create mode 100644 benchtests/bench-pthread-mutex-trylock.c
 create mode 100644 benchtests/bench-pthread-spinlock.c

diff --git a/benchtests/Makefile b/benchtests/Makefile
index d99771be74..841997e0a9 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -103,11 +103,17 @@ endif
 
 bench-pthread := \
   pthread-locks \
-  pthread-mutex-locks \
+  pthread-mutex-lock \
+  pthread-mutex-trylock \
+  pthread-spinlock \
   pthread_once \
   thread_create \
 # bench-pthread
 
+LDLIBS-bench-pthread-mutex-lock += -lm
+LDLIBS-bench-pthread-mutex-trylock += -lm
+LDLIBS-bench-pthread-spinlock += -lm
+
 bench-string := \
   ffs \
   ffsll \
diff --git a/benchtests/bench-pthread-mutex-locks.c b/benchtests/bench-pthread-lock-base.c
similarity index 93%
rename from benchtests/bench-pthread-mutex-locks.c
rename to benchtests/bench-pthread-lock-base.c
index 1685b9dd1f..fac8a12b52 100644
--- a/benchtests/bench-pthread-mutex-locks.c
+++ b/benchtests/bench-pthread-lock-base.c
@@ -1,4 +1,4 @@
-/* Measure mutex_lock for different threads and critical sections.
+/* Measure lock functions for different threads and critical sections.
    Copyright (C) 2022 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -17,7 +17,6 @@
    <https://www.gnu.org/licenses/>.  */
 
 #define TEST_MAIN
-#define TEST_NAME "pthread-mutex-locks"
 #define TIMEOUT (20 * 60)
 
 #include <stdio.h>
@@ -31,8 +30,8 @@
 #include "bench-timing.h"
 #include "json-lib.h"
 
-static pthread_mutex_t lock;
-static pthread_mutexattr_t attr;
+static bench_lock_t lock;
+static bench_lock_attr_t attr;
 static pthread_barrier_t barrier;
 
 #define START_ITERS 1000
@@ -104,9 +103,9 @@ worker (void *v)
   TIMING_NOW (start);
   while (iters--)
     {
-      pthread_mutex_lock (&lock);
+      LOCK (&lock);
       critical_section (crt_len);
-      pthread_mutex_unlock (&lock);
+      UNLOCK (&lock);
       non_critical_section (non_crt_len);
     }
   TIMING_NOW (stop);
@@ -123,7 +122,7 @@ do_one_test (int num_threads, int crt_len, int non_crt_len, long iters)
   Worker_Params *p, params[num_threads];
   pthread_t threads[num_threads];
 
-  pthread_mutex_init (&lock, &attr);
+  LOCK_INIT (&lock, &attr);
   pthread_barrier_init (&barrier, NULL, num_threads);
 
   for (i = 0; i < num_threads; i++)
@@ -137,7 +136,7 @@ do_one_test (int num_threads, int crt_len, int non_crt_len, long iters)
   for (i = 0; i < num_threads; i++)
     pthread_join (threads[i], NULL);
 
-  pthread_mutex_destroy (&lock);
+  LOCK_DESTROY (&lock);
   pthread_barrier_destroy (&barrier);
 
   mean = 0;
@@ -246,7 +245,7 @@ do_bench (void)
   char name[128];
 
   json_init (&json_ctx, 2, stdout);
-  json_attr_object_begin (&json_ctx, "pthread_mutex_locks");
+  json_attr_object_begin (&json_ctx, TEST_NAME);
 
   /* The thread config begins from 1, and increases by 2x until nprocs.
      We also wants to test over-saturation case (1.25*nprocs).  */
@@ -260,8 +259,7 @@ do_bench (void)
   threads[th_conf++] = nprocs;
   threads[th_conf++] = nprocs + nprocs / 4;
 
-  pthread_mutexattr_init (&attr);
-  pthread_mutexattr_settype (&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+  LOCK_ATTR_INIT (&attr);
   snprintf (name, sizeof name, "type=adaptive");
 
   for (k = 0; k < (sizeof (non_crt_lens) / sizeof (int)); k++)
diff --git a/benchtests/bench-pthread-mutex-lock.c b/benchtests/bench-pthread-mutex-lock.c
new file mode 100644
index 0000000000..16556d4116
--- /dev/null
+++ b/benchtests/bench-pthread-mutex-lock.c
@@ -0,0 +1,32 @@
+/* Measure mutex_lock for different threads and critical sections.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define LOCK(lock) pthread_mutex_lock (lock)
+#define UNLOCK(lock) pthread_mutex_unlock (lock)
+#define LOCK_INIT(lock, attr) pthread_mutex_init (lock, attr)
+#define LOCK_DESTROY(lock) pthread_mutex_destroy (lock)
+#define LOCK_ATTR_INIT(attr)                                                  \
+  pthread_mutexattr_init (attr);                                              \
+  pthread_mutexattr_settype (attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+
+#define bench_lock_t pthread_mutex_t
+#define bench_lock_attr_t pthread_mutexattr_t
+
+#define TEST_NAME "pthread-mutex-lock"
+
+#include "bench-pthread-lock-base.c"
diff --git a/benchtests/bench-pthread-mutex-trylock.c b/benchtests/bench-pthread-mutex-trylock.c
new file mode 100644
index 0000000000..66318f499f
--- /dev/null
+++ b/benchtests/bench-pthread-mutex-trylock.c
@@ -0,0 +1,37 @@
+/* Measure mutex_trylock for different threads and critical sections.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define LOCK(lock)                                                            \
+  while (pthread_mutex_trylock (lock) != 0)                                   \
+    {                                                                         \
+      non_critical_section (non_crt_len);                                     \
+    }
+
+#define UNLOCK(lock) pthread_mutex_unlock (lock)
+#define LOCK_INIT(lock, attr) pthread_mutex_init (lock, attr)
+#define LOCK_DESTROY(lock) pthread_mutex_destroy (lock)
+#define LOCK_ATTR_INIT(attr)                                                  \
+  pthread_mutexattr_init (attr);                                              \
+  pthread_mutexattr_settype (attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+
+#define bench_lock_t pthread_mutex_t
+#define bench_lock_attr_t pthread_mutexattr_t
+
+#define TEST_NAME "pthread-mutex-trylock"
+
+#include "bench-pthread-lock-base.c"
diff --git a/benchtests/bench-pthread-spinlock.c b/benchtests/bench-pthread-spinlock.c
new file mode 100644
index 0000000000..2174933d6b
--- /dev/null
+++ b/benchtests/bench-pthread-spinlock.c
@@ -0,0 +1,30 @@
+/* Measure mutex_trylock for different threads and critical sections.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define LOCK(lock) pthread_spin_lock (lock)
+#define UNLOCK(lock) pthread_spin_unlock (lock)
+#define LOCK_INIT(lock, attr) pthread_spin_init (lock, *(attr))
+#define LOCK_DESTROY(lock) pthread_spin_destroy (lock)
+#define LOCK_ATTR_INIT(attr) *(attr) = 0
+
+#define bench_lock_t pthread_spinlock_t
+#define bench_lock_attr_t int
+
+#define TEST_NAME "pthread-spin-lock"
+
+#include "bench-pthread-lock-base.c"
-- 
2.34.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v1 2/4] nptl: Continue use arch prefered atomic exchange in spinlock loop
  2022-09-29  3:14 [PATCH v1 1/4] Benchtests: Add benchtests for pthread_spin_lock and mutex_trylock Noah Goldstein
@ 2022-09-29  3:14 ` Noah Goldstein
  0 siblings, 0 replies; 6+ messages in thread
From: Noah Goldstein @ 2022-09-29  3:14 UTC (permalink / raw)
  To: libc-alpha; +Cc: goldstein.w.n, hjl.tools, carlos

Despite using the preferred atomic exchange in the initial check, the
loop was unconditionally using CAS which is not desired on some
architectures (those that didn't set `ATOMIC_EXCHANGE_USES_CAS`).

No meaningful perf changes measured on broadwell but still seems like
a reasonable change.

Full check passes on x86-64.
---
 nptl/pthread_spin_lock.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/nptl/pthread_spin_lock.c b/nptl/pthread_spin_lock.c
index 19d1759f9a..1bdd6e2048 100644
--- a/nptl/pthread_spin_lock.c
+++ b/nptl/pthread_spin_lock.c
@@ -20,6 +20,20 @@
 #include "pthreadP.h"
 #include <shlib-compat.h>
 
+#if ATOMIC_EXCHANGE_USES_CAS
+/* Try to acquire the lock with a CAS instruction as this architecture
+   has no exchange instruction.  The acquisition succeeds if the lock is not
+   acquired.  */
+#  define pthread_spin_lock_grab_lock(mem, val, c) \
+    atomic_compare_exchange_weak_acquire (lock, &val, 1))
+#else
+/* Try to acquire the lock with an exchange instruction as this architecture
+   has such an instruction and we assume it is faster than a CAS.
+   The acquisition succeeds if the lock is not in an acquired state.  */
+#  define pthread_spin_lock_grab_lock(mem, val, c) \
+    (atomic_exchange_acquire (lock, 1) == 0)
+#endif
+
 int
 __pthread_spin_lock (pthread_spinlock_t *lock)
 {
@@ -36,19 +50,8 @@ __pthread_spin_lock (pthread_spinlock_t *lock)
      We use acquire MO to synchronize-with the release MO store in
      pthread_spin_unlock, and thus ensure that prior critical sections
      happen-before this critical section.  */
-#if ! ATOMIC_EXCHANGE_USES_CAS
-  /* Try to acquire the lock with an exchange instruction as this architecture
-     has such an instruction and we assume it is faster than a CAS.
-     The acquisition succeeds if the lock is not in an acquired state.  */
-  if (__glibc_likely (atomic_exchange_acquire (lock, 1) == 0))
+  if (__glibc_likely (pthread_spin_lock_grab_lock (lock, &val, 1)))
     return 0;
-#else
-  /* Try to acquire the lock with a CAS instruction as this architecture
-     has no exchange instruction.  The acquisition succeeds if the lock is not
-     acquired.  */
-  if (__glibc_likely (atomic_compare_exchange_weak_acquire (lock, &val, 1)))
-    return 0;
-#endif
 
   do
     {
@@ -75,7 +78,7 @@ __pthread_spin_lock (pthread_spinlock_t *lock)
       /* We need acquire memory order here for the same reason as mentioned
 	 for the first try to lock the spinlock.  */
     }
-  while (!atomic_compare_exchange_weak_acquire (lock, &val, 1));
+  while (!pthread_spin_lock_grab_lock (lock, &val, 1));
 
   return 0;
 }
-- 
2.34.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v1 2/4] nptl: Continue use arch prefered atomic exchange in spinlock loop
  2022-09-29 18:38 ` Noah Goldstein
  2022-09-29 18:50   ` Wilco Dijkstra
@ 2022-09-29 18:51   ` H.J. Lu
  1 sibling, 0 replies; 6+ messages in thread
From: H.J. Lu @ 2022-09-29 18:51 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: Wilco Dijkstra, GNU C Library

On Thu, Sep 29, 2022 at 11:40 AM Noah Goldstein via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> On Thu, Sep 29, 2022 at 9:35 AM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
> >
> > Hi Noah,
> >
> > Did you try building it both ways? I don't think this could ever compile:
> >
> > +  if (__glibc_likely (pthread_spin_lock_grab_lock (lock, &val, 1)))
> >
> > and:
> >
> > +#  define pthread_spin_lock_grab_lock(mem, val, c) \
> > +    atomic_compare_exchange_weak_acquire (lock, &val, 1))
> >
> > The define uses 'lock' and 'mem' inconsistently and the use of the macro
> > expands into &&val...
> >
> > Apart from that there is the question whether we should keep the weird
> > ATOMIC_EXCHANGE_USES_CAS setting - I have removed it in my atomic
> > patch series since most targets appear confused as to what it means (so
> > are likely to have the wrong setting). Also there is no evidence it is actually
> > faster. So using exchange in both cases is easier (and less error prone!).
> >
> > Also you do realize that no matter how much you change this code, it
> > won't make a difference on x86, right?
>
> Why's that?

x86-64 uses sysdeps/x86_64/nptl/pthread_spin_lock.S

-- 
H.J.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v1 2/4] nptl: Continue use arch prefered atomic exchange in spinlock loop
  2022-09-29 18:38 ` Noah Goldstein
@ 2022-09-29 18:50   ` Wilco Dijkstra
  2022-09-29 18:51   ` H.J. Lu
  1 sibling, 0 replies; 6+ messages in thread
From: Wilco Dijkstra @ 2022-09-29 18:50 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library

Hi Noah,

Because various targets override it:

> find -name pthread_spin_lock.*
./nptl/pthread_spin_lock.c
./sysdeps/alpha/nptl/pthread_spin_lock.S
./sysdeps/powerpc/nptl/pthread_spin_lock.c
./sysdeps/i386/nptl/pthread_spin_lock.S
./sysdeps/sparc/sparc64/pthread_spin_lock.S
./sysdeps/sparc/sparc32/sparcv9/pthread_spin_lock.S
./sysdeps/sparc/sparc32/pthread_spin_lock.S
./sysdeps/x86_64/nptl/pthread_spin_lock.S
./sysdeps/ia64/nptl/pthread_spin_lock.c
./sysdeps/sh/nptl/pthread_spin_lock.c

Cheers,
Wilco

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v1 2/4] nptl: Continue use arch prefered atomic exchange in spinlock loop
  2022-09-29 16:35 Wilco Dijkstra
@ 2022-09-29 18:38 ` Noah Goldstein
  2022-09-29 18:50   ` Wilco Dijkstra
  2022-09-29 18:51   ` H.J. Lu
  0 siblings, 2 replies; 6+ messages in thread
From: Noah Goldstein @ 2022-09-29 18:38 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GNU C Library

On Thu, Sep 29, 2022 at 9:35 AM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
>
> Hi Noah,
>
> Did you try building it both ways? I don't think this could ever compile:
>
> +  if (__glibc_likely (pthread_spin_lock_grab_lock (lock, &val, 1)))
>
> and:
>
> +#  define pthread_spin_lock_grab_lock(mem, val, c) \
> +    atomic_compare_exchange_weak_acquire (lock, &val, 1))
>
> The define uses 'lock' and 'mem' inconsistently and the use of the macro
> expands into &&val...
>
> Apart from that there is the question whether we should keep the weird
> ATOMIC_EXCHANGE_USES_CAS setting - I have removed it in my atomic
> patch series since most targets appear confused as to what it means (so
> are likely to have the wrong setting). Also there is no evidence it is actually
> faster. So using exchange in both cases is easier (and less error prone!).
>
> Also you do realize that no matter how much you change this code, it
> won't make a difference on x86, right?

Why's that?
>
> Cheers,
> Wilco

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v1 2/4] nptl: Continue use arch prefered atomic exchange in spinlock loop
@ 2022-09-29 16:35 Wilco Dijkstra
  2022-09-29 18:38 ` Noah Goldstein
  0 siblings, 1 reply; 6+ messages in thread
From: Wilco Dijkstra @ 2022-09-29 16:35 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: 'GNU C Library'

Hi Noah,

Did you try building it both ways? I don't think this could ever compile:

+  if (__glibc_likely (pthread_spin_lock_grab_lock (lock, &val, 1)))

and:

+#  define pthread_spin_lock_grab_lock(mem, val, c) \
+    atomic_compare_exchange_weak_acquire (lock, &val, 1))

The define uses 'lock' and 'mem' inconsistently and the use of the macro
expands into &&val...

Apart from that there is the question whether we should keep the weird
ATOMIC_EXCHANGE_USES_CAS setting - I have removed it in my atomic
patch series since most targets appear confused as to what it means (so
are likely to have the wrong setting). Also there is no evidence it is actually
faster. So using exchange in both cases is easier (and less error prone!).

Also you do realize that no matter how much you change this code, it
won't make a difference on x86, right?

Cheers,
Wilco

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-09-29 18:52 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-29  3:14 [PATCH v1 1/4] Benchtests: Add benchtests for pthread_spin_lock and mutex_trylock Noah Goldstein
2022-09-29  3:14 ` [PATCH v1 2/4] nptl: Continue use arch prefered atomic exchange in spinlock loop Noah Goldstein
2022-09-29 16:35 Wilco Dijkstra
2022-09-29 18:38 ` Noah Goldstein
2022-09-29 18:50   ` Wilco Dijkstra
2022-09-29 18:51   ` H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).