public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v1 1/2] Benchtests: Add bench for pthread_spin_{try}lock and mutex_trylock
@ 2022-10-01  2:33 Noah Goldstein
  2022-10-01  2:33 ` [PATCH v1 2/2] x86: Optimize pthread_spin_{try}lock.S Noah Goldstein
  0 siblings, 1 reply; 6+ messages in thread
From: Noah Goldstein @ 2022-10-01  2:33 UTC (permalink / raw)
  To: libc-alpha; +Cc: goldstein.w.n, hjl.tools, carlos

Reuses infrastructure from previous pthread_mutex_lock benchmarks to
test other performance sensitive functions.
---
 benchtests/Makefile                           | 10 ++++-
 ...utex-locks.c => bench-pthread-lock-base.c} | 20 +++++-----
 benchtests/bench-pthread-mutex-lock.c         | 32 ++++++++++++++++
 benchtests/bench-pthread-mutex-trylock.c      | 37 +++++++++++++++++++
 benchtests/bench-pthread-spin-lock.c          | 30 +++++++++++++++
 benchtests/bench-pthread-spin-trylock.c       | 34 +++++++++++++++++
 6 files changed, 151 insertions(+), 12 deletions(-)
 rename benchtests/{bench-pthread-mutex-locks.c => bench-pthread-lock-base.c} (93%)
 create mode 100644 benchtests/bench-pthread-mutex-lock.c
 create mode 100644 benchtests/bench-pthread-mutex-trylock.c
 create mode 100644 benchtests/bench-pthread-spin-lock.c
 create mode 100644 benchtests/bench-pthread-spin-trylock.c

diff --git a/benchtests/Makefile b/benchtests/Makefile
index d99771be74..fc1cda7fc3 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -103,11 +103,19 @@ endif
 
 bench-pthread := \
   pthread-locks \
-  pthread-mutex-locks \
+  pthread-mutex-lock \
+  pthread-mutex-trylock \
+  pthread-spin-lock \
+  pthread-spin-trylock \
   pthread_once \
   thread_create \
 # bench-pthread
 
+LDLIBS-bench-pthread-mutex-lock += -lm
+LDLIBS-bench-pthread-mutex-trylock += -lm
+LDLIBS-bench-pthread-spin-lock += -lm
+LDLIBS-bench-pthread-spin-trylock += -lm
+
 bench-string := \
   ffs \
   ffsll \
diff --git a/benchtests/bench-pthread-mutex-locks.c b/benchtests/bench-pthread-lock-base.c
similarity index 93%
rename from benchtests/bench-pthread-mutex-locks.c
rename to benchtests/bench-pthread-lock-base.c
index 1685b9dd1f..fac8a12b52 100644
--- a/benchtests/bench-pthread-mutex-locks.c
+++ b/benchtests/bench-pthread-lock-base.c
@@ -1,4 +1,4 @@
-/* Measure mutex_lock for different threads and critical sections.
+/* Measure lock functions for different threads and critical sections.
    Copyright (C) 2022 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -17,7 +17,6 @@
    <https://www.gnu.org/licenses/>.  */
 
 #define TEST_MAIN
-#define TEST_NAME "pthread-mutex-locks"
 #define TIMEOUT (20 * 60)
 
 #include <stdio.h>
@@ -31,8 +30,8 @@
 #include "bench-timing.h"
 #include "json-lib.h"
 
-static pthread_mutex_t lock;
-static pthread_mutexattr_t attr;
+static bench_lock_t lock;
+static bench_lock_attr_t attr;
 static pthread_barrier_t barrier;
 
 #define START_ITERS 1000
@@ -104,9 +103,9 @@ worker (void *v)
   TIMING_NOW (start);
   while (iters--)
     {
-      pthread_mutex_lock (&lock);
+      LOCK (&lock);
       critical_section (crt_len);
-      pthread_mutex_unlock (&lock);
+      UNLOCK (&lock);
       non_critical_section (non_crt_len);
     }
   TIMING_NOW (stop);
@@ -123,7 +122,7 @@ do_one_test (int num_threads, int crt_len, int non_crt_len, long iters)
   Worker_Params *p, params[num_threads];
   pthread_t threads[num_threads];
 
-  pthread_mutex_init (&lock, &attr);
+  LOCK_INIT (&lock, &attr);
   pthread_barrier_init (&barrier, NULL, num_threads);
 
   for (i = 0; i < num_threads; i++)
@@ -137,7 +136,7 @@ do_one_test (int num_threads, int crt_len, int non_crt_len, long iters)
   for (i = 0; i < num_threads; i++)
     pthread_join (threads[i], NULL);
 
-  pthread_mutex_destroy (&lock);
+  LOCK_DESTROY (&lock);
   pthread_barrier_destroy (&barrier);
 
   mean = 0;
@@ -246,7 +245,7 @@ do_bench (void)
   char name[128];
 
   json_init (&json_ctx, 2, stdout);
-  json_attr_object_begin (&json_ctx, "pthread_mutex_locks");
+  json_attr_object_begin (&json_ctx, TEST_NAME);
 
   /* The thread config begins from 1, and increases by 2x until nprocs.
      We also wants to test over-saturation case (1.25*nprocs).  */
@@ -260,8 +259,7 @@ do_bench (void)
   threads[th_conf++] = nprocs;
   threads[th_conf++] = nprocs + nprocs / 4;
 
-  pthread_mutexattr_init (&attr);
-  pthread_mutexattr_settype (&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+  LOCK_ATTR_INIT (&attr);
   snprintf (name, sizeof name, "type=adaptive");
 
   for (k = 0; k < (sizeof (non_crt_lens) / sizeof (int)); k++)
diff --git a/benchtests/bench-pthread-mutex-lock.c b/benchtests/bench-pthread-mutex-lock.c
new file mode 100644
index 0000000000..16556d4116
--- /dev/null
+++ b/benchtests/bench-pthread-mutex-lock.c
@@ -0,0 +1,32 @@
+/* Measure mutex_lock for different threads and critical sections.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define LOCK(lock) pthread_mutex_lock (lock)
+#define UNLOCK(lock) pthread_mutex_unlock (lock)
+#define LOCK_INIT(lock, attr) pthread_mutex_init (lock, attr)
+#define LOCK_DESTROY(lock) pthread_mutex_destroy (lock)
+#define LOCK_ATTR_INIT(attr) /* One expression, usable as a statement.  */   \
+  (pthread_mutexattr_init (attr),                                             \
+   pthread_mutexattr_settype (attr, PTHREAD_MUTEX_ADAPTIVE_NP))
+
+#define bench_lock_t pthread_mutex_t
+#define bench_lock_attr_t pthread_mutexattr_t
+
+#define TEST_NAME "pthread-mutex-lock"
+
+#include "bench-pthread-lock-base.c"
diff --git a/benchtests/bench-pthread-mutex-trylock.c b/benchtests/bench-pthread-mutex-trylock.c
new file mode 100644
index 0000000000..66318f499f
--- /dev/null
+++ b/benchtests/bench-pthread-mutex-trylock.c
@@ -0,0 +1,37 @@
+/* Measure mutex_trylock for different threads and critical sections.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define LOCK(lock)                                                            \
+  while (pthread_mutex_trylock (lock) != 0)                                   \
+    {                                                                         \
+      non_critical_section (non_crt_len);                                     \
+    }
+
+#define UNLOCK(lock) pthread_mutex_unlock (lock)
+#define LOCK_INIT(lock, attr) pthread_mutex_init (lock, attr)
+#define LOCK_DESTROY(lock) pthread_mutex_destroy (lock)
+#define LOCK_ATTR_INIT(attr) /* One expression, usable as a statement.  */   \
+  (pthread_mutexattr_init (attr),                                             \
+   pthread_mutexattr_settype (attr, PTHREAD_MUTEX_ADAPTIVE_NP))
+
+#define bench_lock_t pthread_mutex_t
+#define bench_lock_attr_t pthread_mutexattr_t
+
+#define TEST_NAME "pthread-mutex-trylock"
+
+#include "bench-pthread-lock-base.c"
diff --git a/benchtests/bench-pthread-spin-lock.c b/benchtests/bench-pthread-spin-lock.c
new file mode 100644
index 0000000000..2174933d6b
--- /dev/null
+++ b/benchtests/bench-pthread-spin-lock.c
@@ -0,0 +1,30 @@
+/* Measure spin_lock for different threads and critical sections.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define LOCK(lock) pthread_spin_lock (lock)
+#define UNLOCK(lock) pthread_spin_unlock (lock)
+#define LOCK_INIT(lock, attr) pthread_spin_init (lock, *(attr))
+#define LOCK_DESTROY(lock) pthread_spin_destroy (lock)
+#define LOCK_ATTR_INIT(attr) *(attr) = 0
+
+#define bench_lock_t pthread_spinlock_t
+#define bench_lock_attr_t int
+
+#define TEST_NAME "pthread-spin-lock"
+
+#include "bench-pthread-lock-base.c"
diff --git a/benchtests/bench-pthread-spin-trylock.c b/benchtests/bench-pthread-spin-trylock.c
new file mode 100644
index 0000000000..49eb972761
--- /dev/null
+++ b/benchtests/bench-pthread-spin-trylock.c
@@ -0,0 +1,34 @@
+/* Measure spin_trylock for different threads and critical sections.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define LOCK(lock) /* Retry trylock; do non-critical work on failure.  */    \
+  while (pthread_spin_trylock (lock) != 0)                                    \
+    {                                                                         \
+      non_critical_section (non_crt_len);                                     \
+    }
+#define UNLOCK(lock) pthread_spin_unlock (lock)
+#define LOCK_INIT(lock, attr) pthread_spin_init (lock, *(attr))
+#define LOCK_DESTROY(lock) pthread_spin_destroy (lock)
+#define LOCK_ATTR_INIT(attr) *(attr) = 0
+
+#define bench_lock_t pthread_spinlock_t
+#define bench_lock_attr_t int
+
+#define TEST_NAME "pthread-spin-trylock"
+
+#include "bench-pthread-lock-base.c"
-- 
2.34.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v1 2/2] x86: Optimize pthread_spin_{try}lock.S
  2022-10-01  2:33 [PATCH v1 1/2] Benchtests: Add bench for pthread_spin_{try}lock and mutex_trylock Noah Goldstein
@ 2022-10-01  2:33 ` Noah Goldstein
  2022-10-01  4:13   ` [PATCH v2 1/2] Benchtests: Add bench for pthread_spin_{try}lock and mutex_trylock Noah Goldstein
  0 siblings, 1 reply; 6+ messages in thread
From: Noah Goldstein @ 2022-10-01  2:33 UTC (permalink / raw)
  To: libc-alpha; +Cc: goldstein.w.n, hjl.tools, carlos

Save a jmp on the lock path coming from an initial failure in
pthread_spin_lock.S.  This costs 4 bytes of code, but since the
function still fits in the same number of 16-byte blocks (default
function alignment) it has no effect on the total binary size
of libc.so (unchanged after this commit).

pthread_spin_trylock was using a CAS, which is often more expensive,
when a simple xchg works.

Full check passes on x86-64.
---
 sysdeps/x86_64/nptl/pthread_spin_lock.S    | 23 +++++++++++++++-------
 sysdeps/x86_64/nptl/pthread_spin_trylock.S | 18 ++++++++++++-----
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/sysdeps/x86_64/nptl/pthread_spin_lock.S b/sysdeps/x86_64/nptl/pthread_spin_lock.S
index 44b837d9db..1e09e59b10 100644
--- a/sysdeps/x86_64/nptl/pthread_spin_lock.S
+++ b/sysdeps/x86_64/nptl/pthread_spin_lock.S
@@ -19,18 +19,27 @@
 #include <shlib-compat.h>
 
 ENTRY(__pthread_spin_lock)
-1:	LOCK
-	decl	0(%rdi)
-	jne	2f
+	/* Always return zero.  */
 	xor	%eax, %eax
+	LOCK
+	decl	0(%rdi)
+	jne	1f
 	ret
 
 	.align	16
-2:	rep
+1:
+	/* `rep nop` == `pause`.  */
+	rep
 	nop
-	cmpl	$0, 0(%rdi)
-	jg	1b
-	jmp	2b
+	cmpl	%eax, 0(%rdi)
+	jle	1b
+	/* Just repeat the `lock decl` logic here.  The code size saved
+	   by jumping back to entry wouldn't change how many 16-byte
+	   chunks (default function alignment) the code fits in.  */
+	LOCK
+	decl	0(%rdi)
+	jne	1b
+	ret
 END(__pthread_spin_lock)
 versioned_symbol (libc, __pthread_spin_lock, pthread_spin_lock, GLIBC_2_34)
 
diff --git a/sysdeps/x86_64/nptl/pthread_spin_trylock.S b/sysdeps/x86_64/nptl/pthread_spin_trylock.S
index fffdb27dd9..a1f97cb420 100644
--- a/sysdeps/x86_64/nptl/pthread_spin_trylock.S
+++ b/sysdeps/x86_64/nptl/pthread_spin_trylock.S
@@ -20,13 +20,21 @@
 #include <shlib-compat.h>
 
 ENTRY(__pthread_spin_trylock)
-	movl	$1, %eax
 	xorl	%ecx, %ecx
-	lock
-	cmpxchgl %ecx, (%rdi)
+	/* xchg has implicit LOCK prefix.  */
+	xchgl	%ecx, (%rdi)
+
+	/* Branch on result.  The expectation is that users of trylock will
+	   branch on success/failure, so this branch can be used to
+	   predict the coming branch.  It also has the benefit of
+	   breaking the likely expensive memory dependency on (%rdi).  */
+	cmpl	$1, %ecx
+	jnz	1f
+	xorl	%eax, %eax
+	ret
+1:
 	movl	$EBUSY, %eax
-	cmovel	%ecx, %eax
-	retq
+	ret
 END(__pthread_spin_trylock)
 versioned_symbol (libc, __pthread_spin_trylock, pthread_spin_trylock,
 		  GLIBC_2_34)
-- 
2.34.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 1/2] Benchtests: Add bench for pthread_spin_{try}lock and mutex_trylock
  2022-10-01  2:33 ` [PATCH v1 2/2] x86: Optimize pthread_spin_{try}lock.S Noah Goldstein
@ 2022-10-01  4:13   ` Noah Goldstein
  2022-10-01  4:13     ` [PATCH v2 2/2] x86: Cleanup pthread_spin_{try}lock.S Noah Goldstein
  2022-10-03 17:10     ` [PATCH v2 1/2] Benchtests: Add bench for pthread_spin_{try}lock and mutex_trylock H.J. Lu
  0 siblings, 2 replies; 6+ messages in thread
From: Noah Goldstein @ 2022-10-01  4:13 UTC (permalink / raw)
  To: libc-alpha; +Cc: goldstein.w.n, hjl.tools, carlos

Reuses infrastructure from previous pthread_mutex_lock benchmarks to
test other performance sensitive functions.
---
 benchtests/Makefile                           | 10 ++++-
 ...utex-locks.c => bench-pthread-lock-base.c} | 20 +++++-----
 benchtests/bench-pthread-mutex-lock.c         | 32 ++++++++++++++++
 benchtests/bench-pthread-mutex-trylock.c      | 37 +++++++++++++++++++
 benchtests/bench-pthread-spin-lock.c          | 30 +++++++++++++++
 benchtests/bench-pthread-spin-trylock.c       | 34 +++++++++++++++++
 6 files changed, 151 insertions(+), 12 deletions(-)
 rename benchtests/{bench-pthread-mutex-locks.c => bench-pthread-lock-base.c} (93%)
 create mode 100644 benchtests/bench-pthread-mutex-lock.c
 create mode 100644 benchtests/bench-pthread-mutex-trylock.c
 create mode 100644 benchtests/bench-pthread-spin-lock.c
 create mode 100644 benchtests/bench-pthread-spin-trylock.c

diff --git a/benchtests/Makefile b/benchtests/Makefile
index d99771be74..fc1cda7fc3 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -103,11 +103,19 @@ endif
 
 bench-pthread := \
   pthread-locks \
-  pthread-mutex-locks \
+  pthread-mutex-lock \
+  pthread-mutex-trylock \
+  pthread-spin-lock \
+  pthread-spin-trylock \
   pthread_once \
   thread_create \
 # bench-pthread
 
+LDLIBS-bench-pthread-mutex-lock += -lm
+LDLIBS-bench-pthread-mutex-trylock += -lm
+LDLIBS-bench-pthread-spin-lock += -lm
+LDLIBS-bench-pthread-spin-trylock += -lm
+
 bench-string := \
   ffs \
   ffsll \
diff --git a/benchtests/bench-pthread-mutex-locks.c b/benchtests/bench-pthread-lock-base.c
similarity index 93%
rename from benchtests/bench-pthread-mutex-locks.c
rename to benchtests/bench-pthread-lock-base.c
index 1685b9dd1f..fac8a12b52 100644
--- a/benchtests/bench-pthread-mutex-locks.c
+++ b/benchtests/bench-pthread-lock-base.c
@@ -1,4 +1,4 @@
-/* Measure mutex_lock for different threads and critical sections.
+/* Measure lock functions for different threads and critical sections.
    Copyright (C) 2022 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -17,7 +17,6 @@
    <https://www.gnu.org/licenses/>.  */
 
 #define TEST_MAIN
-#define TEST_NAME "pthread-mutex-locks"
 #define TIMEOUT (20 * 60)
 
 #include <stdio.h>
@@ -31,8 +30,8 @@
 #include "bench-timing.h"
 #include "json-lib.h"
 
-static pthread_mutex_t lock;
-static pthread_mutexattr_t attr;
+static bench_lock_t lock;
+static bench_lock_attr_t attr;
 static pthread_barrier_t barrier;
 
 #define START_ITERS 1000
@@ -104,9 +103,9 @@ worker (void *v)
   TIMING_NOW (start);
   while (iters--)
     {
-      pthread_mutex_lock (&lock);
+      LOCK (&lock);
       critical_section (crt_len);
-      pthread_mutex_unlock (&lock);
+      UNLOCK (&lock);
       non_critical_section (non_crt_len);
     }
   TIMING_NOW (stop);
@@ -123,7 +122,7 @@ do_one_test (int num_threads, int crt_len, int non_crt_len, long iters)
   Worker_Params *p, params[num_threads];
   pthread_t threads[num_threads];
 
-  pthread_mutex_init (&lock, &attr);
+  LOCK_INIT (&lock, &attr);
   pthread_barrier_init (&barrier, NULL, num_threads);
 
   for (i = 0; i < num_threads; i++)
@@ -137,7 +136,7 @@ do_one_test (int num_threads, int crt_len, int non_crt_len, long iters)
   for (i = 0; i < num_threads; i++)
     pthread_join (threads[i], NULL);
 
-  pthread_mutex_destroy (&lock);
+  LOCK_DESTROY (&lock);
   pthread_barrier_destroy (&barrier);
 
   mean = 0;
@@ -246,7 +245,7 @@ do_bench (void)
   char name[128];
 
   json_init (&json_ctx, 2, stdout);
-  json_attr_object_begin (&json_ctx, "pthread_mutex_locks");
+  json_attr_object_begin (&json_ctx, TEST_NAME);
 
   /* The thread config begins from 1, and increases by 2x until nprocs.
      We also wants to test over-saturation case (1.25*nprocs).  */
@@ -260,8 +259,7 @@ do_bench (void)
   threads[th_conf++] = nprocs;
   threads[th_conf++] = nprocs + nprocs / 4;
 
-  pthread_mutexattr_init (&attr);
-  pthread_mutexattr_settype (&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+  LOCK_ATTR_INIT (&attr);
   snprintf (name, sizeof name, "type=adaptive");
 
   for (k = 0; k < (sizeof (non_crt_lens) / sizeof (int)); k++)
diff --git a/benchtests/bench-pthread-mutex-lock.c b/benchtests/bench-pthread-mutex-lock.c
new file mode 100644
index 0000000000..16556d4116
--- /dev/null
+++ b/benchtests/bench-pthread-mutex-lock.c
@@ -0,0 +1,32 @@
+/* Measure mutex_lock for different threads and critical sections.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define LOCK(lock) pthread_mutex_lock (lock)
+#define UNLOCK(lock) pthread_mutex_unlock (lock)
+#define LOCK_INIT(lock, attr) pthread_mutex_init (lock, attr)
+#define LOCK_DESTROY(lock) pthread_mutex_destroy (lock)
+#define LOCK_ATTR_INIT(attr) /* One expression, usable as a statement.  */   \
+  (pthread_mutexattr_init (attr),                                             \
+   pthread_mutexattr_settype (attr, PTHREAD_MUTEX_ADAPTIVE_NP))
+
+#define bench_lock_t pthread_mutex_t
+#define bench_lock_attr_t pthread_mutexattr_t
+
+#define TEST_NAME "pthread-mutex-lock"
+
+#include "bench-pthread-lock-base.c"
diff --git a/benchtests/bench-pthread-mutex-trylock.c b/benchtests/bench-pthread-mutex-trylock.c
new file mode 100644
index 0000000000..66318f499f
--- /dev/null
+++ b/benchtests/bench-pthread-mutex-trylock.c
@@ -0,0 +1,37 @@
+/* Measure mutex_trylock for different threads and critical sections.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define LOCK(lock)                                                            \
+  while (pthread_mutex_trylock (lock) != 0)                                   \
+    {                                                                         \
+      non_critical_section (non_crt_len);                                     \
+    }
+
+#define UNLOCK(lock) pthread_mutex_unlock (lock)
+#define LOCK_INIT(lock, attr) pthread_mutex_init (lock, attr)
+#define LOCK_DESTROY(lock) pthread_mutex_destroy (lock)
+#define LOCK_ATTR_INIT(attr) /* One expression, usable as a statement.  */   \
+  (pthread_mutexattr_init (attr),                                             \
+   pthread_mutexattr_settype (attr, PTHREAD_MUTEX_ADAPTIVE_NP))
+
+#define bench_lock_t pthread_mutex_t
+#define bench_lock_attr_t pthread_mutexattr_t
+
+#define TEST_NAME "pthread-mutex-trylock"
+
+#include "bench-pthread-lock-base.c"
diff --git a/benchtests/bench-pthread-spin-lock.c b/benchtests/bench-pthread-spin-lock.c
new file mode 100644
index 0000000000..2174933d6b
--- /dev/null
+++ b/benchtests/bench-pthread-spin-lock.c
@@ -0,0 +1,30 @@
+/* Measure spin_lock for different threads and critical sections.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define LOCK(lock) pthread_spin_lock (lock)
+#define UNLOCK(lock) pthread_spin_unlock (lock)
+#define LOCK_INIT(lock, attr) pthread_spin_init (lock, *(attr))
+#define LOCK_DESTROY(lock) pthread_spin_destroy (lock)
+#define LOCK_ATTR_INIT(attr) *(attr) = 0
+
+#define bench_lock_t pthread_spinlock_t
+#define bench_lock_attr_t int
+
+#define TEST_NAME "pthread-spin-lock"
+
+#include "bench-pthread-lock-base.c"
diff --git a/benchtests/bench-pthread-spin-trylock.c b/benchtests/bench-pthread-spin-trylock.c
new file mode 100644
index 0000000000..49eb972761
--- /dev/null
+++ b/benchtests/bench-pthread-spin-trylock.c
@@ -0,0 +1,34 @@
+/* Measure spin_trylock for different threads and critical sections.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define LOCK(lock) /* Retry trylock; do non-critical work on failure.  */    \
+  while (pthread_spin_trylock (lock) != 0)                                    \
+    {                                                                         \
+      non_critical_section (non_crt_len);                                     \
+    }
+#define UNLOCK(lock) pthread_spin_unlock (lock)
+#define LOCK_INIT(lock, attr) pthread_spin_init (lock, *(attr))
+#define LOCK_DESTROY(lock) pthread_spin_destroy (lock)
+#define LOCK_ATTR_INIT(attr) *(attr) = 0
+
+#define bench_lock_t pthread_spinlock_t
+#define bench_lock_attr_t int
+
+#define TEST_NAME "pthread-spin-trylock"
+
+#include "bench-pthread-lock-base.c"
-- 
2.34.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 2/2] x86: Cleanup pthread_spin_{try}lock.S
  2022-10-01  4:13   ` [PATCH v2 1/2] Benchtests: Add bench for pthread_spin_{try}lock and mutex_trylock Noah Goldstein
@ 2022-10-01  4:13     ` Noah Goldstein
  2022-10-03 17:08       ` H.J. Lu
  2022-10-03 17:10     ` [PATCH v2 1/2] Benchtests: Add bench for pthread_spin_{try}lock and mutex_trylock H.J. Lu
  1 sibling, 1 reply; 6+ messages in thread
From: Noah Goldstein @ 2022-10-01  4:13 UTC (permalink / raw)
  To: libc-alpha; +Cc: goldstein.w.n, hjl.tools, carlos

Save a jmp on the lock path coming from an initial failure in
pthread_spin_lock.S.  This costs 4 bytes of code, but since the
function still fits in the same number of 16-byte blocks (default
function alignment) it has no effect on the total binary size
of libc.so (unchanged after this commit).

pthread_spin_trylock was using a CAS, which is often more expensive,
when a simple xchg works.

Full check passes on x86-64.
---
 sysdeps/x86_64/nptl/pthread_spin_lock.S    | 23 +++++++++++++++-------
 sysdeps/x86_64/nptl/pthread_spin_trylock.S | 18 ++++++++++++-----
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/sysdeps/x86_64/nptl/pthread_spin_lock.S b/sysdeps/x86_64/nptl/pthread_spin_lock.S
index 44b837d9db..1e09e59b10 100644
--- a/sysdeps/x86_64/nptl/pthread_spin_lock.S
+++ b/sysdeps/x86_64/nptl/pthread_spin_lock.S
@@ -19,18 +19,27 @@
 #include <shlib-compat.h>
 
 ENTRY(__pthread_spin_lock)
-1:	LOCK
-	decl	0(%rdi)
-	jne	2f
+	/* Always return zero.  */
 	xor	%eax, %eax
+	LOCK
+	decl	0(%rdi)
+	jne	1f
 	ret
 
 	.align	16
-2:	rep
+1:
+	/* `rep nop` == `pause`.  */
+	rep
 	nop
-	cmpl	$0, 0(%rdi)
-	jg	1b
-	jmp	2b
+	cmpl	%eax, 0(%rdi)
+	jle	1b
+	/* Just repeat the `lock decl` logic here.  The code size saved
+	   by jumping back to entry wouldn't change how many 16-byte
+	   chunks (default function alignment) the code fits in.  */
+	LOCK
+	decl	0(%rdi)
+	jne	1b
+	ret
 END(__pthread_spin_lock)
 versioned_symbol (libc, __pthread_spin_lock, pthread_spin_lock, GLIBC_2_34)
 
diff --git a/sysdeps/x86_64/nptl/pthread_spin_trylock.S b/sysdeps/x86_64/nptl/pthread_spin_trylock.S
index fffdb27dd9..a1f97cb420 100644
--- a/sysdeps/x86_64/nptl/pthread_spin_trylock.S
+++ b/sysdeps/x86_64/nptl/pthread_spin_trylock.S
@@ -20,13 +20,21 @@
 #include <shlib-compat.h>
 
 ENTRY(__pthread_spin_trylock)
-	movl	$1, %eax
 	xorl	%ecx, %ecx
-	lock
-	cmpxchgl %ecx, (%rdi)
+	/* xchg has implicit LOCK prefix.  */
+	xchgl	%ecx, (%rdi)
+
+	/* Branch on result.  The expectation is that users of trylock will
+	   branch on success/failure, so this branch can be used to
+	   predict the coming branch.  It also has the benefit of
+	   breaking the likely expensive memory dependency on (%rdi).  */
+	cmpl	$1, %ecx
+	jnz	1f
+	xorl	%eax, %eax
+	ret
+1:
 	movl	$EBUSY, %eax
-	cmovel	%ecx, %eax
-	retq
+	ret
 END(__pthread_spin_trylock)
 versioned_symbol (libc, __pthread_spin_trylock, pthread_spin_trylock,
 		  GLIBC_2_34)
-- 
2.34.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 2/2] x86: Cleanup pthread_spin_{try}lock.S
  2022-10-01  4:13     ` [PATCH v2 2/2] x86: Cleanup pthread_spin_{try}lock.S Noah Goldstein
@ 2022-10-03 17:08       ` H.J. Lu
  0 siblings, 0 replies; 6+ messages in thread
From: H.J. Lu @ 2022-10-03 17:08 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: libc-alpha, carlos

On Fri, Sep 30, 2022 at 9:13 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> Save a jmp on the lock path coming from an initial failure in
> pthread_spin_lock.S.  This costs 4-bytes of code but since the
> function still fits in the same number of 16-byte blocks (default
> function alignment) it does not have affect on the total binary size
> of libc.so (unchanged after this commit).
>
> pthread_spin_trylock was using a CAS when a simple xchg works which
> is often more expensive.
>
> Full check passes on x86-64.
> ---
>  sysdeps/x86_64/nptl/pthread_spin_lock.S    | 23 +++++++++++++++-------
>  sysdeps/x86_64/nptl/pthread_spin_trylock.S | 18 ++++++++++++-----
>  2 files changed, 29 insertions(+), 12 deletions(-)
>
> diff --git a/sysdeps/x86_64/nptl/pthread_spin_lock.S b/sysdeps/x86_64/nptl/pthread_spin_lock.S
> index 44b837d9db..1e09e59b10 100644
> --- a/sysdeps/x86_64/nptl/pthread_spin_lock.S
> +++ b/sysdeps/x86_64/nptl/pthread_spin_lock.S
> @@ -19,18 +19,27 @@
>  #include <shlib-compat.h>
>
>  ENTRY(__pthread_spin_lock)
> -1:     LOCK
> -       decl    0(%rdi)
> -       jne     2f
> +       /* Always return zero.  */
>         xor     %eax, %eax
> +       LOCK
> +       decl    0(%rdi)
> +       jne     1f
>         ret
>
>         .align  16
> -2:     rep
> +1:
> +       /* `rep nop` == `pause`.  */
> +       rep
>         nop
> -       cmpl    $0, 0(%rdi)
> -       jg      1b
> -       jmp     2b
> +       cmpl    %eax, 0(%rdi)
> +       jle     1b
> +       /* Just repeat the `lock decl` logic here.  The code size save
> +          of jumping back to entry doesn't change how many 16-byte
> +          chunks (default function alignment) that the code fits in.  */
> +       LOCK
> +       decl    0(%rdi)
> +       jne     1b
> +       ret
>  END(__pthread_spin_lock)
>  versioned_symbol (libc, __pthread_spin_lock, pthread_spin_lock, GLIBC_2_34)
>
> diff --git a/sysdeps/x86_64/nptl/pthread_spin_trylock.S b/sysdeps/x86_64/nptl/pthread_spin_trylock.S
> index fffdb27dd9..a1f97cb420 100644
> --- a/sysdeps/x86_64/nptl/pthread_spin_trylock.S
> +++ b/sysdeps/x86_64/nptl/pthread_spin_trylock.S
> @@ -20,13 +20,21 @@
>  #include <shlib-compat.h>
>
>  ENTRY(__pthread_spin_trylock)
> -       movl    $1, %eax
>         xorl    %ecx, %ecx
> -       lock
> -       cmpxchgl %ecx, (%rdi)
> +       /* xchg has implicit LOCK prefix.  */
> +       xchgl   %ecx, (%rdi)
> +
> +       /* Branch on result.  Expectation is the use of trylock will be
> +          branching on success/failure so this branch can be used to
> +          to predict the coming branch.  It has the benefit of
> +          breaking the likely expensive memory dependency on (%rdi).  */
> +       cmpl    $1, %ecx
> +       jnz     1f
> +       xorl    %eax, %eax
> +       ret
> +1:
>         movl    $EBUSY, %eax
> -       cmovel  %ecx, %eax
> -       retq
> +       ret
>  END(__pthread_spin_trylock)
>  versioned_symbol (libc, __pthread_spin_trylock, pthread_spin_trylock,
>                   GLIBC_2_34)
> --
> 2.34.1
>

LGTM.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 1/2] Benchtests: Add bench for pthread_spin_{try}lock and mutex_trylock
  2022-10-01  4:13   ` [PATCH v2 1/2] Benchtests: Add bench for pthread_spin_{try}lock and mutex_trylock Noah Goldstein
  2022-10-01  4:13     ` [PATCH v2 2/2] x86: Cleanup pthread_spin_{try}lock.S Noah Goldstein
@ 2022-10-03 17:10     ` H.J. Lu
  1 sibling, 0 replies; 6+ messages in thread
From: H.J. Lu @ 2022-10-03 17:10 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: libc-alpha, carlos

On Fri, Sep 30, 2022 at 9:13 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> Reuses infrastructure from previous pthread_mutex_lock benchmarks to
> test other performance sensitive functions.
> ---
>  benchtests/Makefile                           | 10 ++++-
>  ...utex-locks.c => bench-pthread-lock-base.c} | 20 +++++-----
>  benchtests/bench-pthread-mutex-lock.c         | 32 ++++++++++++++++
>  benchtests/bench-pthread-mutex-trylock.c      | 37 +++++++++++++++++++
>  benchtests/bench-pthread-spin-lock.c          | 30 +++++++++++++++
>  benchtests/bench-pthread-spin-trylock.c       | 34 +++++++++++++++++
>  6 files changed, 151 insertions(+), 12 deletions(-)
>  rename benchtests/{bench-pthread-mutex-locks.c => bench-pthread-lock-base.c} (93%)
>  create mode 100644 benchtests/bench-pthread-mutex-lock.c
>  create mode 100644 benchtests/bench-pthread-mutex-trylock.c
>  create mode 100644 benchtests/bench-pthread-spin-lock.c
>  create mode 100644 benchtests/bench-pthread-spin-trylock.c
>
> diff --git a/benchtests/Makefile b/benchtests/Makefile
> index d99771be74..fc1cda7fc3 100644
> --- a/benchtests/Makefile
> +++ b/benchtests/Makefile
> @@ -103,11 +103,19 @@ endif
>
>  bench-pthread := \
>    pthread-locks \
> -  pthread-mutex-locks \
> +  pthread-mutex-lock \
> +  pthread-mutex-trylock \
> +  pthread-spin-lock \
> +  pthread-spin-trylock \
>    pthread_once \
>    thread_create \
>  # bench-pthread
>
> +LDLIBS-bench-pthread-mutex-lock += -lm
> +LDLIBS-bench-pthread-mutex-trylock += -lm
> +LDLIBS-bench-pthread-spin-lock += -lm
> +LDLIBS-bench-pthread-spin-trylock += -lm
> +
>  bench-string := \
>    ffs \
>    ffsll \
> diff --git a/benchtests/bench-pthread-mutex-locks.c b/benchtests/bench-pthread-lock-base.c
> similarity index 93%
> rename from benchtests/bench-pthread-mutex-locks.c
> rename to benchtests/bench-pthread-lock-base.c
> index 1685b9dd1f..fac8a12b52 100644
> --- a/benchtests/bench-pthread-mutex-locks.c
> +++ b/benchtests/bench-pthread-lock-base.c
> @@ -1,4 +1,4 @@
> -/* Measure mutex_lock for different threads and critical sections.
> +/* Measure lock functions for different threads and critical sections.
>     Copyright (C) 2022 Free Software Foundation, Inc.
>     This file is part of the GNU C Library.
>
> @@ -17,7 +17,6 @@
>     <https://www.gnu.org/licenses/>.  */
>
>  #define TEST_MAIN
> -#define TEST_NAME "pthread-mutex-locks"
>  #define TIMEOUT (20 * 60)
>
>  #include <stdio.h>
> @@ -31,8 +30,8 @@
>  #include "bench-timing.h"
>  #include "json-lib.h"
>
> -static pthread_mutex_t lock;
> -static pthread_mutexattr_t attr;
> +static bench_lock_t lock;
> +static bench_lock_attr_t attr;
>  static pthread_barrier_t barrier;
>
>  #define START_ITERS 1000
> @@ -104,9 +103,9 @@ worker (void *v)
>    TIMING_NOW (start);
>    while (iters--)
>      {
> -      pthread_mutex_lock (&lock);
> +      LOCK (&lock);
>        critical_section (crt_len);
> -      pthread_mutex_unlock (&lock);
> +      UNLOCK (&lock);
>        non_critical_section (non_crt_len);
>      }
>    TIMING_NOW (stop);
> @@ -123,7 +122,7 @@ do_one_test (int num_threads, int crt_len, int non_crt_len, long iters)
>    Worker_Params *p, params[num_threads];
>    pthread_t threads[num_threads];
>
> -  pthread_mutex_init (&lock, &attr);
> +  LOCK_INIT (&lock, &attr);
>    pthread_barrier_init (&barrier, NULL, num_threads);
>
>    for (i = 0; i < num_threads; i++)
> @@ -137,7 +136,7 @@ do_one_test (int num_threads, int crt_len, int non_crt_len, long iters)
>    for (i = 0; i < num_threads; i++)
>      pthread_join (threads[i], NULL);
>
> -  pthread_mutex_destroy (&lock);
> +  LOCK_DESTROY (&lock);
>    pthread_barrier_destroy (&barrier);
>
>    mean = 0;
> @@ -246,7 +245,7 @@ do_bench (void)
>    char name[128];
>
>    json_init (&json_ctx, 2, stdout);
> -  json_attr_object_begin (&json_ctx, "pthread_mutex_locks");
> +  json_attr_object_begin (&json_ctx, TEST_NAME);
>
>    /* The thread config begins from 1, and increases by 2x until nprocs.
>       We also wants to test over-saturation case (1.25*nprocs).  */
> @@ -260,8 +259,7 @@ do_bench (void)
>    threads[th_conf++] = nprocs;
>    threads[th_conf++] = nprocs + nprocs / 4;
>
> -  pthread_mutexattr_init (&attr);
> -  pthread_mutexattr_settype (&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
> +  LOCK_ATTR_INIT (&attr);
>    snprintf (name, sizeof name, "type=adaptive");
>
>    for (k = 0; k < (sizeof (non_crt_lens) / sizeof (int)); k++)
> diff --git a/benchtests/bench-pthread-mutex-lock.c b/benchtests/bench-pthread-mutex-lock.c
> new file mode 100644
> index 0000000000..16556d4116
> --- /dev/null
> +++ b/benchtests/bench-pthread-mutex-lock.c
> @@ -0,0 +1,32 @@
> +/* Measure mutex_lock for different threads and critical sections.
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#define LOCK(lock) pthread_mutex_lock (lock)
> +#define UNLOCK(lock) pthread_mutex_unlock (lock)
> +#define LOCK_INIT(lock, attr) pthread_mutex_init (lock, attr)
> +#define LOCK_DESTROY(lock) pthread_mutex_destroy (lock)
> +#define LOCK_ATTR_INIT(attr)                                                  \
> +  pthread_mutexattr_init (attr);                                              \
> +  pthread_mutexattr_settype (attr, PTHREAD_MUTEX_ADAPTIVE_NP);
> +
> +#define bench_lock_t pthread_mutex_t
> +#define bench_lock_attr_t pthread_mutexattr_t
> +
> +#define TEST_NAME "pthread-mutex-lock"
> +
> +#include "bench-pthread-lock-base.c"
> diff --git a/benchtests/bench-pthread-mutex-trylock.c b/benchtests/bench-pthread-mutex-trylock.c
> new file mode 100644
> index 0000000000..66318f499f
> --- /dev/null
> +++ b/benchtests/bench-pthread-mutex-trylock.c
> @@ -0,0 +1,37 @@
> +/* Measure mutex_trylock for different threads and critical sections.
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#define LOCK(lock)                                                            \
> +  while (pthread_mutex_trylock (lock) != 0)                                   \
> +    {                                                                         \
> +      non_critical_section (non_crt_len);                                     \
> +    }
> +
> +#define UNLOCK(lock) pthread_mutex_unlock (lock)
> +#define LOCK_INIT(lock, attr) pthread_mutex_init (lock, attr)
> +#define LOCK_DESTROY(lock) pthread_mutex_destroy (lock)
> +#define LOCK_ATTR_INIT(attr)                                                  \
> +  pthread_mutexattr_init (attr);                                              \
> +  pthread_mutexattr_settype (attr, PTHREAD_MUTEX_ADAPTIVE_NP);
> +
> +#define bench_lock_t pthread_mutex_t
> +#define bench_lock_attr_t pthread_mutexattr_t
> +
> +#define TEST_NAME "pthread-mutex-trylock"
> +
> +#include "bench-pthread-lock-base.c"
> diff --git a/benchtests/bench-pthread-spin-lock.c b/benchtests/bench-pthread-spin-lock.c
> new file mode 100644
> index 0000000000..2174933d6b
> --- /dev/null
> +++ b/benchtests/bench-pthread-spin-lock.c
> @@ -0,0 +1,30 @@
> +/* Measure spin_lock for different threads and critical sections.
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#define LOCK(lock) pthread_spin_lock (lock)
> +#define UNLOCK(lock) pthread_spin_unlock (lock)
> +#define LOCK_INIT(lock, attr) pthread_spin_init (lock, *(attr))
> +#define LOCK_DESTROY(lock) pthread_spin_destroy (lock)
> +#define LOCK_ATTR_INIT(attr) *(attr) = 0
> +
> +#define bench_lock_t pthread_spinlock_t
> +#define bench_lock_attr_t int
> +
> +#define TEST_NAME "pthread-spin-lock"
> +
> +#include "bench-pthread-lock-base.c"
> diff --git a/benchtests/bench-pthread-spin-trylock.c b/benchtests/bench-pthread-spin-trylock.c
> new file mode 100644
> index 0000000000..49eb972761
> --- /dev/null
> +++ b/benchtests/bench-pthread-spin-trylock.c
> @@ -0,0 +1,34 @@
> +/* Measure spin_trylock for different threads and critical sections.
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#define LOCK(lock)                                                            \
> +  while (pthread_spin_trylock (lock) != 0)                                    \
> +    {                                                                         \
> +      non_critical_section (non_crt_len);                                     \
> +    }
> +#define UNLOCK(lock) pthread_spin_unlock (lock)
> +#define LOCK_INIT(lock, attr) pthread_spin_init (lock, *(attr))
> +#define LOCK_DESTROY(lock) pthread_spin_destroy (lock)
> +#define LOCK_ATTR_INIT(attr) *(attr) = 0
> +
> +#define bench_lock_t pthread_spinlock_t
> +#define bench_lock_attr_t int
> +
> +#define TEST_NAME "pthread-spin-trylock"
> +
> +#include "bench-pthread-lock-base.c"
> --
> 2.34.1
>

LGTM.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-10-03 17:10 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-01  2:33 [PATCH v1 1/2] Benchtests: Add bench for pthread_spin_{try}lock and mutex_trylock Noah Goldstein
2022-10-01  2:33 ` [PATCH v1 2/2] x86: Optimize pthread_spin_{try}lock.S Noah Goldstein
2022-10-01  4:13   ` [PATCH v2 1/2] Benchtests: Add bench for pthread_spin_{try}lock and mutex_trylock Noah Goldstein
2022-10-01  4:13     ` [PATCH v2 2/2] x86: Cleanup pthread_spin_{try}lock.S Noah Goldstein
2022-10-03 17:08       ` H.J. Lu
2022-10-03 17:10     ` [PATCH v2 1/2] Benchtests: Add bench for pthread_spin_{try}lock and mutex_trylock H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).