public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 3/7] Remove atomic_bit_set/bit_test_set
@ 2022-07-06 15:14 Wilco Dijkstra
  2022-07-06 16:14 ` Noah Goldstein
  0 siblings, 1 reply; 18+ messages in thread
From: Wilco Dijkstra @ 2022-07-06 15:14 UTC (permalink / raw)
  To: 'GNU C Library'

Replace the 3 uses of atomic_bit_set and atomic_bit_test_set with atomic_fetch_or_acquire.
This enables removal of a lot of target specific implementations.

---

diff --git a/include/atomic.h b/include/atomic.h
index 73cc772f0149f94fa0c3e14fa858fa89fee6985f..ed1fc38e7569fcbdd0473ab6f69956a44d62354f 100644
--- a/include/atomic.h
+++ b/include/atomic.h
@@ -255,28 +255,6 @@
 #endif
 
 
-#ifndef atomic_bit_set
-# define atomic_bit_set(mem, bit) \
-  (void) atomic_bit_test_set(mem, bit)
-#endif
-
-
-#ifndef atomic_bit_test_set
-# define atomic_bit_test_set(mem, bit) \
-  ({ __typeof (*(mem)) __atg14_old;					      \
-     __typeof (mem) __atg14_memp = (mem);				      \
-     __typeof (*(mem)) __atg14_mask = ((__typeof (*(mem))) 1 << (bit));	      \
-									      \
-     do									      \
-       __atg14_old = (*__atg14_memp);					      \
-     while (__builtin_expect						      \
-	    (atomic_compare_and_exchange_bool_acq (__atg14_memp,	      \
-						   __atg14_old | __atg14_mask,\
-						   __atg14_old), 0));	      \
-									      \
-     __atg14_old & __atg14_mask; })
-#endif
-
 /* Atomically *mem &= mask.  */
 #ifndef atomic_and
 # define atomic_and(mem, mask) \
diff --git a/misc/tst-atomic.c b/misc/tst-atomic.c
index 13acf0da8f1ab41774175401a68ce28a0eb7ed30..765b036873e181d6c49299c0ad08d5dbfcc05387 100644
--- a/misc/tst-atomic.c
+++ b/misc/tst-atomic.c
@@ -325,66 +325,6 @@ do_test (void)
       ret = 1;
     }
 
-  mem = 0;
-  atomic_bit_set (&mem, 1);
-  if (mem != 2)
-    {
-      puts ("atomic_bit_set test 1 failed");
-      ret = 1;
-    }
-
-  mem = 8;
-  atomic_bit_set (&mem, 3);
-  if (mem != 8)
-    {
-      puts ("atomic_bit_set test 2 failed");
-      ret = 1;
-    }
-
-#ifdef TEST_ATOMIC64
-  mem = 16;
-  atomic_bit_set (&mem, 35);
-  if (mem != 0x800000010LL)
-    {
-      puts ("atomic_bit_set test 3 failed");
-      ret = 1;
-    }
-#endif
-
-  mem = 0;
-  if (atomic_bit_test_set (&mem, 1)
-      || mem != 2)
-    {
-      puts ("atomic_bit_test_set test 1 failed");
-      ret = 1;
-    }
-
-  mem = 8;
-  if (! atomic_bit_test_set (&mem, 3)
-      || mem != 8)
-    {
-      puts ("atomic_bit_test_set test 2 failed");
-      ret = 1;
-    }
-
-#ifdef TEST_ATOMIC64
-  mem = 16;
-  if (atomic_bit_test_set (&mem, 35)
-      || mem != 0x800000010LL)
-    {
-      puts ("atomic_bit_test_set test 3 failed");
-      ret = 1;
-    }
-
-  mem = 0x100000000LL;
-  if (! atomic_bit_test_set (&mem, 32)
-      || mem != 0x100000000LL)
-    {
-      puts ("atomic_bit_test_set test 4 failed");
-      ret = 1;
-    }
-#endif
-
   /* Tests for C11-like atomics.  */
   mem = 11;
   if (atomic_load_relaxed (&mem) != 11 || atomic_load_acquire (&mem) != 11)
diff --git a/nptl/nptl_free_tcb.c b/nptl/nptl_free_tcb.c
index 0d31143ac849de6398e06c399b94813ae57dcff3..b63ec3bc3c1df4a1fc5272a24d453e679dbedd5e 100644
--- a/nptl/nptl_free_tcb.c
+++ b/nptl/nptl_free_tcb.c
@@ -24,7 +24,8 @@ void
 __nptl_free_tcb (struct pthread *pd)
 {
   /* The thread is exiting now.  */
-  if (atomic_bit_test_set (&pd->cancelhandling, TERMINATED_BIT) == 0)
+  if ((atomic_fetch_or_acquire (&pd->cancelhandling, 1 << TERMINATED_BIT)
+      & (1 << TERMINATED_BIT)) == 0)
     {
       /* Free TPP data.  */
       if (pd->tpp != NULL)
diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
index 870a8fcb34eb43b58c2260fee6a4624f0fbbd469..4b7f3edc384748f300ca935ad878eb0e3547e163 100644
--- a/nptl/pthread_create.c
+++ b/nptl/pthread_create.c
@@ -487,7 +487,7 @@ start_thread (void *arg)
   /* The thread is exiting now.  Don't set this bit until after we've hit
      the event-reporting breakpoint, so that td_thr_get_info on us while at
      the breakpoint reports TD_THR_RUN state rather than TD_THR_ZOMBIE.  */
-  atomic_bit_set (&pd->cancelhandling, EXITING_BIT);
+  atomic_fetch_or_acquire (&pd->cancelhandling, 1 << EXITING_BIT);
 
   if (__glibc_unlikely (atomic_decrement_and_test (&__nptl_nthreads)))
     /* This was the last thread.  */
diff --git a/sysdeps/alpha/atomic-machine.h b/sysdeps/alpha/atomic-machine.h
index df28b90261aa485125d951bc1abf76602730bfd0..115a9df5d77cd08bcb0a49d9b59f0c53b4a20d78 100644
--- a/sysdeps/alpha/atomic-machine.h
+++ b/sysdeps/alpha/atomic-machine.h
@@ -325,15 +325,6 @@
 #define atomic_exchange_and_add(mem, value) \
   __atomic_val_bysize (__arch_exchange_and_add, int, mem, value, __MB, __MB)
 
-
-/* ??? Blah, I'm lazy.  Implement these later.  Can do better than the
-   compare-and-exchange loop provided by generic code.
-
-#define atomic_decrement_if_positive(mem)
-#define atomic_bit_test_set(mem, bit)
-
-*/
-
 #define atomic_full_barrier()	__asm ("mb" : : : "memory");
 #define atomic_read_barrier()	__asm ("mb" : : : "memory");
 #define atomic_write_barrier()	__asm ("wmb" : : : "memory");
diff --git a/sysdeps/ia64/atomic-machine.h b/sysdeps/ia64/atomic-machine.h
index 71afcfe0d0031a6ceae5879a2b3b19ed2ee2f110..b2f5d2f4774cc2503c7595cb82f30f60fbcbe89c 100644
--- a/sysdeps/ia64/atomic-machine.h
+++ b/sysdeps/ia64/atomic-machine.h
@@ -77,20 +77,4 @@
      while (__builtin_expect (__val != __oldval, 0));			      \
      __oldval; })
 
-#define atomic_bit_test_set(mem, bit) \
-  ({ __typeof (*mem) __oldval, __val;					      \
-     __typeof (mem) __memp = (mem);					      \
-     __typeof (*mem) __mask = ((__typeof (*mem)) 1 << (bit));		      \
-									      \
-     __val = (*__memp);							      \
-     do									      \
-       {								      \
-	 __oldval = __val;						      \
-	 __val = atomic_compare_and_exchange_val_acq (__memp,		      \
-						      __oldval | __mask,      \
-						      __oldval);	      \
-       }								      \
-     while (__builtin_expect (__val != __oldval, 0));			      \
-     __oldval & __mask; })
-
 #define atomic_full_barrier() __sync_synchronize ()
diff --git a/sysdeps/m68k/m680x0/m68020/atomic-machine.h b/sysdeps/m68k/m680x0/m68020/atomic-machine.h
index 70cda007341b49d28dc1d9847a88198b47589db4..72a3b81642c1a23207054092d16bb856556cd897 100644
--- a/sysdeps/m68k/m680x0/m68020/atomic-machine.h
+++ b/sysdeps/m68k/m680x0/m68020/atomic-machine.h
@@ -218,15 +218,3 @@
 			   : "memory");					      \
        }								      \
      __result; })
-
-#define atomic_bit_set(mem, bit) \
-  __asm __volatile ("bfset %0{%1,#1}"					      \
-		    : "+m" (*(mem))					      \
-		    : "di" (sizeof (*(mem)) * 8 - (bit) - 1))
-
-#define atomic_bit_test_set(mem, bit) \
-  ({ char __result;							      \
-     __asm __volatile ("bfset %1{%2,#1}; sne %0"			      \
-		       : "=dm" (__result), "+m" (*(mem))		      \
-		       : "di" (sizeof (*(mem)) * 8 - (bit) - 1));	      \
-     __result; })
diff --git a/sysdeps/nptl/pthreadP.h b/sysdeps/nptl/pthreadP.h
index 39af275c254ef3e737736bd5c38099bada8746d6..e8bd4c6dcbf7b7785857ff08c15dac9a5cfb2a5d 100644
--- a/sysdeps/nptl/pthreadP.h
+++ b/sysdeps/nptl/pthreadP.h
@@ -43,12 +43,6 @@
   atomic_compare_and_exchange_val_acq (&(descr)->member, new, old)
 #endif
 
-#ifndef THREAD_ATOMIC_BIT_SET
-# define THREAD_ATOMIC_BIT_SET(descr, member, bit) \
-  atomic_bit_set (&(descr)->member, bit)
-#endif
-
-
 static inline short max_adaptive_count (void)
 {
 #if HAVE_TUNABLES
@@ -276,7 +270,7 @@ __do_cancel (void)
   struct pthread *self = THREAD_SELF;
 
   /* Make sure we get no more cancellations.  */
-  atomic_bit_set (&self->cancelhandling, EXITING_BIT);
+  atomic_fetch_or_acquire (&self->cancelhandling, 1 << EXITING_BIT);
 
   __pthread_unwind ((__pthread_unwind_buf_t *)
 		    THREAD_GETMEM (self, cleanup_jmp_buf));
diff --git a/sysdeps/s390/atomic-machine.h b/sysdeps/s390/atomic-machine.h
index fe7f04cf30cf5b62fea743a72384f8fc248d2d25..db31e377970c4ab6285ef65fb21419db7c6ca373 100644
--- a/sysdeps/s390/atomic-machine.h
+++ b/sysdeps/s390/atomic-machine.h
@@ -93,17 +93,6 @@
     atomic_or_val (mem, mask);			\
   } while (0)
 
-/* Atomically *mem |= 1 << bit and return true if the bit was set in old value
-   of *mem.  */
-/* The load-and-or instruction is used on z196 zarch and higher cpus
-   instead of a loop with compare-and-swap instruction.  */
-#define atomic_bit_test_set(mem, bit)					\
-  ({ __typeof (*(mem)) __atg14_old;					\
-    __typeof (mem) __atg14_memp = (mem);				\
-    __typeof (*(mem)) __atg14_mask = ((__typeof (*(mem))) 1 << (bit));	\
-    __atg14_old = atomic_or_val (__atg14_memp, __atg14_mask);		\
-    __atg14_old & __atg14_mask; })
-
 /* Atomically *mem &= mask and return the old value of *mem.  */
 /* The gcc builtin uses load-and-and instruction on z196 zarch and higher cpus
    instead of a loop with compare-and-swap instruction.  */
diff --git a/sysdeps/unix/sysv/linux/riscv/atomic-machine.h b/sysdeps/unix/sysv/linux/riscv/atomic-machine.h
index deb6cadaa35e841e23cee55d169a20538bdb1f8d..c5eb5c639fb59d7395c0a2d8f4fd72452845914b 100644
--- a/sysdeps/unix/sysv/linux/riscv/atomic-machine.h
+++ b/sysdeps/unix/sysv/linux/riscv/atomic-machine.h
@@ -159,10 +159,6 @@
 
 # define atomic_max_relaxed(mem, value) asm_amo ("amomaxu", "", mem, value)
 
-# define atomic_bit_test_set(mem, bit)                   \
-  ({ typeof (*mem) __mask = (typeof (*mem))1 << (bit);    \
-     asm_amo ("amoor", ".aq", mem, __mask) & __mask; })
-
 #else /* __riscv_atomic */
 # error "ISAs that do not subsume the A extension are not supported"
 #endif /* !__riscv_atomic */
diff --git a/sysdeps/unix/sysv/linux/sh/atomic-machine.h b/sysdeps/unix/sysv/linux/sh/atomic-machine.h
index aead3d5866d12b3b6bba27a1ef4a273400e1715b..3bf84d06bc875fca75b38b5ab01fca6683d390f7 100644
--- a/sysdeps/unix/sysv/linux/sh/atomic-machine.h
+++ b/sysdeps/unix/sysv/linux/sh/atomic-machine.h
@@ -301,97 +301,3 @@
 
 #define atomic_increment_and_test(mem) atomic_add_zero((mem), 1)
 #define atomic_decrement_and_test(mem) atomic_add_zero((mem), -1)
-
-#define atomic_bit_set(mem, bit) \
-  (void) ({ unsigned int __mask = 1 << (bit); \
-	    if (sizeof (*(mem)) == 1) \
-	      __asm __volatile ("\
-		mova 1f,r0\n\
-		mov r15,r1\n\
-		.align 2\n\
-		mov #(0f-1f),r15\n\
-	     0: mov.b @%0,r2\n\
-		or %1,r2\n\
-		mov.b r2,@%0\n\
-	     1: mov r1,r15"\
-		: : "u" (mem), "u" (__mask) \
-		: "r0", "r1", "r2", "memory"); \
-	    else if (sizeof (*(mem)) == 2) \
-	      __asm __volatile ("\
-		mova 1f,r0\n\
-		mov r15,r1\n\
-		.align 2\n\
-		mov #(0f-1f),r15\n\
-	     0: mov.w @%0,r2\n\
-		or %1,r2\n\
-		mov.w r2,@%0\n\
-	     1: mov r1,r15"\
-		: : "u" (mem), "u" (__mask) \
-		: "r0", "r1", "r2", "memory"); \
-	    else if (sizeof (*(mem)) == 4) \
-	      __asm __volatile ("\
-		mova 1f,r0\n\
-		mov r15,r1\n\
-		.align 2\n\
-		mov #(0f-1f),r15\n\
-	     0: mov.l @%0,r2\n\
-		or %1,r2\n\
-		mov.l r2,@%0\n\
-	     1: mov r1,r15"\
-		: : "u" (mem), "u" (__mask) \
-		: "r0", "r1", "r2", "memory"); \
-	    else \
-	      abort (); \
-	    })
-
-#define atomic_bit_test_set(mem, bit) \
-  ({ unsigned int __mask = 1 << (bit); \
-     unsigned int __result = __mask; \
-     if (sizeof (*(mem)) == 1) \
-       __asm __volatile ("\
-	  mova 1f,r0\n\
-	  .align 2\n\
-	  mov r15,r1\n\
-	  mov #(0f-1f),r15\n\
-       0: mov.b @%2,r2\n\
-	  mov r2,r3\n\
-	  or %1,r2\n\
-	  mov.b r2,@%2\n\
-       1: mov r1,r15\n\
-	  and r3,%0"\
-	: "=&r" (__result), "=&r" (__mask) \
-	: "u" (mem), "0" (__result), "1" (__mask) \
-	: "r0", "r1", "r2", "r3", "memory");	\
-     else if (sizeof (*(mem)) == 2) \
-       __asm __volatile ("\
-	  mova 1f,r0\n\
-	  .align 2\n\
-	  mov r15,r1\n\
-	  mov #(0f-1f),r15\n\
-       0: mov.w @%2,r2\n\
-	  mov r2,r3\n\
-	  or %1,r2\n\
-	  mov.w %1,@%2\n\
-       1: mov r1,r15\n\
-	  and r3,%0"\
-	: "=&r" (__result), "=&r" (__mask) \
-	: "u" (mem), "0" (__result), "1" (__mask) \
-	: "r0", "r1", "r2", "r3", "memory"); \
-     else if (sizeof (*(mem)) == 4) \
-       __asm __volatile ("\
-	  mova 1f,r0\n\
-	  .align 2\n\
-	  mov r15,r1\n\
-	  mov #(0f-1f),r15\n\
-       0: mov.l @%2,r2\n\
-	  mov r2,r3\n\
-	  or r2,%1\n\
-	  mov.l %1,@%2\n\
-       1: mov r1,r15\n\
-	  and r3,%0"\
-	: "=&r" (__result), "=&r" (__mask) \
-	: "u" (mem), "0" (__result), "1" (__mask) \
-	: "r0", "r1", "r2", "r3", "memory"); \
-     else \
-       abort (); \
-     __result; })
diff --git a/sysdeps/x86/atomic-machine.h b/sysdeps/x86/atomic-machine.h
index c27a437ec1b129fc18ca832708a69627959fc107..6adc219bf69f1507624618ed931dad11fea4e150 100644
--- a/sysdeps/x86/atomic-machine.h
+++ b/sysdeps/x86/atomic-machine.h
@@ -292,56 +292,6 @@
      __result; })
 
 
-#define atomic_bit_set(mem, bit) \
-  do {									      \
-    if (sizeof (*mem) == 1)						      \
-      __asm __volatile (LOCK_PREFIX "orb %b2, %0"			      \
-			: "=m" (*mem)					      \
-			: "m" (*mem), IBR_CONSTRAINT (1L << (bit)));	      \
-    else if (sizeof (*mem) == 2)					      \
-      __asm __volatile (LOCK_PREFIX "orw %w2, %0"			      \
-			: "=m" (*mem)					      \
-			: "m" (*mem), "ir" (1L << (bit)));		      \
-    else if (sizeof (*mem) == 4)					      \
-      __asm __volatile (LOCK_PREFIX "orl %2, %0"			      \
-			: "=m" (*mem)					      \
-			: "m" (*mem), "ir" (1L << (bit)));		      \
-    else if (__builtin_constant_p (bit) && (bit) < 32)			      \
-      __asm __volatile (LOCK_PREFIX "orq %2, %0"			      \
-			: "=m" (*mem)					      \
-			: "m" (*mem), "i" (1L << (bit)));		      \
-    else if (__HAVE_64B_ATOMICS)					      \
-      __asm __volatile (LOCK_PREFIX "orq %q2, %0"			      \
-			: "=m" (*mem)					      \
-			: "m" (*mem), "r" (1UL << (bit)));		      \
-    else								      \
-      __atomic_link_error ();						      \
-  } while (0)
-
-
-#define atomic_bit_test_set(mem, bit) \
-  ({ unsigned char __result;						      \
-     if (sizeof (*mem) == 1)						      \
-       __asm __volatile (LOCK_PREFIX "btsb %3, %1; setc %0"		      \
-			 : "=q" (__result), "=m" (*mem)			      \
-			 : "m" (*mem), IBR_CONSTRAINT (bit));		      \
-     else if (sizeof (*mem) == 2)					      \
-       __asm __volatile (LOCK_PREFIX "btsw %3, %1; setc %0"		      \
-			 : "=q" (__result), "=m" (*mem)			      \
-			 : "m" (*mem), "ir" (bit));			      \
-     else if (sizeof (*mem) == 4)					      \
-       __asm __volatile (LOCK_PREFIX "btsl %3, %1; setc %0"		      \
-			 : "=q" (__result), "=m" (*mem)			      \
-			 : "m" (*mem), "ir" (bit));			      \
-     else if (__HAVE_64B_ATOMICS)					      \
-       __asm __volatile (LOCK_PREFIX "btsq %3, %1; setc %0"		      \
-			 : "=q" (__result), "=m" (*mem)			      \
-			 : "m" (*mem), "ir" (bit));			      \
-     else							      	      \
-       __atomic_link_error ();					      \
-     __result; })
-
-
 #define __arch_and_body(lock, mem, mask) \
   do {									      \
     if (sizeof (*mem) == 1)						      \

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set
  2022-07-06 15:14 [PATCH 3/7] Remove atomic_bit_set/bit_test_set Wilco Dijkstra
@ 2022-07-06 16:14 ` Noah Goldstein
  2022-07-06 18:25   ` Noah Goldstein
  0 siblings, 1 reply; 18+ messages in thread
From: Noah Goldstein @ 2022-07-06 16:14 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GNU C Library

On Wed, Jul 6, 2022 at 8:14 AM Wilco Dijkstra via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> Replace the 3 uses of atomic_bit_set and atomic_bit_test_set with atomic_fetch_or_acquire.
> This enables removal of a lot of target specific implementations.
>
> ---
>
> diff --git a/include/atomic.h b/include/atomic.h
> index 73cc772f0149f94fa0c3e14fa858fa89fee6985f..ed1fc38e7569fcbdd0473ab6f69956a44d62354f 100644
> --- a/include/atomic.h
> +++ b/include/atomic.h
> @@ -255,28 +255,6 @@
>  #endif
>
>
> -#ifndef atomic_bit_set
> -# define atomic_bit_set(mem, bit) \
> -  (void) atomic_bit_test_set(mem, bit)
> -#endif
> -
> -
> -#ifndef atomic_bit_test_set
> -# define atomic_bit_test_set(mem, bit) \
> -  ({ __typeof (*(mem)) __atg14_old;                                          \
> -     __typeof (mem) __atg14_memp = (mem);                                    \
> -     __typeof (*(mem)) __atg14_mask = ((__typeof (*(mem))) 1 << (bit));              \
> -                                                                             \
> -     do                                                                              \
> -       __atg14_old = (*__atg14_memp);                                        \
> -     while (__builtin_expect                                                 \
> -           (atomic_compare_and_exchange_bool_acq (__atg14_memp,              \
> -                                                  __atg14_old | __atg14_mask,\
> -                                                  __atg14_old), 0));         \
> -                                                                             \
> -     __atg14_old & __atg14_mask; })
> -#endif
> -
>  /* Atomically *mem &= mask.  */
>  #ifndef atomic_and
>  # define atomic_and(mem, mask) \
> diff --git a/misc/tst-atomic.c b/misc/tst-atomic.c
> index 13acf0da8f1ab41774175401a68ce28a0eb7ed30..765b036873e181d6c49299c0ad08d5dbfcc05387 100644
> --- a/misc/tst-atomic.c
> +++ b/misc/tst-atomic.c
> @@ -325,66 +325,6 @@ do_test (void)
>        ret = 1;
>      }
>
> -  mem = 0;
> -  atomic_bit_set (&mem, 1);
> -  if (mem != 2)
> -    {
> -      puts ("atomic_bit_set test 1 failed");
> -      ret = 1;
> -    }
> -
> -  mem = 8;
> -  atomic_bit_set (&mem, 3);
> -  if (mem != 8)
> -    {
> -      puts ("atomic_bit_set test 2 failed");
> -      ret = 1;
> -    }
> -
> -#ifdef TEST_ATOMIC64
> -  mem = 16;
> -  atomic_bit_set (&mem, 35);
> -  if (mem != 0x800000010LL)
> -    {
> -      puts ("atomic_bit_set test 3 failed");
> -      ret = 1;
> -    }
> -#endif
> -
> -  mem = 0;
> -  if (atomic_bit_test_set (&mem, 1)
> -      || mem != 2)
> -    {
> -      puts ("atomic_bit_test_set test 1 failed");
> -      ret = 1;
> -    }
> -
> -  mem = 8;
> -  if (! atomic_bit_test_set (&mem, 3)
> -      || mem != 8)
> -    {
> -      puts ("atomic_bit_test_set test 2 failed");
> -      ret = 1;
> -    }
> -
> -#ifdef TEST_ATOMIC64
> -  mem = 16;
> -  if (atomic_bit_test_set (&mem, 35)
> -      || mem != 0x800000010LL)
> -    {
> -      puts ("atomic_bit_test_set test 3 failed");
> -      ret = 1;
> -    }
> -
> -  mem = 0x100000000LL;
> -  if (! atomic_bit_test_set (&mem, 32)
> -      || mem != 0x100000000LL)
> -    {
> -      puts ("atomic_bit_test_set test 4 failed");
> -      ret = 1;
> -    }
> -#endif
> -
>    /* Tests for C11-like atomics.  */
>    mem = 11;
>    if (atomic_load_relaxed (&mem) != 11 || atomic_load_acquire (&mem) != 11)
> diff --git a/nptl/nptl_free_tcb.c b/nptl/nptl_free_tcb.c
> index 0d31143ac849de6398e06c399b94813ae57dcff3..b63ec3bc3c1df4a1fc5272a24d453e679dbedd5e 100644
> --- a/nptl/nptl_free_tcb.c
> +++ b/nptl/nptl_free_tcb.c
> @@ -24,7 +24,8 @@ void
>  __nptl_free_tcb (struct pthread *pd)
>  {
>    /* The thread is exiting now.  */
> -  if (atomic_bit_test_set (&pd->cancelhandling, TERMINATED_BIT) == 0)
> +  if ((atomic_fetch_or_acquire (&pd->cancelhandling, 1 << TERMINATED_BIT)
> +      & (1 << TERMINATED_BIT)) == 0)

I think this change may cause regressions on x86 because at the moment
it seems the only way to generate `lock bts` is with inline assembly:

https://godbolt.org/z/bdchE53Ps

Can we keep atomic_bit_test_set?
>      {
>        /* Free TPP data.  */
>        if (pd->tpp != NULL)
> diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
> index 870a8fcb34eb43b58c2260fee6a4624f0fbbd469..4b7f3edc384748f300ca935ad878eb0e3547e163 100644
> --- a/nptl/pthread_create.c
> +++ b/nptl/pthread_create.c
> @@ -487,7 +487,7 @@ start_thread (void *arg)
>    /* The thread is exiting now.  Don't set this bit until after we've hit
>       the event-reporting breakpoint, so that td_thr_get_info on us while at
>       the breakpoint reports TD_THR_RUN state rather than TD_THR_ZOMBIE.  */
> -  atomic_bit_set (&pd->cancelhandling, EXITING_BIT);
> +  atomic_fetch_or_acquire (&pd->cancelhandling, 1 << EXITING_BIT);
>
>    if (__glibc_unlikely (atomic_decrement_and_test (&__nptl_nthreads)))
>      /* This was the last thread.  */
> diff --git a/sysdeps/alpha/atomic-machine.h b/sysdeps/alpha/atomic-machine.h
> index df28b90261aa485125d951bc1abf76602730bfd0..115a9df5d77cd08bcb0a49d9b59f0c53b4a20d78 100644
> --- a/sysdeps/alpha/atomic-machine.h
> +++ b/sysdeps/alpha/atomic-machine.h
> @@ -325,15 +325,6 @@
>  #define atomic_exchange_and_add(mem, value) \
>    __atomic_val_bysize (__arch_exchange_and_add, int, mem, value, __MB, __MB)
>
> -
> -/* ??? Blah, I'm lazy.  Implement these later.  Can do better than the
> -   compare-and-exchange loop provided by generic code.
> -
> -#define atomic_decrement_if_positive(mem)
> -#define atomic_bit_test_set(mem, bit)
> -
> -*/
> -
>  #define atomic_full_barrier()  __asm ("mb" : : : "memory");
>  #define atomic_read_barrier()  __asm ("mb" : : : "memory");
>  #define atomic_write_barrier() __asm ("wmb" : : : "memory");
> diff --git a/sysdeps/ia64/atomic-machine.h b/sysdeps/ia64/atomic-machine.h
> index 71afcfe0d0031a6ceae5879a2b3b19ed2ee2f110..b2f5d2f4774cc2503c7595cb82f30f60fbcbe89c 100644
> --- a/sysdeps/ia64/atomic-machine.h
> +++ b/sysdeps/ia64/atomic-machine.h
> @@ -77,20 +77,4 @@
>       while (__builtin_expect (__val != __oldval, 0));                        \
>       __oldval; })
>
> -#define atomic_bit_test_set(mem, bit) \
> -  ({ __typeof (*mem) __oldval, __val;                                        \
> -     __typeof (mem) __memp = (mem);                                          \
> -     __typeof (*mem) __mask = ((__typeof (*mem)) 1 << (bit));                \
> -                                                                             \
> -     __val = (*__memp);                                                              \
> -     do                                                                              \
> -       {                                                                     \
> -        __oldval = __val;                                                    \
> -        __val = atomic_compare_and_exchange_val_acq (__memp,                 \
> -                                                     __oldval | __mask,      \
> -                                                     __oldval);              \
> -       }                                                                     \
> -     while (__builtin_expect (__val != __oldval, 0));                        \
> -     __oldval & __mask; })
> -
>  #define atomic_full_barrier() __sync_synchronize ()
> diff --git a/sysdeps/m68k/m680x0/m68020/atomic-machine.h b/sysdeps/m68k/m680x0/m68020/atomic-machine.h
> index 70cda007341b49d28dc1d9847a88198b47589db4..72a3b81642c1a23207054092d16bb856556cd897 100644
> --- a/sysdeps/m68k/m680x0/m68020/atomic-machine.h
> +++ b/sysdeps/m68k/m680x0/m68020/atomic-machine.h
> @@ -218,15 +218,3 @@
>                            : "memory");                                       \
>         }                                                                     \
>       __result; })
> -
> -#define atomic_bit_set(mem, bit) \
> -  __asm __volatile ("bfset %0{%1,#1}"                                        \
> -                   : "+m" (*(mem))                                           \
> -                   : "di" (sizeof (*(mem)) * 8 - (bit) - 1))
> -
> -#define atomic_bit_test_set(mem, bit) \
> -  ({ char __result;                                                          \
> -     __asm __volatile ("bfset %1{%2,#1}; sne %0"                             \
> -                      : "=dm" (__result), "+m" (*(mem))                      \
> -                      : "di" (sizeof (*(mem)) * 8 - (bit) - 1));             \
> -     __result; })
> diff --git a/sysdeps/nptl/pthreadP.h b/sysdeps/nptl/pthreadP.h
> index 39af275c254ef3e737736bd5c38099bada8746d6..e8bd4c6dcbf7b7785857ff08c15dac9a5cfb2a5d 100644
> --- a/sysdeps/nptl/pthreadP.h
> +++ b/sysdeps/nptl/pthreadP.h
> @@ -43,12 +43,6 @@
>    atomic_compare_and_exchange_val_acq (&(descr)->member, new, old)
>  #endif
>
> -#ifndef THREAD_ATOMIC_BIT_SET
> -# define THREAD_ATOMIC_BIT_SET(descr, member, bit) \
> -  atomic_bit_set (&(descr)->member, bit)
> -#endif
> -
> -
>  static inline short max_adaptive_count (void)
>  {
>  #if HAVE_TUNABLES
> @@ -276,7 +270,7 @@ __do_cancel (void)
>    struct pthread *self = THREAD_SELF;
>
>    /* Make sure we get no more cancellations.  */
> -  atomic_bit_set (&self->cancelhandling, EXITING_BIT);
> +  atomic_fetch_or_acquire (&self->cancelhandling, 1 << EXITING_BIT);
>
>    __pthread_unwind ((__pthread_unwind_buf_t *)
>                     THREAD_GETMEM (self, cleanup_jmp_buf));
> diff --git a/sysdeps/s390/atomic-machine.h b/sysdeps/s390/atomic-machine.h
> index fe7f04cf30cf5b62fea743a72384f8fc248d2d25..db31e377970c4ab6285ef65fb21419db7c6ca373 100644
> --- a/sysdeps/s390/atomic-machine.h
> +++ b/sysdeps/s390/atomic-machine.h
> @@ -93,17 +93,6 @@
>      atomic_or_val (mem, mask);                 \
>    } while (0)
>
> -/* Atomically *mem |= 1 << bit and return true if the bit was set in old value
> -   of *mem.  */
> -/* The load-and-or instruction is used on z196 zarch and higher cpus
> -   instead of a loop with compare-and-swap instruction.  */
> -#define atomic_bit_test_set(mem, bit)                                  \
> -  ({ __typeof (*(mem)) __atg14_old;                                    \
> -    __typeof (mem) __atg14_memp = (mem);                               \
> -    __typeof (*(mem)) __atg14_mask = ((__typeof (*(mem))) 1 << (bit)); \
> -    __atg14_old = atomic_or_val (__atg14_memp, __atg14_mask);          \
> -    __atg14_old & __atg14_mask; })
> -
>  /* Atomically *mem &= mask and return the old value of *mem.  */
>  /* The gcc builtin uses load-and-and instruction on z196 zarch and higher cpus
>     instead of a loop with compare-and-swap instruction.  */
> diff --git a/sysdeps/unix/sysv/linux/riscv/atomic-machine.h b/sysdeps/unix/sysv/linux/riscv/atomic-machine.h
> index deb6cadaa35e841e23cee55d169a20538bdb1f8d..c5eb5c639fb59d7395c0a2d8f4fd72452845914b 100644
> --- a/sysdeps/unix/sysv/linux/riscv/atomic-machine.h
> +++ b/sysdeps/unix/sysv/linux/riscv/atomic-machine.h
> @@ -159,10 +159,6 @@
>
>  # define atomic_max_relaxed(mem, value) asm_amo ("amomaxu", "", mem, value)
>
> -# define atomic_bit_test_set(mem, bit)                   \
> -  ({ typeof (*mem) __mask = (typeof (*mem))1 << (bit);    \
> -     asm_amo ("amoor", ".aq", mem, __mask) & __mask; })
> -
>  #else /* __riscv_atomic */
>  # error "ISAs that do not subsume the A extension are not supported"
>  #endif /* !__riscv_atomic */
> diff --git a/sysdeps/unix/sysv/linux/sh/atomic-machine.h b/sysdeps/unix/sysv/linux/sh/atomic-machine.h
> index aead3d5866d12b3b6bba27a1ef4a273400e1715b..3bf84d06bc875fca75b38b5ab01fca6683d390f7 100644
> --- a/sysdeps/unix/sysv/linux/sh/atomic-machine.h
> +++ b/sysdeps/unix/sysv/linux/sh/atomic-machine.h
> @@ -301,97 +301,3 @@
>
>  #define atomic_increment_and_test(mem) atomic_add_zero((mem), 1)
>  #define atomic_decrement_and_test(mem) atomic_add_zero((mem), -1)
> -
> -#define atomic_bit_set(mem, bit) \
> -  (void) ({ unsigned int __mask = 1 << (bit); \
> -           if (sizeof (*(mem)) == 1) \
> -             __asm __volatile ("\
> -               mova 1f,r0\n\
> -               mov r15,r1\n\
> -               .align 2\n\
> -               mov #(0f-1f),r15\n\
> -            0: mov.b @%0,r2\n\
> -               or %1,r2\n\
> -               mov.b r2,@%0\n\
> -            1: mov r1,r15"\
> -               : : "u" (mem), "u" (__mask) \
> -               : "r0", "r1", "r2", "memory"); \
> -           else if (sizeof (*(mem)) == 2) \
> -             __asm __volatile ("\
> -               mova 1f,r0\n\
> -               mov r15,r1\n\
> -               .align 2\n\
> -               mov #(0f-1f),r15\n\
> -            0: mov.w @%0,r2\n\
> -               or %1,r2\n\
> -               mov.w r2,@%0\n\
> -            1: mov r1,r15"\
> -               : : "u" (mem), "u" (__mask) \
> -               : "r0", "r1", "r2", "memory"); \
> -           else if (sizeof (*(mem)) == 4) \
> -             __asm __volatile ("\
> -               mova 1f,r0\n\
> -               mov r15,r1\n\
> -               .align 2\n\
> -               mov #(0f-1f),r15\n\
> -            0: mov.l @%0,r2\n\
> -               or %1,r2\n\
> -               mov.l r2,@%0\n\
> -            1: mov r1,r15"\
> -               : : "u" (mem), "u" (__mask) \
> -               : "r0", "r1", "r2", "memory"); \
> -           else \
> -             abort (); \
> -           })
> -
> -#define atomic_bit_test_set(mem, bit) \
> -  ({ unsigned int __mask = 1 << (bit); \
> -     unsigned int __result = __mask; \
> -     if (sizeof (*(mem)) == 1) \
> -       __asm __volatile ("\
> -         mova 1f,r0\n\
> -         .align 2\n\
> -         mov r15,r1\n\
> -         mov #(0f-1f),r15\n\
> -       0: mov.b @%2,r2\n\
> -         mov r2,r3\n\
> -         or %1,r2\n\
> -         mov.b r2,@%2\n\
> -       1: mov r1,r15\n\
> -         and r3,%0"\
> -       : "=&r" (__result), "=&r" (__mask) \
> -       : "u" (mem), "0" (__result), "1" (__mask) \
> -       : "r0", "r1", "r2", "r3", "memory");    \
> -     else if (sizeof (*(mem)) == 2) \
> -       __asm __volatile ("\
> -         mova 1f,r0\n\
> -         .align 2\n\
> -         mov r15,r1\n\
> -         mov #(0f-1f),r15\n\
> -       0: mov.w @%2,r2\n\
> -         mov r2,r3\n\
> -         or %1,r2\n\
> -         mov.w %1,@%2\n\
> -       1: mov r1,r15\n\
> -         and r3,%0"\
> -       : "=&r" (__result), "=&r" (__mask) \
> -       : "u" (mem), "0" (__result), "1" (__mask) \
> -       : "r0", "r1", "r2", "r3", "memory"); \
> -     else if (sizeof (*(mem)) == 4) \
> -       __asm __volatile ("\
> -         mova 1f,r0\n\
> -         .align 2\n\
> -         mov r15,r1\n\
> -         mov #(0f-1f),r15\n\
> -       0: mov.l @%2,r2\n\
> -         mov r2,r3\n\
> -         or r2,%1\n\
> -         mov.l %1,@%2\n\
> -       1: mov r1,r15\n\
> -         and r3,%0"\
> -       : "=&r" (__result), "=&r" (__mask) \
> -       : "u" (mem), "0" (__result), "1" (__mask) \
> -       : "r0", "r1", "r2", "r3", "memory"); \
> -     else \
> -       abort (); \
> -     __result; })
> diff --git a/sysdeps/x86/atomic-machine.h b/sysdeps/x86/atomic-machine.h
> index c27a437ec1b129fc18ca832708a69627959fc107..6adc219bf69f1507624618ed931dad11fea4e150 100644
> --- a/sysdeps/x86/atomic-machine.h
> +++ b/sysdeps/x86/atomic-machine.h
> @@ -292,56 +292,6 @@
>       __result; })
>
>
> -#define atomic_bit_set(mem, bit) \
> -  do {                                                                       \
> -    if (sizeof (*mem) == 1)                                                  \
> -      __asm __volatile (LOCK_PREFIX "orb %b2, %0"                            \
> -                       : "=m" (*mem)                                         \
> -                       : "m" (*mem), IBR_CONSTRAINT (1L << (bit)));          \
> -    else if (sizeof (*mem) == 2)                                             \
> -      __asm __volatile (LOCK_PREFIX "orw %w2, %0"                            \
> -                       : "=m" (*mem)                                         \
> -                       : "m" (*mem), "ir" (1L << (bit)));                    \
> -    else if (sizeof (*mem) == 4)                                             \
> -      __asm __volatile (LOCK_PREFIX "orl %2, %0"                             \
> -                       : "=m" (*mem)                                         \
> -                       : "m" (*mem), "ir" (1L << (bit)));                    \
> -    else if (__builtin_constant_p (bit) && (bit) < 32)                       \
> -      __asm __volatile (LOCK_PREFIX "orq %2, %0"                             \
> -                       : "=m" (*mem)                                         \
> -                       : "m" (*mem), "i" (1L << (bit)));                     \
> -    else if (__HAVE_64B_ATOMICS)                                             \
> -      __asm __volatile (LOCK_PREFIX "orq %q2, %0"                            \
> -                       : "=m" (*mem)                                         \
> -                       : "m" (*mem), "r" (1UL << (bit)));                    \
> -    else                                                                     \
> -      __atomic_link_error ();                                                \
> -  } while (0)
> -
> -
> -#define atomic_bit_test_set(mem, bit) \
> -  ({ unsigned char __result;                                                 \
> -     if (sizeof (*mem) == 1)                                                 \
> -       __asm __volatile (LOCK_PREFIX "btsb %3, %1; setc %0"                  \
> -                        : "=q" (__result), "=m" (*mem)                       \
> -                        : "m" (*mem), IBR_CONSTRAINT (bit));                 \
> -     else if (sizeof (*mem) == 2)                                            \
> -       __asm __volatile (LOCK_PREFIX "btsw %3, %1; setc %0"                  \
> -                        : "=q" (__result), "=m" (*mem)                       \
> -                        : "m" (*mem), "ir" (bit));                           \
> -     else if (sizeof (*mem) == 4)                                            \
> -       __asm __volatile (LOCK_PREFIX "btsl %3, %1; setc %0"                  \
> -                        : "=q" (__result), "=m" (*mem)                       \
> -                        : "m" (*mem), "ir" (bit));                           \
> -     else if (__HAVE_64B_ATOMICS)                                            \
> -       __asm __volatile (LOCK_PREFIX "btsq %3, %1; setc %0"                  \
> -                        : "=q" (__result), "=m" (*mem)                       \
> -                        : "m" (*mem), "ir" (bit));                           \
> -     else                                                                    \
> -       __atomic_link_error ();                                       \
> -     __result; })
> -
> -
>  #define __arch_and_body(lock, mem, mask) \
>    do {                                                                       \
>      if (sizeof (*mem) == 1)                                                  \

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set
  2022-07-06 16:14 ` Noah Goldstein
@ 2022-07-06 18:25   ` Noah Goldstein
  2022-07-06 18:59     ` Wilco Dijkstra
  0 siblings, 1 reply; 18+ messages in thread
From: Noah Goldstein @ 2022-07-06 18:25 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GNU C Library

On Wed, Jul 6, 2022 at 9:14 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Wed, Jul 6, 2022 at 8:14 AM Wilco Dijkstra via Libc-alpha
> <libc-alpha@sourceware.org> wrote:
> >
> > Replace the 3 uses of atomic_bit_set and atomic_bit_test_set with atomic_fetch_or_acquire.
> > This enables removal of a lot of target specific implementations.
> >
> > ---
> >
> > diff --git a/include/atomic.h b/include/atomic.h
> > index 73cc772f0149f94fa0c3e14fa858fa89fee6985f..ed1fc38e7569fcbdd0473ab6f69956a44d62354f 100644
> > --- a/include/atomic.h
> > +++ b/include/atomic.h
> > @@ -255,28 +255,6 @@
> >  #endif
> >
> >
> > -#ifndef atomic_bit_set
> > -# define atomic_bit_set(mem, bit) \
> > -  (void) atomic_bit_test_set(mem, bit)
> > -#endif
> > -
> > -
> > -#ifndef atomic_bit_test_set
> > -# define atomic_bit_test_set(mem, bit) \
> > -  ({ __typeof (*(mem)) __atg14_old;                                          \
> > -     __typeof (mem) __atg14_memp = (mem);                                    \
> > -     __typeof (*(mem)) __atg14_mask = ((__typeof (*(mem))) 1 << (bit));              \
> > -                                                                             \
> > -     do                                                                              \
> > -       __atg14_old = (*__atg14_memp);                                        \
> > -     while (__builtin_expect                                                 \
> > -           (atomic_compare_and_exchange_bool_acq (__atg14_memp,              \
> > -                                                  __atg14_old | __atg14_mask,\
> > -                                                  __atg14_old), 0));         \
> > -                                                                             \
> > -     __atg14_old & __atg14_mask; })
> > -#endif
> > -
> >  /* Atomically *mem &= mask.  */
> >  #ifndef atomic_and
> >  # define atomic_and(mem, mask) \
> > diff --git a/misc/tst-atomic.c b/misc/tst-atomic.c
> > index 13acf0da8f1ab41774175401a68ce28a0eb7ed30..765b036873e181d6c49299c0ad08d5dbfcc05387 100644
> > --- a/misc/tst-atomic.c
> > +++ b/misc/tst-atomic.c
> > @@ -325,66 +325,6 @@ do_test (void)
> >        ret = 1;
> >      }
> >
> > -  mem = 0;
> > -  atomic_bit_set (&mem, 1);
> > -  if (mem != 2)
> > -    {
> > -      puts ("atomic_bit_set test 1 failed");
> > -      ret = 1;
> > -    }
> > -
> > -  mem = 8;
> > -  atomic_bit_set (&mem, 3);
> > -  if (mem != 8)
> > -    {
> > -      puts ("atomic_bit_set test 2 failed");
> > -      ret = 1;
> > -    }
> > -
> > -#ifdef TEST_ATOMIC64
> > -  mem = 16;
> > -  atomic_bit_set (&mem, 35);
> > -  if (mem != 0x800000010LL)
> > -    {
> > -      puts ("atomic_bit_set test 3 failed");
> > -      ret = 1;
> > -    }
> > -#endif
> > -
> > -  mem = 0;
> > -  if (atomic_bit_test_set (&mem, 1)
> > -      || mem != 2)
> > -    {
> > -      puts ("atomic_bit_test_set test 1 failed");
> > -      ret = 1;
> > -    }
> > -
> > -  mem = 8;
> > -  if (! atomic_bit_test_set (&mem, 3)
> > -      || mem != 8)
> > -    {
> > -      puts ("atomic_bit_test_set test 2 failed");
> > -      ret = 1;
> > -    }
> > -
> > -#ifdef TEST_ATOMIC64
> > -  mem = 16;
> > -  if (atomic_bit_test_set (&mem, 35)
> > -      || mem != 0x800000010LL)
> > -    {
> > -      puts ("atomic_bit_test_set test 3 failed");
> > -      ret = 1;
> > -    }
> > -
> > -  mem = 0x100000000LL;
> > -  if (! atomic_bit_test_set (&mem, 32)
> > -      || mem != 0x100000000LL)
> > -    {
> > -      puts ("atomic_bit_test_set test 4 failed");
> > -      ret = 1;
> > -    }
> > -#endif
> > -
> >    /* Tests for C11-like atomics.  */
> >    mem = 11;
> >    if (atomic_load_relaxed (&mem) != 11 || atomic_load_acquire (&mem) != 11)
> > diff --git a/nptl/nptl_free_tcb.c b/nptl/nptl_free_tcb.c
> > index 0d31143ac849de6398e06c399b94813ae57dcff3..b63ec3bc3c1df4a1fc5272a24d453e679dbedd5e 100644
> > --- a/nptl/nptl_free_tcb.c
> > +++ b/nptl/nptl_free_tcb.c
> > @@ -24,7 +24,8 @@ void
> >  __nptl_free_tcb (struct pthread *pd)
> >  {
> >    /* The thread is exiting now.  */
> > -  if (atomic_bit_test_set (&pd->cancelhandling, TERMINATED_BIT) == 0)
> > +  if ((atomic_fetch_or_acquire (&pd->cancelhandling, 1 << TERMINATED_BIT)
> > +      & (1 << TERMINATED_BIT)) == 0)
>
> I think this change may cause regressions on x86 because at the moment
> it seems the only way to generate `lock bts` is with inline assembly:
>
> https://godbolt.org/z/bdchE53Ps

GCC Does have a pass for this:
https://github.com/gcc-mirror/gcc/blob/master/gcc/tree-ssa-ccp.cc
but it needs to be done in a very specific way:
https://godbolt.org/z/s9dchva6G

Can these remain macros that setup the bit_test_set properly but
use compiler intrinsics?
>
> Can we keep atomic bit_test_set?
> >      {
> >        /* Free TPP data.  */
> >        if (pd->tpp != NULL)
> > diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
> > index 870a8fcb34eb43b58c2260fee6a4624f0fbbd469..4b7f3edc384748f300ca935ad878eb0e3547e163 100644
> > --- a/nptl/pthread_create.c
> > +++ b/nptl/pthread_create.c
> > @@ -487,7 +487,7 @@ start_thread (void *arg)
> >    /* The thread is exiting now.  Don't set this bit until after we've hit
> >       the event-reporting breakpoint, so that td_thr_get_info on us while at
> >       the breakpoint reports TD_THR_RUN state rather than TD_THR_ZOMBIE.  */
> > -  atomic_bit_set (&pd->cancelhandling, EXITING_BIT);
> > +  atomic_fetch_or_acquire (&pd->cancelhandling, 1 << EXITING_BIT);
> >
> >    if (__glibc_unlikely (atomic_decrement_and_test (&__nptl_nthreads)))
> >      /* This was the last thread.  */
> > diff --git a/sysdeps/alpha/atomic-machine.h b/sysdeps/alpha/atomic-machine.h
> > index df28b90261aa485125d951bc1abf76602730bfd0..115a9df5d77cd08bcb0a49d9b59f0c53b4a20d78 100644
> > --- a/sysdeps/alpha/atomic-machine.h
> > +++ b/sysdeps/alpha/atomic-machine.h
> > @@ -325,15 +325,6 @@
> >  #define atomic_exchange_and_add(mem, value) \
> >    __atomic_val_bysize (__arch_exchange_and_add, int, mem, value, __MB, __MB)
> >
> > -
> > -/* ??? Blah, I'm lazy.  Implement these later.  Can do better than the
> > -   compare-and-exchange loop provided by generic code.
> > -
> > -#define atomic_decrement_if_positive(mem)
> > -#define atomic_bit_test_set(mem, bit)
> > -
> > -*/
> > -
> >  #define atomic_full_barrier()  __asm ("mb" : : : "memory");
> >  #define atomic_read_barrier()  __asm ("mb" : : : "memory");
> >  #define atomic_write_barrier() __asm ("wmb" : : : "memory");
> > diff --git a/sysdeps/ia64/atomic-machine.h b/sysdeps/ia64/atomic-machine.h
> > index 71afcfe0d0031a6ceae5879a2b3b19ed2ee2f110..b2f5d2f4774cc2503c7595cb82f30f60fbcbe89c 100644
> > --- a/sysdeps/ia64/atomic-machine.h
> > +++ b/sysdeps/ia64/atomic-machine.h
> > @@ -77,20 +77,4 @@
> >       while (__builtin_expect (__val != __oldval, 0));                        \
> >       __oldval; })
> >
> > -#define atomic_bit_test_set(mem, bit) \
> > -  ({ __typeof (*mem) __oldval, __val;                                        \
> > -     __typeof (mem) __memp = (mem);                                          \
> > -     __typeof (*mem) __mask = ((__typeof (*mem)) 1 << (bit));                \
> > -                                                                             \
> > -     __val = (*__memp);                                                              \
> > -     do                                                                              \
> > -       {                                                                     \
> > -        __oldval = __val;                                                    \
> > -        __val = atomic_compare_and_exchange_val_acq (__memp,                 \
> > -                                                     __oldval | __mask,      \
> > -                                                     __oldval);              \
> > -       }                                                                     \
> > -     while (__builtin_expect (__val != __oldval, 0));                        \
> > -     __oldval & __mask; })
> > -
> >  #define atomic_full_barrier() __sync_synchronize ()
> > diff --git a/sysdeps/m68k/m680x0/m68020/atomic-machine.h b/sysdeps/m68k/m680x0/m68020/atomic-machine.h
> > index 70cda007341b49d28dc1d9847a88198b47589db4..72a3b81642c1a23207054092d16bb856556cd897 100644
> > --- a/sysdeps/m68k/m680x0/m68020/atomic-machine.h
> > +++ b/sysdeps/m68k/m680x0/m68020/atomic-machine.h
> > @@ -218,15 +218,3 @@
> >                            : "memory");                                       \
> >         }                                                                     \
> >       __result; })
> > -
> > -#define atomic_bit_set(mem, bit) \
> > -  __asm __volatile ("bfset %0{%1,#1}"                                        \
> > -                   : "+m" (*(mem))                                           \
> > -                   : "di" (sizeof (*(mem)) * 8 - (bit) - 1))
> > -
> > -#define atomic_bit_test_set(mem, bit) \
> > -  ({ char __result;                                                          \
> > -     __asm __volatile ("bfset %1{%2,#1}; sne %0"                             \
> > -                      : "=dm" (__result), "+m" (*(mem))                      \
> > -                      : "di" (sizeof (*(mem)) * 8 - (bit) - 1));             \
> > -     __result; })
> > diff --git a/sysdeps/nptl/pthreadP.h b/sysdeps/nptl/pthreadP.h
> > index 39af275c254ef3e737736bd5c38099bada8746d6..e8bd4c6dcbf7b7785857ff08c15dac9a5cfb2a5d 100644
> > --- a/sysdeps/nptl/pthreadP.h
> > +++ b/sysdeps/nptl/pthreadP.h
> > @@ -43,12 +43,6 @@
> >    atomic_compare_and_exchange_val_acq (&(descr)->member, new, old)
> >  #endif
> >
> > -#ifndef THREAD_ATOMIC_BIT_SET
> > -# define THREAD_ATOMIC_BIT_SET(descr, member, bit) \
> > -  atomic_bit_set (&(descr)->member, bit)
> > -#endif
> > -
> > -
> >  static inline short max_adaptive_count (void)
> >  {
> >  #if HAVE_TUNABLES
> > @@ -276,7 +270,7 @@ __do_cancel (void)
> >    struct pthread *self = THREAD_SELF;
> >
> >    /* Make sure we get no more cancellations.  */
> > -  atomic_bit_set (&self->cancelhandling, EXITING_BIT);
> > +  atomic_fetch_or_acquire (&self->cancelhandling, 1 << EXITING_BIT);
> >
> >    __pthread_unwind ((__pthread_unwind_buf_t *)
> >                     THREAD_GETMEM (self, cleanup_jmp_buf));
> > diff --git a/sysdeps/s390/atomic-machine.h b/sysdeps/s390/atomic-machine.h
> > index fe7f04cf30cf5b62fea743a72384f8fc248d2d25..db31e377970c4ab6285ef65fb21419db7c6ca373 100644
> > --- a/sysdeps/s390/atomic-machine.h
> > +++ b/sysdeps/s390/atomic-machine.h
> > @@ -93,17 +93,6 @@
> >      atomic_or_val (mem, mask);                 \
> >    } while (0)
> >
> > -/* Atomically *mem |= 1 << bit and return true if the bit was set in old value
> > -   of *mem.  */
> > -/* The load-and-or instruction is used on z196 zarch and higher cpus
> > -   instead of a loop with compare-and-swap instruction.  */
> > -#define atomic_bit_test_set(mem, bit)                                  \
> > -  ({ __typeof (*(mem)) __atg14_old;                                    \
> > -    __typeof (mem) __atg14_memp = (mem);                               \
> > -    __typeof (*(mem)) __atg14_mask = ((__typeof (*(mem))) 1 << (bit)); \
> > -    __atg14_old = atomic_or_val (__atg14_memp, __atg14_mask);          \
> > -    __atg14_old & __atg14_mask; })
> > -
> >  /* Atomically *mem &= mask and return the old value of *mem.  */
> >  /* The gcc builtin uses load-and-and instruction on z196 zarch and higher cpus
> >     instead of a loop with compare-and-swap instruction.  */
> > diff --git a/sysdeps/unix/sysv/linux/riscv/atomic-machine.h b/sysdeps/unix/sysv/linux/riscv/atomic-machine.h
> > index deb6cadaa35e841e23cee55d169a20538bdb1f8d..c5eb5c639fb59d7395c0a2d8f4fd72452845914b 100644
> > --- a/sysdeps/unix/sysv/linux/riscv/atomic-machine.h
> > +++ b/sysdeps/unix/sysv/linux/riscv/atomic-machine.h
> > @@ -159,10 +159,6 @@
> >
> >  # define atomic_max_relaxed(mem, value) asm_amo ("amomaxu", "", mem, value)
> >
> > -# define atomic_bit_test_set(mem, bit)                   \
> > -  ({ typeof (*mem) __mask = (typeof (*mem))1 << (bit);    \
> > -     asm_amo ("amoor", ".aq", mem, __mask) & __mask; })
> > -
> >  #else /* __riscv_atomic */
> >  # error "ISAs that do not subsume the A extension are not supported"
> >  #endif /* !__riscv_atomic */
> > diff --git a/sysdeps/unix/sysv/linux/sh/atomic-machine.h b/sysdeps/unix/sysv/linux/sh/atomic-machine.h
> > index aead3d5866d12b3b6bba27a1ef4a273400e1715b..3bf84d06bc875fca75b38b5ab01fca6683d390f7 100644
> > --- a/sysdeps/unix/sysv/linux/sh/atomic-machine.h
> > +++ b/sysdeps/unix/sysv/linux/sh/atomic-machine.h
> > @@ -301,97 +301,3 @@
> >
> >  #define atomic_increment_and_test(mem) atomic_add_zero((mem), 1)
> >  #define atomic_decrement_and_test(mem) atomic_add_zero((mem), -1)
> > -
> > -#define atomic_bit_set(mem, bit) \
> > -  (void) ({ unsigned int __mask = 1 << (bit); \
> > -           if (sizeof (*(mem)) == 1) \
> > -             __asm __volatile ("\
> > -               mova 1f,r0\n\
> > -               mov r15,r1\n\
> > -               .align 2\n\
> > -               mov #(0f-1f),r15\n\
> > -            0: mov.b @%0,r2\n\
> > -               or %1,r2\n\
> > -               mov.b r2,@%0\n\
> > -            1: mov r1,r15"\
> > -               : : "u" (mem), "u" (__mask) \
> > -               : "r0", "r1", "r2", "memory"); \
> > -           else if (sizeof (*(mem)) == 2) \
> > -             __asm __volatile ("\
> > -               mova 1f,r0\n\
> > -               mov r15,r1\n\
> > -               .align 2\n\
> > -               mov #(0f-1f),r15\n\
> > -            0: mov.w @%0,r2\n\
> > -               or %1,r2\n\
> > -               mov.w r2,@%0\n\
> > -            1: mov r1,r15"\
> > -               : : "u" (mem), "u" (__mask) \
> > -               : "r0", "r1", "r2", "memory"); \
> > -           else if (sizeof (*(mem)) == 4) \
> > -             __asm __volatile ("\
> > -               mova 1f,r0\n\
> > -               mov r15,r1\n\
> > -               .align 2\n\
> > -               mov #(0f-1f),r15\n\
> > -            0: mov.l @%0,r2\n\
> > -               or %1,r2\n\
> > -               mov.l r2,@%0\n\
> > -            1: mov r1,r15"\
> > -               : : "u" (mem), "u" (__mask) \
> > -               : "r0", "r1", "r2", "memory"); \
> > -           else \
> > -             abort (); \
> > -           })
> > -
> > -#define atomic_bit_test_set(mem, bit) \
> > -  ({ unsigned int __mask = 1 << (bit); \
> > -     unsigned int __result = __mask; \
> > -     if (sizeof (*(mem)) == 1) \
> > -       __asm __volatile ("\
> > -         mova 1f,r0\n\
> > -         .align 2\n\
> > -         mov r15,r1\n\
> > -         mov #(0f-1f),r15\n\
> > -       0: mov.b @%2,r2\n\
> > -         mov r2,r3\n\
> > -         or %1,r2\n\
> > -         mov.b r2,@%2\n\
> > -       1: mov r1,r15\n\
> > -         and r3,%0"\
> > -       : "=&r" (__result), "=&r" (__mask) \
> > -       : "u" (mem), "0" (__result), "1" (__mask) \
> > -       : "r0", "r1", "r2", "r3", "memory");    \
> > -     else if (sizeof (*(mem)) == 2) \
> > -       __asm __volatile ("\
> > -         mova 1f,r0\n\
> > -         .align 2\n\
> > -         mov r15,r1\n\
> > -         mov #(0f-1f),r15\n\
> > -       0: mov.w @%2,r2\n\
> > -         mov r2,r3\n\
> > -         or %1,r2\n\
> > -         mov.w %1,@%2\n\
> > -       1: mov r1,r15\n\
> > -         and r3,%0"\
> > -       : "=&r" (__result), "=&r" (__mask) \
> > -       : "u" (mem), "0" (__result), "1" (__mask) \
> > -       : "r0", "r1", "r2", "r3", "memory"); \
> > -     else if (sizeof (*(mem)) == 4) \
> > -       __asm __volatile ("\
> > -         mova 1f,r0\n\
> > -         .align 2\n\
> > -         mov r15,r1\n\
> > -         mov #(0f-1f),r15\n\
> > -       0: mov.l @%2,r2\n\
> > -         mov r2,r3\n\
> > -         or r2,%1\n\
> > -         mov.l %1,@%2\n\
> > -       1: mov r1,r15\n\
> > -         and r3,%0"\
> > -       : "=&r" (__result), "=&r" (__mask) \
> > -       : "u" (mem), "0" (__result), "1" (__mask) \
> > -       : "r0", "r1", "r2", "r3", "memory"); \
> > -     else \
> > -       abort (); \
> > -     __result; })
> > diff --git a/sysdeps/x86/atomic-machine.h b/sysdeps/x86/atomic-machine.h
> > index c27a437ec1b129fc18ca832708a69627959fc107..6adc219bf69f1507624618ed931dad11fea4e150 100644
> > --- a/sysdeps/x86/atomic-machine.h
> > +++ b/sysdeps/x86/atomic-machine.h
> > @@ -292,56 +292,6 @@
> >       __result; })
> >
> >
> > -#define atomic_bit_set(mem, bit) \
> > -  do {                                                                       \
> > -    if (sizeof (*mem) == 1)                                                  \
> > -      __asm __volatile (LOCK_PREFIX "orb %b2, %0"                            \
> > -                       : "=m" (*mem)                                         \
> > -                       : "m" (*mem), IBR_CONSTRAINT (1L << (bit)));          \
> > -    else if (sizeof (*mem) == 2)                                             \
> > -      __asm __volatile (LOCK_PREFIX "orw %w2, %0"                            \
> > -                       : "=m" (*mem)                                         \
> > -                       : "m" (*mem), "ir" (1L << (bit)));                    \
> > -    else if (sizeof (*mem) == 4)                                             \
> > -      __asm __volatile (LOCK_PREFIX "orl %2, %0"                             \
> > -                       : "=m" (*mem)                                         \
> > -                       : "m" (*mem), "ir" (1L << (bit)));                    \
> > -    else if (__builtin_constant_p (bit) && (bit) < 32)                       \
> > -      __asm __volatile (LOCK_PREFIX "orq %2, %0"                             \
> > -                       : "=m" (*mem)                                         \
> > -                       : "m" (*mem), "i" (1L << (bit)));                     \
> > -    else if (__HAVE_64B_ATOMICS)                                             \
> > -      __asm __volatile (LOCK_PREFIX "orq %q2, %0"                            \
> > -                       : "=m" (*mem)                                         \
> > -                       : "m" (*mem), "r" (1UL << (bit)));                    \
> > -    else                                                                     \
> > -      __atomic_link_error ();                                                \
> > -  } while (0)
> > -
> > -
> > -#define atomic_bit_test_set(mem, bit) \
> > -  ({ unsigned char __result;                                                 \
> > -     if (sizeof (*mem) == 1)                                                 \
> > -       __asm __volatile (LOCK_PREFIX "btsb %3, %1; setc %0"                  \
> > -                        : "=q" (__result), "=m" (*mem)                       \
> > -                        : "m" (*mem), IBR_CONSTRAINT (bit));                 \
> > -     else if (sizeof (*mem) == 2)                                            \
> > -       __asm __volatile (LOCK_PREFIX "btsw %3, %1; setc %0"                  \
> > -                        : "=q" (__result), "=m" (*mem)                       \
> > -                        : "m" (*mem), "ir" (bit));                           \
> > -     else if (sizeof (*mem) == 4)                                            \
> > -       __asm __volatile (LOCK_PREFIX "btsl %3, %1; setc %0"                  \
> > -                        : "=q" (__result), "=m" (*mem)                       \
> > -                        : "m" (*mem), "ir" (bit));                           \
> > -     else if (__HAVE_64B_ATOMICS)                                            \
> > -       __asm __volatile (LOCK_PREFIX "btsq %3, %1; setc %0"                  \
> > -                        : "=q" (__result), "=m" (*mem)                       \
> > -                        : "m" (*mem), "ir" (bit));                           \
> > -     else                                                                    \
> > -       __atomic_link_error ();                                       \
> > -     __result; })
> > -
> > -
> >  #define __arch_and_body(lock, mem, mask) \
> >    do {                                                                       \
> >      if (sizeof (*mem) == 1)                                                  \

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set
  2022-07-06 18:25   ` Noah Goldstein
@ 2022-07-06 18:59     ` Wilco Dijkstra
  2022-07-06 19:14       ` Noah Goldstein
  0 siblings, 1 reply; 18+ messages in thread
From: Wilco Dijkstra @ 2022-07-06 18:59 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library

Hi Noah,

> > -  if (atomic_bit_test_set (&pd->cancelhandling, TERMINATED_BIT) == 0)
> > +  if ((atomic_fetch_or_acquire (&pd->cancelhandling, 1 << TERMINATED_BIT)
> > +      & (1 << TERMINATED_BIT)) == 0)

> Can these remain macros that setup the bit_test_set properly but
> use compiler intrinsics?

The fetch_or above is already written in a way that allows GCC to recognize it
(it's identical to your do_atomic_or0), so I don't see any issue here. However
there is a TERMINATED_BITMASK which might make it more readable.

Cheers,
Wilco

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set
  2022-07-06 18:59     ` Wilco Dijkstra
@ 2022-07-06 19:14       ` Noah Goldstein
  2022-07-06 19:30         ` H.J. Lu
  2022-07-06 19:36         ` Wilco Dijkstra
  0 siblings, 2 replies; 18+ messages in thread
From: Noah Goldstein @ 2022-07-06 19:14 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GNU C Library

On Wed, Jul 6, 2022 at 12:00 PM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
>
> Hi Noah,
>
> > > -  if (atomic_bit_test_set (&pd->cancelhandling, TERMINATED_BIT) == 0)
> > > +  if ((atomic_fetch_or_acquire (&pd->cancelhandling, 1 << TERMINATED_BIT)
> > > +      & (1 << TERMINATED_BIT)) == 0)
>
> > Can these remain macros that setup the bit_test_set properly but
> > use compiler intrinsics?
>
> The fetch_or above is already written in a way that allows GCC to recognize it
> (it's identical to your do_atomic_or0), so I don't see any issue here. However
> there is a TERMINATED_BITMASK which might make it more readable.

For non-constant shift values it needs to be like atomic_or4/atomic_or5.

Since this needs to be set up in a particular way it seems worth it to
keep it as a macro that will do it properly. Fine with replacing the underlying
atomic op with a compiler intrinsic.
>
> Cheers,
> Wilco

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set
  2022-07-06 19:14       ` Noah Goldstein
@ 2022-07-06 19:30         ` H.J. Lu
  2022-07-06 19:36         ` Wilco Dijkstra
  1 sibling, 0 replies; 18+ messages in thread
From: H.J. Lu @ 2022-07-06 19:30 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: Wilco Dijkstra, GNU C Library

On Wed, Jul 6, 2022 at 12:15 PM Noah Goldstein via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> On Wed, Jul 6, 2022 at 12:00 PM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
> >
> > Hi Noah,
> >
> > > > -  if (atomic_bit_test_set (&pd->cancelhandling, TERMINATED_BIT) == 0)
> > > > +  if ((atomic_fetch_or_acquire (&pd->cancelhandling, 1 << TERMINATED_BIT)
> > > > +      & (1 << TERMINATED_BIT)) == 0)
> >
> > > Can these remain macros that setup the bit_test_set properly but
> > > use compiler intrinsics?
> >
> > The fetch_or above is already written in a way that allows GCC to recognize it
> > (it's identical to your do_atomic_or0), so I don't see any issue here. However
> > there is a TERMINATED_BITMASK which might make it more readable.
>
> For non-constant shift values it needs to be like atomic_or4/atomic_or5.
>
> Since this needs to be set up in a particular way it seems worth it to
> keep it as a macro that will do it properly. Fine with replacing the underlying
> atomic op with a compiler intrinsic.

GCC can optimize some atomic operations to locked bit operations.
Have we checked if the new assembly codes are as efficient as the
old ones on x86-64?

-- 
H.J.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set
  2022-07-06 19:14       ` Noah Goldstein
  2022-07-06 19:30         ` H.J. Lu
@ 2022-07-06 19:36         ` Wilco Dijkstra
  2022-07-06 19:51           ` Noah Goldstein
  1 sibling, 1 reply; 18+ messages in thread
From: Wilco Dijkstra @ 2022-07-06 19:36 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library

Hi Noah,

> For non-constant shift values it needs to be like atomic_or4/atomic_or5.
>
> Since this needs to be set up in a particular way it seems worth it to
> keep it as a macro that will do it properly. Fine with replacing the underlying
> atomic op with a compiler intrinsic.

There was only one use in GLIBC, so how does adding a private macro help other
software? This idiom doesn't appear common, particularly with a non-constant bit,
but in principle the compiler could be fixed so that it optimizes do_atomic_or2/3
as well.

Cheers,
Wilco

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set
  2022-07-06 19:36         ` Wilco Dijkstra
@ 2022-07-06 19:51           ` Noah Goldstein
  2022-07-06 19:56             ` Noah Goldstein
  0 siblings, 1 reply; 18+ messages in thread
From: Noah Goldstein @ 2022-07-06 19:51 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GNU C Library

On Wed, Jul 6, 2022 at 12:37 PM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
>
> Hi Noah,
>
> > For non-constant shift values it needs to be like atomic_or4/atomic_or5.
> >
> > Since this needs to be set up in a particular way it seems worth it to
> > keep it as a macro that will do it properly. Fine with replacing the underlying
> > atomic op with a compiler intrinsic.
>
> This was only one use in GLIBC so how does adding a private macro help other
> software? This idiom doesn't appear common, particularly with a non-constant bit,
> but in principle the compiler could be fixed so that it optimizes do_atomic_or2/3
> as well.

The usage in this patch is fine. It just seems that having an internal API
for it provides value, given that in the future there may be a need for
atomic_bit_set without a constant bit, and there are non-obvious tricks
necessary for doing it right.


>
> Cheers,
> Wilco

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set
  2022-07-06 19:51           ` Noah Goldstein
@ 2022-07-06 19:56             ` Noah Goldstein
  2022-07-06 20:14               ` Wilco Dijkstra
  0 siblings, 1 reply; 18+ messages in thread
From: Noah Goldstein @ 2022-07-06 19:56 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GNU C Library

On Wed, Jul 6, 2022 at 12:51 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Wed, Jul 6, 2022 at 12:37 PM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
> >
> > Hi Noah,
> >
> > > For non-constant shift values it needs to be like atomic_or4/atomic_or5.
> > >
> > > Since this needs to be set up in a particular way it seems worth it to
> > > keep it as a macro that will do it properly. Fine with replacing the underlying
> > > atomic op with a compiler intrinsic.
> >
> > This was only one use in GLIBC so how does adding a private macro help other
> > software? This idiom doesn't appear common, particularly with a non-constant bit,
> > but in principle the compiler could be fixed so that it optimizes do_atomic_or2/3
> > as well.
>
> The usage in this patch is fine. Just seems having an internal API for
> it provides
> value given that in the future there may be a need for atomic_bit_set
> w.o a constant
> bit and there are non-obvious tricks necessary for doing it right.

Also, I frankly think the API "atomic_bit_set" provides readability value
that is lost when you expand it.

The fetch_or_acquire seems like an implementation detail of
the desired functionality.
>
>
> >
> > Cheers,
> > Wilco

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set
  2022-07-06 19:56             ` Noah Goldstein
@ 2022-07-06 20:14               ` Wilco Dijkstra
  2022-07-06 20:30                 ` Noah Goldstein
  0 siblings, 1 reply; 18+ messages in thread
From: Wilco Dijkstra @ 2022-07-06 20:14 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: GNU C Library

Hi Noah,

The goal here is to move to the standard atomics, not to invent our own set for
convenience or just for fun to be different etc. There were 2 uses of atomic_bit_set
in all of GLIBC, and I cannot believe that atomic_fetch_or (&x, 1 << bit) could cause
any confusion given that it is very similar to x |= 1 << bit. We don't wrap such
expressions in macros like bitset (x, bit) either - it just doesn't make sense.

As I mentioned, I'll use EXITING_BITMASK rather than 1 << EXITING_BIT.

Cheers,
Wilco

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set
  2022-07-06 20:14               ` Wilco Dijkstra
@ 2022-07-06 20:30                 ` Noah Goldstein
  2022-07-06 20:56                   ` H.J. Lu
  0 siblings, 1 reply; 18+ messages in thread
From: Noah Goldstein @ 2022-07-06 20:30 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GNU C Library

On Wed, Jul 6, 2022 at 1:14 PM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
>
> Hi Noah,
>
> The goal here is to move to the standard atomics, not to invent our own set for
> convenience or just for fun to be different etc. There were 2 uses of atomic_bit_set
> in all of GLIBC, and I cannot believe that atomic_fetch_or (&x, 1 << bit) could cause
> any confusion given that it is very similar to x |= 1 << bit. We don't wrap such
> expressions in macros like bitset (x, bit) either - it just doesn't make sense.
>
> As I mentioned, I'll use EXITING_BITMASK rather than 1 << EXITING_BIT.

Alright.

>
> Cheers,
> Wilco

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set
  2022-07-06 20:30                 ` Noah Goldstein
@ 2022-07-06 20:56                   ` H.J. Lu
  2022-07-12 17:47                     ` Adhemerval Zanella Netto
  0 siblings, 1 reply; 18+ messages in thread
From: H.J. Lu @ 2022-07-06 20:56 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: Wilco Dijkstra, GNU C Library

On Wed, Jul 6, 2022 at 1:31 PM Noah Goldstein via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> On Wed, Jul 6, 2022 at 1:14 PM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
> >
> > Hi Noah,
> >
> > The goal here is to move to the standard atomics, not to invent our own set for
> > convenience or just for fun to be different etc. There were 2 uses of atomic_bit_set
> > in all of GLIBC, and I cannot believe that atomic_fetch_or (&x, 1 << bit) could cause
> > any confusion given that it is very similar to x |= 1 << bit. We don't wrap such
> > expressions in macros like bitset (x, bit) either - it just doesn't make sense.
> >
> > As I mentioned, I'll use EXITING_BITMASK rather than 1 << EXITING_BIT.
>
> Alright.

Before GCC 12,

(__atomic_fetch_or (&x, MASK, __ATOMIC_ACQUIRE) & MASK) != 0;

will be optimized to "lock btsl" only if x is unsigned.  But cancelhandling in

if (atomic_bit_test_set (&pd->cancelhandling, TERMINATED_BIT) == 0)

is signed which leads to the much worse code when GCC 11 or older are
used.

-- 
H.J.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set
  2022-07-06 20:56                   ` H.J. Lu
@ 2022-07-12 17:47                     ` Adhemerval Zanella Netto
  2022-07-12 18:09                       ` H.J. Lu
  0 siblings, 1 reply; 18+ messages in thread
From: Adhemerval Zanella Netto @ 2022-07-12 17:47 UTC (permalink / raw)
  To: H.J. Lu, Noah Goldstein; +Cc: GNU C Library, Wilco Dijkstra



On 06/07/22 17:56, H.J. Lu via Libc-alpha wrote:
> On Wed, Jul 6, 2022 at 1:31 PM Noah Goldstein via Libc-alpha
> <libc-alpha@sourceware.org> wrote:
>>
>> On Wed, Jul 6, 2022 at 1:14 PM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
>>>
>>> Hi Noah,
>>>
>>> The goal here is to move to the standard atomics, not to invent our own set for
>>> convenience or just for fun to be different etc. There were 2 uses of atomic_bit_set
>>> in all of GLIBC, and I cannot believe that atomic_fetch_or (&x, 1 << bit) could cause
>>> any confusion given that it is very similar to x |= 1 << bit. We don't wrap such
>>> expressions in macros like bitset (x, bit) either - it just doesn't make sense.
>>>
>>> As I mentioned, I'll use EXITING_BITMASK rather than 1 << EXITING_BIT.
>>
>> Alright.
> 
> Before GCC 12,
> 
> (__atomic_fetch_or (&x, MASK, __ATOMIC_ACQUIRE) & MASK) != 0;
> 
> will be optimized to "lock btsl" only if x is unsigned.  But cancelhandling in
> 
> if (atomic_bit_test_set (&pd->cancelhandling, TERMINATED_BIT) == 0)
> 
> is signed which leads to the much worse code when GCC 11 or older are
> used.
> 

I think it should be safe to change cancelhandling to be unsigned,
although this is really a micro-optimization that we should handle
in the compiler instead of rolling out our own atomics.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set
  2022-07-12 17:47                     ` Adhemerval Zanella Netto
@ 2022-07-12 18:09                       ` H.J. Lu
  2022-07-12 18:44                         ` Wilco Dijkstra
  0 siblings, 1 reply; 18+ messages in thread
From: H.J. Lu @ 2022-07-12 18:09 UTC (permalink / raw)
  To: Adhemerval Zanella Netto; +Cc: Noah Goldstein, GNU C Library, Wilco Dijkstra

On Tue, Jul 12, 2022 at 10:47 AM Adhemerval Zanella Netto
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 06/07/22 17:56, H.J. Lu via Libc-alpha wrote:
> > On Wed, Jul 6, 2022 at 1:31 PM Noah Goldstein via Libc-alpha
> > <libc-alpha@sourceware.org> wrote:
> >>
> >> On Wed, Jul 6, 2022 at 1:14 PM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
> >>>
> >>> Hi Noah,
> >>>
> >>> The goal here is to move to the standard atomics, not to invent our own set for
> >>> convenience or just for fun to be different etc. There were 2 uses of atomic_bit_set
> >>> in all of GLIBC, and I cannot believe that atomic_fetch_or (&x, 1 << bit) could cause
> >>> any confusion given that it is very similar to x |= 1 << bit. We don't wrap such
> >>> expressions in macros like bitset (x, bit) either - it just doesn't make sense.
> >>>
> >>> As I mentioned, I'll use EXITING_BITMASK rather than 1 << EXITING_BIT.
> >>
> >> Alright.
> >
> > Before GCC 12,
> >
> > (__atomic_fetch_or (&x, MASK, __ATOMIC_ACQUIRE) & MASK) != 0;
> >
> > will be optimized to "lock btsl" only if x is unsigned.  But cancelhandling in
> >
> > if (atomic_bit_test_set (&pd->cancelhandling, TERMINATED_BIT) == 0)
> >
> > is signed which leads to the much worse code when GCC 11 or older are
> > used.
> >
>
> I think it should be safe to change cancelhandling to be unsigned,
> although this is really a micro-optimization that we should handle
> in the compiler instead of rolling out our own atomics.

We should help older compilers in this case.  Can we change cancelhandling
to unsigned?

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set
  2022-07-12 18:09                       ` H.J. Lu
@ 2022-07-12 18:44                         ` Wilco Dijkstra
  2022-07-12 19:11                           ` H.J. Lu
  0 siblings, 1 reply; 18+ messages in thread
From: Wilco Dijkstra @ 2022-07-12 18:44 UTC (permalink / raw)
  To: H.J. Lu, Adhemerval Zanella Netto; +Cc: Noah Goldstein, GNU C Library

Hi,

I don't believe it could make a difference since it isn't performance critical code.
Note that several similar fetch_or/fetch_and are used which do not optimize into a
bitset operation, so even if we fix this particular case to use unsigned, there are many
others. IIRC there is even a fetch_or with zero which is kind of useless!

More importantly, it raises a question for locking: we currently use a compare-exchange
for each lock and unlock. If compare-exchange has higher overheads than other atomics
then we should investigate more efficient locking sequences.

Cheers,
Wilco

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set
  2022-07-12 18:44                         ` Wilco Dijkstra
@ 2022-07-12 19:11                           ` H.J. Lu
  2022-07-12 19:18                             ` Noah Goldstein
  0 siblings, 1 reply; 18+ messages in thread
From: H.J. Lu @ 2022-07-12 19:11 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: Adhemerval Zanella Netto, Noah Goldstein, GNU C Library

On Tue, Jul 12, 2022 at 11:44 AM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
>
> Hi,
>
> I don't believe it could make a difference since it isn't performance critical code.
> Note that several similar fetch_or/fetch_and are used which do not optimize into a

Only the operations with test have this issue.  Simple fetch_or and
fetch_and are
OK.

> bitset operation, so even if we fix this particular case to use unsigned, there are many
> others. IIRC there is even a fetch_or with zero which is kind of useless!
>
> More importantly, it raises a question for locking: we currently use a compare-exchange
> for each lock and unlock. If compare-exchange has higher overheads than other atomics
> then we should investigate more efficient locking sequences.

compare-exchange can be very expensive under contention.

> Cheers,
> Wilco



-- 
H.J.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set
  2022-07-12 19:11                           ` H.J. Lu
@ 2022-07-12 19:18                             ` Noah Goldstein
  2022-07-12 20:39                               ` Wilco Dijkstra
  0 siblings, 1 reply; 18+ messages in thread
From: Noah Goldstein @ 2022-07-12 19:18 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Wilco Dijkstra, Adhemerval Zanella Netto, GNU C Library

On Tue, Jul 12, 2022 at 12:11 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Jul 12, 2022 at 11:44 AM Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
> >
> > Hi,
> >
> > I don't believe it could make a difference since it isn't performance critical code.
> > Note that several similar fetch_or/fetch_and are used which do not optimize into a
>
> Only the operations with test have this issue.  Simple fetch_or and
> fetch_and are
> OK.
>
> > bitset operation, so even if we fix this particular case to use unsigned, there are many
> > others. IIRC there is even a fetch_or with zero which is kind of useless!
> >
> > More importantly, it raises a question for locking: we currently use a compare-exchange
> > for each lock and unlock. If compare-exchange has higher overheads than other atomics
> > then we should investigate more efficient locking sequences.
>
> compare-exchange can be very expensive under contention.
>

Which locks should we look into dropping CAS for? pthread_mutex? Anything else?
> > Cheers,
> > Wilco
>
>
>
> --
> H.J.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set
  2022-07-12 19:18                             ` Noah Goldstein
@ 2022-07-12 20:39                               ` Wilco Dijkstra
  0 siblings, 0 replies; 18+ messages in thread
From: Wilco Dijkstra @ 2022-07-12 20:39 UTC (permalink / raw)
  To: Noah Goldstein, H.J. Lu; +Cc: Adhemerval Zanella Netto, GNU C Library

Hi,

>> > More importantly, it raises a question for locking: we currently use a compare-exchange
>> > for each lock and unlock. If compare-exchange has higher overheads than other atomics
>> > then we should investigate more efficient locking sequences.
>>
>> compare-exchange can be very expensive under contention.
>>
>
> Which locks should we look into dropping CAS? pthread_mutex? Anything else?

There are several internal locking schemes plus uses of CAS spread all over.
There is also the odd define ATOMIC_EXCHANGE_USES_CAS which almost no target
seems to know what to set to - it is used in pthread_spin_lock in a way that makes no
sense. I believe the answer should be 0 for all since exchange is typically simpler and
faster than compare-exchange. 

Cheers,
Wilco

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2022-07-12 20:39 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-06 15:14 [PATCH 3/7] Remove atomic_bit_set/bit_test_set Wilco Dijkstra
2022-07-06 16:14 ` Noah Goldstein
2022-07-06 18:25   ` Noah Goldstein
2022-07-06 18:59     ` Wilco Dijkstra
2022-07-06 19:14       ` Noah Goldstein
2022-07-06 19:30         ` H.J. Lu
2022-07-06 19:36         ` Wilco Dijkstra
2022-07-06 19:51           ` Noah Goldstein
2022-07-06 19:56             ` Noah Goldstein
2022-07-06 20:14               ` Wilco Dijkstra
2022-07-06 20:30                 ` Noah Goldstein
2022-07-06 20:56                   ` H.J. Lu
2022-07-12 17:47                     ` Adhemerval Zanella Netto
2022-07-12 18:09                       ` H.J. Lu
2022-07-12 18:44                         ` Wilco Dijkstra
2022-07-12 19:11                           ` H.J. Lu
2022-07-12 19:18                             ` Noah Goldstein
2022-07-12 20:39                               ` Wilco Dijkstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).