From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-oa1-x30.google.com (mail-oa1-x30.google.com [IPv6:2001:4860:4864:20::30]) by sourceware.org (Postfix) with ESMTPS id 024013858408 for ; Wed, 6 Jul 2022 18:25:44 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 024013858408 Received: by mail-oa1-x30.google.com with SMTP id 586e51a60fabf-101b4f9e825so22445304fac.5 for ; Wed, 06 Jul 2022 11:25:43 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:mime-version:references:in-reply-to:from:date :message-id:subject:to:cc; bh=DQJ4A/5LWD5bqrYI0uZ1Qy5xQoZaUdayqPUSSB6ecT4=; b=2o2oVvyxDcgQJJvQsSrwfIZluZA8ginhXwW+Pxh9pcIBfOf/vWuOH4r8CVAKBvrEHM w5lKCJz4rR88uX4j27BJnOvfOEMW1RJb5lIx1GoNmXj8EEPeiTSXIYOp1P48g4XPLHXB FBe3z60aiT0NIAT/XUR3AlMpeD+Ci2OHvVYUrayWCDoLluLFadtfH3Rc7qthjEAYWgEN +H0KoXxwwmE+zyjGJfhpOmYUoVkO8EXNKFOSfLr1JXJEzSc9wJ0rBAenwKwWRb3jfM/p zsnw7MzdTI1u5kaoPnRdzHGu0YFzW8Lwlaz5ZuuoxxlV48GtU+mmv42pLZ969rsgAbfK llZQ== X-Gm-Message-State: AJIora8v4sDLdJumU8uK634/inSaXytBWeQNKqiHbg/fby3pu4TLq+8B uOVaIC5jIq1NuM/Ldo7KKPz5bVu5g8PsSR52wwc9Jyrbxco= X-Google-Smtp-Source: AGRyM1vv30A5BTzg0ZxTuZomAzdPnbL1NvEElLQb02carSlwaQkNrWt5jU8F4eprAgHx/DgDxW2aLBNXIhrGzBQbF10= X-Received: by 2002:a05:6870:41ca:b0:101:d588:6241 with SMTP id z10-20020a05687041ca00b00101d5886241mr26069364oac.175.1657131943018; Wed, 06 Jul 2022 11:25:43 -0700 (PDT) MIME-Version: 1.0 References: In-Reply-To: From: Noah Goldstein Date: Wed, 6 Jul 2022 11:25:32 -0700 Message-ID: Subject: Re: [PATCH 3/7] Remove atomic_bit_set/bit_test_set To: Wilco Dijkstra Cc: GNU C Library Content-Type: text/plain; charset="UTF-8" X-Spam-Status: No, score=-8.4 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, KAM_LOTSOFHASH, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 06 Jul 2022 18:25:47 -0000 On Wed, Jul 6, 2022 at 9:14 AM Noah Goldstein wrote: > > On Wed, Jul 6, 2022 at 8:14 AM Wilco Dijkstra via Libc-alpha > wrote: > > > > Replace the 3 uses of atomic_bit_set and atomic_bit_test_set with atomic_fetch_or_acquire. > > This enables removal of a lot of target specific implementations. > > > > --- > > > > diff --git a/include/atomic.h b/include/atomic.h > > index 73cc772f0149f94fa0c3e14fa858fa89fee6985f..ed1fc38e7569fcbdd0473ab6f69956a44d62354f 100644 > > --- a/include/atomic.h > > +++ b/include/atomic.h > > @@ -255,28 +255,6 @@ > > #endif > > > > > > -#ifndef atomic_bit_set > > -# define atomic_bit_set(mem, bit) \ > > - (void) atomic_bit_test_set(mem, bit) > > -#endif > > - > > - > > -#ifndef atomic_bit_test_set > > -# define atomic_bit_test_set(mem, bit) \ > > - ({ __typeof (*(mem)) __atg14_old; \ > > - __typeof (mem) __atg14_memp = (mem); \ > > - __typeof (*(mem)) __atg14_mask = ((__typeof (*(mem))) 1 << (bit)); \ > > - \ > > - do \ > > - __atg14_old = (*__atg14_memp); \ > > - while (__builtin_expect \ > > - (atomic_compare_and_exchange_bool_acq (__atg14_memp, \ > > - __atg14_old | __atg14_mask,\ > > - __atg14_old), 0)); \ > > - \ > > - __atg14_old & __atg14_mask; }) > > -#endif > > - > > /* Atomically *mem &= mask. */ > > #ifndef atomic_and > > # define atomic_and(mem, mask) \ > > diff --git a/misc/tst-atomic.c b/misc/tst-atomic.c > > index 13acf0da8f1ab41774175401a68ce28a0eb7ed30..765b036873e181d6c49299c0ad08d5dbfcc05387 100644 > > --- a/misc/tst-atomic.c > > +++ b/misc/tst-atomic.c > > @@ -325,66 +325,6 @@ do_test (void) > > ret = 1; > > } > > > > - mem = 0; > > - atomic_bit_set (&mem, 1); > > - if (mem != 2) > > - { > > - puts ("atomic_bit_set test 1 failed"); > > - ret = 1; > > - } > > - > > - mem = 8; > > - atomic_bit_set (&mem, 3); > > - if (mem != 8) > > - { > > - puts ("atomic_bit_set test 2 failed"); > > - ret = 1; > > - } > > - > > -#ifdef TEST_ATOMIC64 > > - mem = 16; > > - atomic_bit_set (&mem, 35); > > - if (mem != 0x800000010LL) > > - { > > - puts ("atomic_bit_set test 3 failed"); > > - ret = 1; > > - } > > -#endif > > - > > - mem = 0; > > - if (atomic_bit_test_set (&mem, 1) > > - || mem != 2) > > - { > > - puts ("atomic_bit_test_set test 1 failed"); > > - ret = 1; > > - } > > - > > - mem = 8; > > - if (! atomic_bit_test_set (&mem, 3) > > - || mem != 8) > > - { > > - puts ("atomic_bit_test_set test 2 failed"); > > - ret = 1; > > - } > > - > > -#ifdef TEST_ATOMIC64 > > - mem = 16; > > - if (atomic_bit_test_set (&mem, 35) > > - || mem != 0x800000010LL) > > - { > > - puts ("atomic_bit_test_set test 3 failed"); > > - ret = 1; > > - } > > - > > - mem = 0x100000000LL; > > - if (! atomic_bit_test_set (&mem, 32) > > - || mem != 0x100000000LL) > > - { > > - puts ("atomic_bit_test_set test 4 failed"); > > - ret = 1; > > - } > > -#endif > > - > > /* Tests for C11-like atomics. */ > > mem = 11; > > if (atomic_load_relaxed (&mem) != 11 || atomic_load_acquire (&mem) != 11) > > diff --git a/nptl/nptl_free_tcb.c b/nptl/nptl_free_tcb.c > > index 0d31143ac849de6398e06c399b94813ae57dcff3..b63ec3bc3c1df4a1fc5272a24d453e679dbedd5e 100644 > > --- a/nptl/nptl_free_tcb.c > > +++ b/nptl/nptl_free_tcb.c > > @@ -24,7 +24,8 @@ void > > __nptl_free_tcb (struct pthread *pd) > > { > > /* The thread is exiting now. */ > > - if (atomic_bit_test_set (&pd->cancelhandling, TERMINATED_BIT) == 0) > > + if ((atomic_fetch_or_acquire (&pd->cancelhandling, 1 << TERMINATED_BIT) > > + & (1 << TERMINATED_BIT)) == 0) > > I think this change may cause regressions on x86 because at the moment > it seems the only way to generate `lock bts` is with inline assembly: > > https://godbolt.org/z/bdchE53Ps GCC Does have a pass for this: https://github.com/gcc-mirror/gcc/blob/master/gcc/tree-ssa-ccp.cc but it needs to be done in a very specific way: https://godbolt.org/z/s9dchva6G Can these remain macros that setup the bit_test_set properly but use compiler intrinsics? > > Can we keep atomic bit_test_set? > > { > > /* Free TPP data. */ > > if (pd->tpp != NULL) > > diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c > > index 870a8fcb34eb43b58c2260fee6a4624f0fbbd469..4b7f3edc384748f300ca935ad878eb0e3547e163 100644 > > --- a/nptl/pthread_create.c > > +++ b/nptl/pthread_create.c > > @@ -487,7 +487,7 @@ start_thread (void *arg) > > /* The thread is exiting now. Don't set this bit until after we've hit > > the event-reporting breakpoint, so that td_thr_get_info on us while at > > the breakpoint reports TD_THR_RUN state rather than TD_THR_ZOMBIE. */ > > - atomic_bit_set (&pd->cancelhandling, EXITING_BIT); > > + atomic_fetch_or_acquire (&pd->cancelhandling, 1 << EXITING_BIT); > > > > if (__glibc_unlikely (atomic_decrement_and_test (&__nptl_nthreads))) > > /* This was the last thread. */ > > diff --git a/sysdeps/alpha/atomic-machine.h b/sysdeps/alpha/atomic-machine.h > > index df28b90261aa485125d951bc1abf76602730bfd0..115a9df5d77cd08bcb0a49d9b59f0c53b4a20d78 100644 > > --- a/sysdeps/alpha/atomic-machine.h > > +++ b/sysdeps/alpha/atomic-machine.h > > @@ -325,15 +325,6 @@ > > #define atomic_exchange_and_add(mem, value) \ > > __atomic_val_bysize (__arch_exchange_and_add, int, mem, value, __MB, __MB) > > > > - > > -/* ??? Blah, I'm lazy. Implement these later. Can do better than the > > - compare-and-exchange loop provided by generic code. > > - > > -#define atomic_decrement_if_positive(mem) > > -#define atomic_bit_test_set(mem, bit) > > - > > -*/ > > - > > #define atomic_full_barrier() __asm ("mb" : : : "memory"); > > #define atomic_read_barrier() __asm ("mb" : : : "memory"); > > #define atomic_write_barrier() __asm ("wmb" : : : "memory"); > > diff --git a/sysdeps/ia64/atomic-machine.h b/sysdeps/ia64/atomic-machine.h > > index 71afcfe0d0031a6ceae5879a2b3b19ed2ee2f110..b2f5d2f4774cc2503c7595cb82f30f60fbcbe89c 100644 > > --- a/sysdeps/ia64/atomic-machine.h > > +++ b/sysdeps/ia64/atomic-machine.h > > @@ -77,20 +77,4 @@ > > while (__builtin_expect (__val != __oldval, 0)); \ > > __oldval; }) > > > > -#define atomic_bit_test_set(mem, bit) \ > > - ({ __typeof (*mem) __oldval, __val; \ > > - __typeof (mem) __memp = (mem); \ > > - __typeof (*mem) __mask = ((__typeof (*mem)) 1 << (bit)); \ > > - \ > > - __val = (*__memp); \ > > - do \ > > - { \ > > - __oldval = __val; \ > > - __val = atomic_compare_and_exchange_val_acq (__memp, \ > > - __oldval | __mask, \ > > - __oldval); \ > > - } \ > > - while (__builtin_expect (__val != __oldval, 0)); \ > > - __oldval & __mask; }) > > - > > #define atomic_full_barrier() __sync_synchronize () > > diff --git a/sysdeps/m68k/m680x0/m68020/atomic-machine.h b/sysdeps/m68k/m680x0/m68020/atomic-machine.h > > index 70cda007341b49d28dc1d9847a88198b47589db4..72a3b81642c1a23207054092d16bb856556cd897 100644 > > --- a/sysdeps/m68k/m680x0/m68020/atomic-machine.h > > +++ b/sysdeps/m68k/m680x0/m68020/atomic-machine.h > > @@ -218,15 +218,3 @@ > > : "memory"); \ > > } \ > > __result; }) > > - > > -#define atomic_bit_set(mem, bit) \ > > - __asm __volatile ("bfset %0{%1,#1}" \ > > - : "+m" (*(mem)) \ > > - : "di" (sizeof (*(mem)) * 8 - (bit) - 1)) > > - > > -#define atomic_bit_test_set(mem, bit) \ > > - ({ char __result; \ > > - __asm __volatile ("bfset %1{%2,#1}; sne %0" \ > > - : "=dm" (__result), "+m" (*(mem)) \ > > - : "di" (sizeof (*(mem)) * 8 - (bit) - 1)); \ > > - __result; }) > > diff --git a/sysdeps/nptl/pthreadP.h b/sysdeps/nptl/pthreadP.h > > index 39af275c254ef3e737736bd5c38099bada8746d6..e8bd4c6dcbf7b7785857ff08c15dac9a5cfb2a5d 100644 > > --- a/sysdeps/nptl/pthreadP.h > > +++ b/sysdeps/nptl/pthreadP.h > > @@ -43,12 +43,6 @@ > > atomic_compare_and_exchange_val_acq (&(descr)->member, new, old) > > #endif > > > > -#ifndef THREAD_ATOMIC_BIT_SET > > -# define THREAD_ATOMIC_BIT_SET(descr, member, bit) \ > > - atomic_bit_set (&(descr)->member, bit) > > -#endif > > - > > - > > static inline short max_adaptive_count (void) > > { > > #if HAVE_TUNABLES > > @@ -276,7 +270,7 @@ __do_cancel (void) > > struct pthread *self = THREAD_SELF; > > > > /* Make sure we get no more cancellations. */ > > - atomic_bit_set (&self->cancelhandling, EXITING_BIT); > > + atomic_fetch_or_acquire (&self->cancelhandling, 1 << EXITING_BIT); > > > > __pthread_unwind ((__pthread_unwind_buf_t *) > > THREAD_GETMEM (self, cleanup_jmp_buf)); > > diff --git a/sysdeps/s390/atomic-machine.h b/sysdeps/s390/atomic-machine.h > > index fe7f04cf30cf5b62fea743a72384f8fc248d2d25..db31e377970c4ab6285ef65fb21419db7c6ca373 100644 > > --- a/sysdeps/s390/atomic-machine.h > > +++ b/sysdeps/s390/atomic-machine.h > > @@ -93,17 +93,6 @@ > > atomic_or_val (mem, mask); \ > > } while (0) > > > > -/* Atomically *mem |= 1 << bit and return true if the bit was set in old value > > - of *mem. */ > > -/* The load-and-or instruction is used on z196 zarch and higher cpus > > - instead of a loop with compare-and-swap instruction. */ > > -#define atomic_bit_test_set(mem, bit) \ > > - ({ __typeof (*(mem)) __atg14_old; \ > > - __typeof (mem) __atg14_memp = (mem); \ > > - __typeof (*(mem)) __atg14_mask = ((__typeof (*(mem))) 1 << (bit)); \ > > - __atg14_old = atomic_or_val (__atg14_memp, __atg14_mask); \ > > - __atg14_old & __atg14_mask; }) > > - > > /* Atomically *mem &= mask and return the old value of *mem. */ > > /* The gcc builtin uses load-and-and instruction on z196 zarch and higher cpus > > instead of a loop with compare-and-swap instruction. */ > > diff --git a/sysdeps/unix/sysv/linux/riscv/atomic-machine.h b/sysdeps/unix/sysv/linux/riscv/atomic-machine.h > > index deb6cadaa35e841e23cee55d169a20538bdb1f8d..c5eb5c639fb59d7395c0a2d8f4fd72452845914b 100644 > > --- a/sysdeps/unix/sysv/linux/riscv/atomic-machine.h > > +++ b/sysdeps/unix/sysv/linux/riscv/atomic-machine.h > > @@ -159,10 +159,6 @@ > > > > # define atomic_max_relaxed(mem, value) asm_amo ("amomaxu", "", mem, value) > > > > -# define atomic_bit_test_set(mem, bit) \ > > - ({ typeof (*mem) __mask = (typeof (*mem))1 << (bit); \ > > - asm_amo ("amoor", ".aq", mem, __mask) & __mask; }) > > - > > #else /* __riscv_atomic */ > > # error "ISAs that do not subsume the A extension are not supported" > > #endif /* !__riscv_atomic */ > > diff --git a/sysdeps/unix/sysv/linux/sh/atomic-machine.h b/sysdeps/unix/sysv/linux/sh/atomic-machine.h > > index aead3d5866d12b3b6bba27a1ef4a273400e1715b..3bf84d06bc875fca75b38b5ab01fca6683d390f7 100644 > > --- a/sysdeps/unix/sysv/linux/sh/atomic-machine.h > > +++ b/sysdeps/unix/sysv/linux/sh/atomic-machine.h > > @@ -301,97 +301,3 @@ > > > > #define atomic_increment_and_test(mem) atomic_add_zero((mem), 1) > > #define atomic_decrement_and_test(mem) atomic_add_zero((mem), -1) > > - > > -#define atomic_bit_set(mem, bit) \ > > - (void) ({ unsigned int __mask = 1 << (bit); \ > > - if (sizeof (*(mem)) == 1) \ > > - __asm __volatile ("\ > > - mova 1f,r0\n\ > > - mov r15,r1\n\ > > - .align 2\n\ > > - mov #(0f-1f),r15\n\ > > - 0: mov.b @%0,r2\n\ > > - or %1,r2\n\ > > - mov.b r2,@%0\n\ > > - 1: mov r1,r15"\ > > - : : "u" (mem), "u" (__mask) \ > > - : "r0", "r1", "r2", "memory"); \ > > - else if (sizeof (*(mem)) == 2) \ > > - __asm __volatile ("\ > > - mova 1f,r0\n\ > > - mov r15,r1\n\ > > - .align 2\n\ > > - mov #(0f-1f),r15\n\ > > - 0: mov.w @%0,r2\n\ > > - or %1,r2\n\ > > - mov.w r2,@%0\n\ > > - 1: mov r1,r15"\ > > - : : "u" (mem), "u" (__mask) \ > > - : "r0", "r1", "r2", "memory"); \ > > - else if (sizeof (*(mem)) == 4) \ > > - __asm __volatile ("\ > > - mova 1f,r0\n\ > > - mov r15,r1\n\ > > - .align 2\n\ > > - mov #(0f-1f),r15\n\ > > - 0: mov.l @%0,r2\n\ > > - or %1,r2\n\ > > - mov.l r2,@%0\n\ > > - 1: mov r1,r15"\ > > - : : "u" (mem), "u" (__mask) \ > > - : "r0", "r1", "r2", "memory"); \ > > - else \ > > - abort (); \ > > - }) > > - > > -#define atomic_bit_test_set(mem, bit) \ > > - ({ unsigned int __mask = 1 << (bit); \ > > - unsigned int __result = __mask; \ > > - if (sizeof (*(mem)) == 1) \ > > - __asm __volatile ("\ > > - mova 1f,r0\n\ > > - .align 2\n\ > > - mov r15,r1\n\ > > - mov #(0f-1f),r15\n\ > > - 0: mov.b @%2,r2\n\ > > - mov r2,r3\n\ > > - or %1,r2\n\ > > - mov.b r2,@%2\n\ > > - 1: mov r1,r15\n\ > > - and r3,%0"\ > > - : "=&r" (__result), "=&r" (__mask) \ > > - : "u" (mem), "0" (__result), "1" (__mask) \ > > - : "r0", "r1", "r2", "r3", "memory"); \ > > - else if (sizeof (*(mem)) == 2) \ > > - __asm __volatile ("\ > > - mova 1f,r0\n\ > > - .align 2\n\ > > - mov r15,r1\n\ > > - mov #(0f-1f),r15\n\ > > - 0: mov.w @%2,r2\n\ > > - mov r2,r3\n\ > > - or %1,r2\n\ > > - mov.w %1,@%2\n\ > > - 1: mov r1,r15\n\ > > - and r3,%0"\ > > - : "=&r" (__result), "=&r" (__mask) \ > > - : "u" (mem), "0" (__result), "1" (__mask) \ > > - : "r0", "r1", "r2", "r3", "memory"); \ > > - else if (sizeof (*(mem)) == 4) \ > > - __asm __volatile ("\ > > - mova 1f,r0\n\ > > - .align 2\n\ > > - mov r15,r1\n\ > > - mov #(0f-1f),r15\n\ > > - 0: mov.l @%2,r2\n\ > > - mov r2,r3\n\ > > - or r2,%1\n\ > > - mov.l %1,@%2\n\ > > - 1: mov r1,r15\n\ > > - and r3,%0"\ > > - : "=&r" (__result), "=&r" (__mask) \ > > - : "u" (mem), "0" (__result), "1" (__mask) \ > > - : "r0", "r1", "r2", "r3", "memory"); \ > > - else \ > > - abort (); \ > > - __result; }) > > diff --git a/sysdeps/x86/atomic-machine.h b/sysdeps/x86/atomic-machine.h > > index c27a437ec1b129fc18ca832708a69627959fc107..6adc219bf69f1507624618ed931dad11fea4e150 100644 > > --- a/sysdeps/x86/atomic-machine.h > > +++ b/sysdeps/x86/atomic-machine.h > > @@ -292,56 +292,6 @@ > > __result; }) > > > > > > -#define atomic_bit_set(mem, bit) \ > > - do { \ > > - if (sizeof (*mem) == 1) \ > > - __asm __volatile (LOCK_PREFIX "orb %b2, %0" \ > > - : "=m" (*mem) \ > > - : "m" (*mem), IBR_CONSTRAINT (1L << (bit))); \ > > - else if (sizeof (*mem) == 2) \ > > - __asm __volatile (LOCK_PREFIX "orw %w2, %0" \ > > - : "=m" (*mem) \ > > - : "m" (*mem), "ir" (1L << (bit))); \ > > - else if (sizeof (*mem) == 4) \ > > - __asm __volatile (LOCK_PREFIX "orl %2, %0" \ > > - : "=m" (*mem) \ > > - : "m" (*mem), "ir" (1L << (bit))); \ > > - else if (__builtin_constant_p (bit) && (bit) < 32) \ > > - __asm __volatile (LOCK_PREFIX "orq %2, %0" \ > > - : "=m" (*mem) \ > > - : "m" (*mem), "i" (1L << (bit))); \ > > - else if (__HAVE_64B_ATOMICS) \ > > - __asm __volatile (LOCK_PREFIX "orq %q2, %0" \ > > - : "=m" (*mem) \ > > - : "m" (*mem), "r" (1UL << (bit))); \ > > - else \ > > - __atomic_link_error (); \ > > - } while (0) > > - > > - > > -#define atomic_bit_test_set(mem, bit) \ > > - ({ unsigned char __result; \ > > - if (sizeof (*mem) == 1) \ > > - __asm __volatile (LOCK_PREFIX "btsb %3, %1; setc %0" \ > > - : "=q" (__result), "=m" (*mem) \ > > - : "m" (*mem), IBR_CONSTRAINT (bit)); \ > > - else if (sizeof (*mem) == 2) \ > > - __asm __volatile (LOCK_PREFIX "btsw %3, %1; setc %0" \ > > - : "=q" (__result), "=m" (*mem) \ > > - : "m" (*mem), "ir" (bit)); \ > > - else if (sizeof (*mem) == 4) \ > > - __asm __volatile (LOCK_PREFIX "btsl %3, %1; setc %0" \ > > - : "=q" (__result), "=m" (*mem) \ > > - : "m" (*mem), "ir" (bit)); \ > > - else if (__HAVE_64B_ATOMICS) \ > > - __asm __volatile (LOCK_PREFIX "btsq %3, %1; setc %0" \ > > - : "=q" (__result), "=m" (*mem) \ > > - : "m" (*mem), "ir" (bit)); \ > > - else \ > > - __atomic_link_error (); \ > > - __result; }) > > - > > - > > #define __arch_and_body(lock, mem, mask) \ > > do { \ > > if (sizeof (*mem) == 1) \