diff --git a/nptl/sysdeps/unix/sysv/linux/sparc/pthread_once.c b/nptl/sysdeps/unix/sysv/linux/sparc/pthread_once.c
index 5879f44..f9b0953 100644
--- a/nptl/sysdeps/unix/sysv/linux/sparc/pthread_once.c
+++ b/nptl/sysdeps/unix/sysv/linux/sparc/pthread_once.c
@@ -28,11 +28,31 @@ clear_once_control (void *arg)
 {
   pthread_once_t *once_control = (pthread_once_t *) arg;
 
+  /* Reset to the uninitialized state here (see __pthread_once).  Also, we
+     don't need a stronger memory order because we do not need to make any
+     other of our writes visible to other threads that see this value.  */
   *once_control = 0;
   lll_futex_wake (once_control, INT_MAX, LLL_PRIVATE);
 }
 
 
+/* This is similar to a lock implementation, but we distinguish between three
+   states: not yet initialized (0), initialization in progress
+   (__fork_generation | 1), and initialization finished (2).  If in the first
+   state, threads will try to run the initialization by moving to the second
+   state; the first thread to do so via a CAS on once_control runs
+   init_routine, other threads block.
+   When forking the process, some threads can be interrupted during the second
+   state; they won't be present in the forked child, so we need to restart
+   initialization in the child.  To distinguish an in-progress initialization
+   from an interrupted initialization (in which case we need to reclaim the
+   lock), we look at the fork generation that's part of the second state: We
+   can reclaim iff it differs from the current fork generation.
+   XXX: This algorithm has an ABA issue on the fork generation: If an
+   initialization is interrupted, we then fork 2^30 times (30 bits of
+   once_control are used for the fork generation), and try to initialize
+   again, we can deadlock because we can't distinguish the in-progress and
+   interrupted cases anymore.  */
 int
 __pthread_once (once_control, init_routine)
      pthread_once_t *once_control;
@@ -42,15 +62,26 @@ __pthread_once (once_control, init_routine)
     {
       int oldval, val, newval;
 
+      /* We need acquire memory order for this load because if the value
+         signals that initialization has finished, we need to see any
+         data modifications done during initialization.  */
       val = *once_control;
+      atomic_read_barrier();
       do
         {
-          /* Check if the initialized has already been done.  */
-          if ((val & 2) != 0)
+          /* Check if the initialization has already been done.  */
+          if (__builtin_expect ((val & 2) != 0, 1))
             return 0;
 
           oldval = val;
-          newval = (oldval & 3) | __fork_generation | 1;
+          /* We try to set the state to in-progress, with the current fork
+             generation.  We don't need atomic accesses for the fork
+             generation because it's immutable in a particular process, and
+             forked child processes start with a single thread that modified
+             the generation.  */
+          newval = __fork_generation | 1;
+          /* We need acquire memory order here for the same reason as for the
+             load from once_control above.  */
           val = atomic_compare_and_exchange_val_acq (once_control, newval,
                                                      oldval);
         }
@@ -59,9 +90,10 @@ __pthread_once (once_control, init_routine)
       /* Check if another thread already runs the initializer.  */
       if ((oldval & 1) != 0)
         {
-          /* Check whether the initializer execution was interrupted
-             by a fork.  */
-          if (((oldval ^ newval) & -4) == 0)
+          /* Check whether the initializer execution was interrupted by a
+             fork.  (We know that for both values, bit 0 is set and bit 1 is
+             not.)  */
+          if (oldval == newval)
             {
               /* Same generation, some other thread was faster.  Wait.  */
               lll_futex_wait (once_control, newval, LLL_PRIVATE);
@@ -79,8 +111,11 @@ __pthread_once (once_control, init_routine)
       pthread_cleanup_pop (0);
 
 
-      /* Add one to *once_control.  */
-      atomic_increment (once_control);
+      /* Mark *once_control as having finished the initialization.  We need
+         release memory order here because we need to synchronize with other
+         threads that want to use the initialized data.  */
+      atomic_write_barrier();
+      *once_control = 2;
 
       /* Wake up all other threads.  */
       lll_futex_wake (once_control, INT_MAX, LLL_PRIVATE);
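For reference, below is a minimal, self-contained sketch of the three-state once algorithm that the added comments describe, written against C11 <stdatomic.h> rather than glibc's internal atomic_read_barrier/atomic_write_barrier and futex primitives.  The sketch_once name and the ONCE_* constants are illustrative only, and the fork-generation handling and futex wait/wake are deliberately left out; the sketch only illustrates the memory-ordering argument: acquire on the load and on the CAS of once_control, release on the store that publishes the finished state.

#include <stdatomic.h>
#include <sched.h>

#define ONCE_INIT        0   /* not yet initialized */
#define ONCE_IN_PROGRESS 1   /* some thread is running init_routine */
#define ONCE_DONE        2   /* initialization finished */

static void
sketch_once (atomic_int *once_control, void (*init_routine) (void))
{
  /* Acquire load: if we observe ONCE_DONE, we must also observe the data
     written by init_routine before it set that state.  */
  int val = atomic_load_explicit (once_control, memory_order_acquire);

  for (;;)
    {
      if (val == ONCE_DONE)
        return;

      if (val == ONCE_INIT)
        {
          int expected = ONCE_INIT;
          /* Acquire CAS for the same reason as the load above: a failed
             CAS may observe ONCE_DONE and return on the next iteration.  */
          if (atomic_compare_exchange_strong_explicit (once_control,
                                                       &expected,
                                                       ONCE_IN_PROGRESS,
                                                       memory_order_acquire,
                                                       memory_order_acquire))
            {
              init_routine ();
              /* Release store: pairs with the acquire accesses of threads
                 that will use the initialized data.  */
              atomic_store_explicit (once_control, ONCE_DONE,
                                     memory_order_release);
              return;
            }
          /* Lost the race; expected now holds the winner's state.  */
          val = expected;
          continue;
        }

      /* Another thread is running init_routine.  glibc blocks on a futex
         here; the sketch just yields and re-reads the state.  */
      sched_yield ();
      val = atomic_load_explicit (once_control, memory_order_acquire);
    }
}

In the patched glibc code the in-progress state additionally carries __fork_generation in the bits above the low two, so that an initialization interrupted by fork can be reclaimed in the child, and waiting threads block on lll_futex_wait instead of yielding.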