gcc
	* expr.h (expand_atomic_exchange): Add extra parameter.
	* builtins.c (expand_builtin_sync_lock_test_and_set): Call
	expand_atomic_exchange with true.
	(expand_builtin_atomic_exchange): Call expand_atomic_exchange with
	false.
	* optabs.c (expand_atomic_exchange): Add use_test_and_set param and
	only fall back to __sync_lock_test_and_set when true.
	(expand_atomic_store): Add false to expand_atomic_exchange call.

libstdc++-v3
	* include/bits/atomic_base.h (test_and_set): Call __atomic_exchange
	only if it is always lock free, otherwise __sync_lock_test_and_set.

Index: gcc/expr.h
===================================================================
*** gcc/expr.h	(revision 180770)
--- gcc/expr.h	(working copy)
*************** rtx emit_conditional_add (rtx, enum rtx_
*** 215,221 ****
  rtx expand_sync_operation (rtx, rtx, enum rtx_code);
  rtx expand_sync_fetch_operation (rtx, rtx, enum rtx_code, bool, rtx);
  
! rtx expand_atomic_exchange (rtx, rtx, rtx, enum memmodel);
  rtx expand_atomic_load (rtx, rtx, enum memmodel);
  rtx expand_atomic_store (rtx, rtx, enum memmodel);
  rtx expand_atomic_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel,
--- 215,221 ----
  rtx expand_sync_operation (rtx, rtx, enum rtx_code);
  rtx expand_sync_fetch_operation (rtx, rtx, enum rtx_code, bool, rtx);
  
! rtx expand_atomic_exchange (rtx, rtx, rtx, enum memmodel, bool);
  rtx expand_atomic_load (rtx, rtx, enum memmodel);
  rtx expand_atomic_store (rtx, rtx, enum memmodel);
  rtx expand_atomic_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel,
Index: gcc/builtins.c
===================================================================
*** gcc/builtins.c	(revision 180789)
--- gcc/builtins.c	(working copy)
*************** expand_builtin_sync_lock_test_and_set (e
*** 5221,5227 ****
    mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
    val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
  
!   return expand_atomic_exchange (target, mem, val, MEMMODEL_ACQUIRE);
  }
  
  /* Expand the __sync_lock_release intrinsic.  EXP is the CALL_EXPR.  */
--- 5221,5227 ----
    mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
    val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
  
!   return expand_atomic_exchange (target, mem, val, MEMMODEL_ACQUIRE, true);
  }
  
  /* Expand the __sync_lock_release intrinsic.  EXP is the CALL_EXPR.  */
*************** expand_builtin_atomic_exchange (enum mac
*** 5284,5290 ****
    mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
    val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
  
!   return expand_atomic_exchange (target, mem, val, model);
  }
  
  /* Expand the __atomic_compare_exchange intrinsic:
--- 5284,5290 ----
    mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
    val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
  
!   return expand_atomic_exchange (target, mem, val, model, false);
  }
  
  /* Expand the __atomic_compare_exchange intrinsic:
Index: gcc/optabs.c
===================================================================
*** gcc/optabs.c	(revision 180770)
--- gcc/optabs.c	(working copy)
*************** expand_compare_and_swap_loop (rtx mem, r
*** 6872,6881 ****
     atomically store VAL in MEM and return the previous value in MEM.
  
     MEMMODEL is the memory model variant to use.
!    TARGET is an option place to stick the return value.  */
  
  rtx
! expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model)
  {
    enum machine_mode mode = GET_MODE (mem);
    enum insn_code icode;
--- 6872,6884 ----
     atomically store VAL in MEM and return the previous value in MEM.
  
     MEMMODEL is the memory model variant to use.
!    TARGET is an optional place to stick the return value.
!    USE_TEST_AND_SET indicates whether __sync_lock_test_and_set should be used
!    as a fall back if the atomic_exchange pattern does not exist.  */
  
  rtx
! expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model,
!                         bool use_test_and_set)
  {
    enum machine_mode mode = GET_MODE (mem);
    enum insn_code icode;
*************** expand_atomic_exchange (rtx target, rtx 
*** 6900,6930 ****
     acquire barrier.  If the pattern exists, and the memory model is stronger
     than acquire, add a release barrier before the instruction.
     The barrier is not needed if sync_lock_test_and_set doesn't exist since
!    it will expand into a compare-and-swap loop.  */
  
!   icode = direct_optab_handler (sync_lock_test_and_set_optab, mode);
!   last_insn = get_last_insn ();
!   if ((icode != CODE_FOR_nothing) && (model == MEMMODEL_SEQ_CST ||
!        model == MEMMODEL_RELEASE ||
!        model == MEMMODEL_ACQ_REL))
!     expand_builtin_mem_thread_fence (model);
  
!   if (icode != CODE_FOR_nothing)
!     {
!       struct expand_operand ops[3];
  
!       create_output_operand (&ops[0], target, mode);
!       create_fixed_operand (&ops[1], mem);
!       /* VAL may have been promoted to a wider mode.  Shrink it if so.  */
!       create_convert_operand_to (&ops[2], val, mode, true);
!       if (maybe_expand_insn (icode, 3, ops))
!         return ops[0].value;
!     }
  
!   /* Remove any fence we may have inserted since a compare and swap loop is a
!      full memory barrier.  */
!   if (last_insn != get_last_insn ())
!     delete_insns_since (last_insn);
  
    /* Otherwise, use a compare-and-swap loop for the exchange.  */
    if (can_compare_and_swap_p (mode))
--- 6903,6941 ----
     acquire barrier.  If the pattern exists, and the memory model is stronger
     than acquire, add a release barrier before the instruction.
     The barrier is not needed if sync_lock_test_and_set doesn't exist since
!    it will expand into a compare-and-swap loop.
!    Some targets have non-compliant test_and_sets, so it would be incorrect
!    to emit a test_and_set in place of an __atomic_exchange.  The test_and_set
!    builtin shares this expander since exchange can always replace the
!    test_and_set.  */
! 
  
!   if (use_test_and_set)
!     {
!       icode = direct_optab_handler (sync_lock_test_and_set_optab, mode);
!       last_insn = get_last_insn ();
!       if ((icode != CODE_FOR_nothing) && (model == MEMMODEL_SEQ_CST ||
!            model == MEMMODEL_RELEASE ||
!            model == MEMMODEL_ACQ_REL))
!         expand_builtin_mem_thread_fence (model);
  
!       if (icode != CODE_FOR_nothing)
!         {
!           struct expand_operand ops[3];
  
!           create_output_operand (&ops[0], target, mode);
!           create_fixed_operand (&ops[1], mem);
!           /* VAL may have been promoted to a wider mode.  Shrink it if so.  */
!           create_convert_operand_to (&ops[2], val, mode, true);
!           if (maybe_expand_insn (icode, 3, ops))
!             return ops[0].value;
!         }
  
!       /* Remove any fence that was inserted since a compare and swap loop is
!          already a full memory barrier.  */
!       if (last_insn != get_last_insn ())
!         delete_insns_since (last_insn);
!     }
  
    /* Otherwise, use a compare-and-swap loop for the exchange.  */
    if (can_compare_and_swap_p (mode))
*************** expand_atomic_store (rtx mem, rtx val, e
*** 7130,7136 ****
       the result.  If that doesn't work, don't do anything.  */
    if (GET_MODE_PRECISION(mode) > BITS_PER_WORD)
      {
!       rtx target = expand_atomic_exchange (NULL_RTX, mem, val, model);
        if (target)
          return const0_rtx;
        else
--- 7141,7147 ----
       the result.  If that doesn't work, don't do anything.  */
    if (GET_MODE_PRECISION(mode) > BITS_PER_WORD)
      {
!       rtx target = expand_atomic_exchange (NULL_RTX, mem, val, model, false);
        if (target)
          return const0_rtx;
        else
Index: libstdc++-v3/include/bits/atomic_base.h
===================================================================
*** libstdc++-v3/include/bits/atomic_base.h	(revision 180789)
--- libstdc++-v3/include/bits/atomic_base.h	(working copy)
*************** _GLIBCXX_BEGIN_NAMESPACE_VERSION
*** 261,273 ****
      bool
      test_and_set(memory_order __m = memory_order_seq_cst) noexcept
      {
!       return __atomic_exchange_n(&_M_i, 1, __m);
      }
  
      bool
      test_and_set(memory_order __m = memory_order_seq_cst) volatile noexcept
      {
!       return __atomic_exchange_n(&_M_i, 1, __m);
      }
  
      void
--- 261,295 ----
      bool
      test_and_set(memory_order __m = memory_order_seq_cst) noexcept
      {
!       /* The standard *requires* this to be lock free.  If exchange is not
!          always lock free, then resort to the old test_and_set.  */
!       if (__atomic_always_lock_free (sizeof (_M_i), 0))
!         return __atomic_exchange_n(&_M_i, 1, __m);
!       else
!         {
!           /* Sync test and set is only guaranteed to be acquire.  */
!           if (__m == memory_order_seq_cst || __m == memory_order_release
!               || __m == memory_order_acq_rel)
!             atomic_thread_fence (__m);
!           return __sync_lock_test_and_set (&_M_i, 1);
!         }
      }
  
      bool
      test_and_set(memory_order __m = memory_order_seq_cst) volatile noexcept
      {
!       /* The standard *requires* this to be lock free.  If exchange is not
!          always lock free, then resort to the old test_and_set.  */
!       if (__atomic_always_lock_free (sizeof (_M_i), 0))
!         return __atomic_exchange_n(&_M_i, 1, __m);
!       else
!         {
!           /* Sync test and set is only guaranteed to be acquire.  */
!           if (__m == memory_order_seq_cst || __m == memory_order_release
!               || __m == memory_order_acq_rel)
!             atomic_thread_fence (__m);
!           return __sync_lock_test_and_set (&_M_i, 1);
!         }
      }
  
      void
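
For readers who want to see the fallback in isolation, here is a minimal, self-contained sketch of the logic the atomic_base.h hunk adds, written against GCC's __atomic/__sync builtins.  The struct name, the use of unsigned char for the flag word, and the main() driver are illustrative only and not part of the patch; the point is simply that a leading fence supplies the release half of the stronger memory models before the acquire-only __sync_lock_test_and_set.

// Illustrative sketch only (not part of the patch).  Assumes a GCC-compatible
// compiler providing __atomic_always_lock_free, __atomic_exchange_n and
// __sync_lock_test_and_set.
#include <atomic>

struct flag_sketch
{
  unsigned char _M_i;

  bool
  test_and_set (std::memory_order __m = std::memory_order_seq_cst) noexcept
  {
    if (__atomic_always_lock_free (sizeof (_M_i), 0))
      // Exchange is guaranteed to expand to lock-free code, so use it.
      return __atomic_exchange_n (&_M_i, 1, static_cast<int> (__m));

    // __sync_lock_test_and_set is only an acquire barrier; emit a fence
    // first so the stronger models also get their release semantics.
    if (__m == std::memory_order_seq_cst
        || __m == std::memory_order_release
        || __m == std::memory_order_acq_rel)
      std::atomic_thread_fence (__m);
    return __sync_lock_test_and_set (&_M_i, 1);
  }
};

int
main ()
{
  flag_sketch f{};
  bool first  = f.test_and_set ();   // false: flag was clear
  bool second = f.test_and_set ();   // true: flag was already set
  return (first == false && second == true) ? 0 : 1;
}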
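
On the RTL side, the new use_test_and_set flag encodes the asymmetry between the two builtins rather than any new capability: an exchange is always an acceptable implementation of __sync_lock_test_and_set, but a target's test_and_set pattern is not always an acceptable implementation of __atomic_exchange, since GCC only requires __sync_lock_test_and_set to act as an acquire barrier and, on some targets, to store an implementation-defined "set" value.  The fragment below is only a caller-level illustration of that contract difference; the variable names are arbitrary.

// Illustrative only: contrasts the contracts of the two builtins that
// expand_atomic_exchange now distinguishes via use_test_and_set.
#include <cstdio>

static unsigned char lock_word;

int
main ()
{
  // __sync form: acquire-only, and on some targets the value actually stored
  // is implementation defined.  That is why it may expand through the
  // target's sync_lock_test_and_set pattern.
  unsigned char prev1 = __sync_lock_test_and_set (&lock_word, 1);

  // __atomic form: a true exchange with an explicit memory model; it must
  // store exactly the requested value, so it cannot silently fall back to a
  // non-compliant test_and_set pattern.
  unsigned char prev2 = __atomic_exchange_n (&lock_word, 0, __ATOMIC_SEQ_CST);

  std::printf ("previous values: %d then %d\n", prev1, prev2);
  return 0;
}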