2011-11-07  Andrew MacLeod

libstdc++-v3
	* include/bits/atomic_base.h (atomic_thread_fence): Call builtin.
	(atomic_signal_fence): Call builtin.
	(atomic_flag::test_and_set): Call __atomic_exchange when it is
	lockfree, otherwise fall back to call __sync_lock_test_and_set.
	(atomic_flag::clear): Call __atomic_store when it is lockfree,
	otherwise fall back to call __sync_lock_release.

gcc
	* doc/extend.texi: Document behaviour change for __atomic_exchange
	and __atomic_store.
	* optabs.c (expand_atomic_exchange): Expand to
	__sync_lock_test_and_set only when originated from that builtin.
	(expand_atomic_store): Expand to __sync_lock_release when
	originated from that builtin.
	* builtins.c (expand_builtin_sync_lock_test_and_set): Add flag
	that expand_atomic_exchange call originated from here.
	(expand_builtin_sync_lock_release): Add flag that
	expand_atomic_store call originated from here.
	(expand_builtin_atomic_exchange): Add origination flag.
	(expand_builtin_atomic_store): Add origination flag.
	* expr.h (expand_atomic_exchange, expand_atomic_store): Add
	boolean parameters to indicate implementation fall back options.

Index: libstdc++-v3/include/bits/atomic_base.h
===================================================================
*** libstdc++-v3/include/bits/atomic_base.h	(revision 181031)
--- libstdc++-v3/include/bits/atomic_base.h	(working copy)
*************** _GLIBCXX_BEGIN_NAMESPACE_VERSION
*** 69,78 ****
    }
  
    void
!   atomic_thread_fence(memory_order) noexcept;
  
    void
!   atomic_signal_fence(memory_order) noexcept;
  
    /// kill_dependency
    template
--- 69,84 ----
    }
  
    void
!   atomic_thread_fence(memory_order __m) noexcept
!   {
!     __atomic_thread_fence (__m);
!   }
  
    void
!   atomic_signal_fence(memory_order __m) noexcept
!   {
!     __atomic_signal_fence (__m);
!   }
  
    /// kill_dependency
    template
*************** _GLIBCXX_BEGIN_NAMESPACE_VERSION
*** 261,273 ****
    bool
    test_and_set(memory_order __m = memory_order_seq_cst) noexcept
    {
!     return __atomic_exchange_n(&_M_i, 1, __m);
    }
  
    bool
    test_and_set(memory_order __m = memory_order_seq_cst) volatile noexcept
    {
!     return __atomic_exchange_n(&_M_i, 1, __m);
    }
  
    void
--- 267,301 ----
    bool
    test_and_set(memory_order __m = memory_order_seq_cst) noexcept
    {
!     /* The standard *requires* this to be lock free.  If exchange is not
!        always lock free, then resort to the old test_and_set.  */
!     if (__atomic_always_lock_free (sizeof (_M_i), 0))
!       return __atomic_exchange_n(&_M_i, 1, __m);
!     else
!       {
!         /* Sync test and set is only guaranteed to be acquire.  */
!         if (__m == memory_order_seq_cst || __m == memory_order_release
!             || __m == memory_order_acq_rel)
!           atomic_thread_fence (__m);
!         return __sync_lock_test_and_set (&_M_i, 1);
!       }
    }
  
    bool
    test_and_set(memory_order __m = memory_order_seq_cst) volatile noexcept
    {
!     /* The standard *requires* this to be lock free.  If exchange is not
!        always lock free, then resort to the old test_and_set.  */
!     if (__atomic_always_lock_free (sizeof (_M_i), 0))
!       return __atomic_exchange_n(&_M_i, 1, __m);
!     else
!       {
!         /* Sync test and set is only guaranteed to be acquire.  */
!         if (__m == memory_order_seq_cst || __m == memory_order_release
!             || __m == memory_order_acq_rel)
!           atomic_thread_fence (__m);
!         return __sync_lock_test_and_set (&_M_i, 1);
!       }
    }
  
    void
*************** _GLIBCXX_BEGIN_NAMESPACE_VERSION
*** 277,283 ****
      __glibcxx_assert(__m != memory_order_acquire);
      __glibcxx_assert(__m != memory_order_acq_rel);
  
!     __atomic_store_n(&_M_i, 0, __m);
    }
  
    void
--- 305,321 ----
      __glibcxx_assert(__m != memory_order_acquire);
      __glibcxx_assert(__m != memory_order_acq_rel);
  
!     /* The standard *requires* this to be lock free.  If store is not always
!        lock free, then resort to the old style __sync_lock_release.  */
!     if (__atomic_always_lock_free (sizeof (_M_i), 0))
!       __atomic_store_n(&_M_i, 0, __m);
!     else
!       {
!         __sync_lock_release (&_M_i, 0);
!         /* __sync_lock_release is only guaranteed to be a release barrier.  */
!         if (__m == memory_order_seq_cst)
!           atomic_thread_fence (__m);
!       }
    }
  
    void
*************** _GLIBCXX_BEGIN_NAMESPACE_VERSION
*** 287,293 ****
      __glibcxx_assert(__m != memory_order_acquire);
      __glibcxx_assert(__m != memory_order_acq_rel);
  
!     __atomic_store_n(&_M_i, 0, __m);
    }
  };
--- 325,341 ----
      __glibcxx_assert(__m != memory_order_acquire);
      __glibcxx_assert(__m != memory_order_acq_rel);
  
!     /* The standard *requires* this to be lock free.  If store is not always
!        lock free, then resort to the old style __sync_lock_release.  */
!     if (__atomic_always_lock_free (sizeof (_M_i), 0))
!       __atomic_store_n(&_M_i, 0, __m);
!     else
!       {
!         __sync_lock_release (&_M_i, 0);
!         /* __sync_lock_release is only guaranteed to be a release barrier.  */
!         if (__m == memory_order_seq_cst)
!           atomic_thread_fence (__m);
!       }
    }
  };
Index: gcc/doc/extend.texi
===================================================================
*** gcc/doc/extend.texi	(revision 181031)
--- gcc/doc/extend.texi	(working copy)
*************** contents of @code{*@var{ptr}} in @code{*
*** 6910,6918 ****
  @deftypefn {Built-in Function} void __atomic_store_n (@var{type} *ptr, @var{type} val, int memmodel)
  This built-in function implements an atomic store operation.  It writes
! @code{@var{val}} into @code{*@var{ptr}}.  On targets which are limited,
! 0 may be the only valid value.  This mimics the behaviour of
! @code{__sync_lock_release} on such hardware.
  
  The valid memory model variants are
  @code{__ATOMIC_RELAXED}, @code{__ATOMIC_SEQ_CST}, and @code{__ATOMIC_RELEASE}.
--- 6910,6916 ----
  @deftypefn {Built-in Function} void __atomic_store_n (@var{type} *ptr, @var{type} val, int memmodel)
  This built-in function implements an atomic store operation.  It writes
! @code{@var{val}} into @code{*@var{ptr}}.
  
  The valid memory model variants are
  @code{__ATOMIC_RELAXED}, @code{__ATOMIC_SEQ_CST}, and @code{__ATOMIC_RELEASE}.
*************** This built-in function implements an ato
*** 6930,6939 ****
  @var{val} into @code{*@var{ptr}}, and returns the previous contents of
  @code{*@var{ptr}}.
  
- On targets which are limited, a value of 1 may be the only valid value
- written.  This mimics the behaviour of @code{__sync_lock_test_and_set} on
- such hardware.
- 
  The valid memory model variants are
  @code{__ATOMIC_RELAXED}, @code{__ATOMIC_SEQ_CST}, @code{__ATOMIC_ACQUIRE},
  @code{__ATOMIC_RELEASE}, and @code{__ATOMIC_ACQ_REL}.
--- 6928,6933 ----
Index: gcc/optabs.c
===================================================================
*** gcc/optabs.c	(revision 181031)
--- gcc/optabs.c	(working copy)
*************** expand_compare_and_swap_loop (rtx mem, r
*** 7256,7265 ****
     atomically store VAL in MEM and return the previous value in MEM.
  
     MEMMODEL is the memory model variant to use.
!    TARGET is an option place to stick the return value.  */
  
  rtx
! expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model)
  {
    enum machine_mode mode = GET_MODE (mem);
    enum insn_code icode;
--- 7256,7268 ----
     atomically store VAL in MEM and return the previous value in MEM.
  
     MEMMODEL is the memory model variant to use.
!    TARGET is an optional place to stick the return value.
!    USE_TEST_AND_SET indicates whether __sync_lock_test_and_set should be used
!    as a fall back if the atomic_exchange pattern does not exist.  */
  
  rtx
! expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model,
! 			bool use_test_and_set)
  {
    enum machine_mode mode = GET_MODE (mem);
    enum insn_code icode;
*************** expand_atomic_exchange (rtx target, rtx
*** 7284,7314 ****
     acquire barrier.  If the pattern exists, and the memory model is stronger
     than acquire, add a release barrier before the instruction.
     The barrier is not needed if sync_lock_test_and_set doesn't exist since
!    it will expand into a compare-and-swap loop.  */
  
!   icode = direct_optab_handler (sync_lock_test_and_set_optab, mode);
!   last_insn = get_last_insn ();
!   if ((icode != CODE_FOR_nothing) && (model == MEMMODEL_SEQ_CST ||
! 				      model == MEMMODEL_RELEASE ||
! 				      model == MEMMODEL_ACQ_REL))
!     expand_builtin_mem_thread_fence (model);
! 
!   if (icode != CODE_FOR_nothing)
!     {
!       struct expand_operand ops[3];
! 
!       create_output_operand (&ops[0], target, mode);
!       create_fixed_operand (&ops[1], mem);
!       /* VAL may have been promoted to a wider mode.  Shrink it if so.  */
!       create_convert_operand_to (&ops[2], val, mode, true);
!       if (maybe_expand_insn (icode, 3, ops))
! 	return ops[0].value;
!     }
! 
!   /* Remove any fence we may have inserted since a compare and swap loop is a
!      full memory barrier.  */
!   if (last_insn != get_last_insn ())
!     delete_insns_since (last_insn);
  
    /* Otherwise, use a compare-and-swap loop for the exchange.  */
    if (can_compare_and_swap_p (mode))
--- 7287,7325 ----
     acquire barrier.  If the pattern exists, and the memory model is stronger
     than acquire, add a release barrier before the instruction.
     The barrier is not needed if sync_lock_test_and_set doesn't exist since
!    it will expand into a compare-and-swap loop.
! 
!    Some targets have non-compliant test_and_sets, so it would be incorrect
!    to emit a test_and_set in place of an __atomic_exchange.  The test_and_set
!    builtin shares this expander since exchange can always replace the
!    test_and_set.  */
! 
!   if (use_test_and_set)
!     {
!       icode = direct_optab_handler (sync_lock_test_and_set_optab, mode);
!       last_insn = get_last_insn ();
!       if ((icode != CODE_FOR_nothing) && (model == MEMMODEL_SEQ_CST ||
! 					  model == MEMMODEL_RELEASE ||
! 					  model == MEMMODEL_ACQ_REL))
! 	expand_builtin_mem_thread_fence (model);
! 
!       if (icode != CODE_FOR_nothing)
! 	{
! 	  struct expand_operand ops[3];
! 
! 	  create_output_operand (&ops[0], target, mode);
! 	  create_fixed_operand (&ops[1], mem);
! 	  /* VAL may have been promoted to a wider mode.  Shrink it if so.  */
! 	  create_convert_operand_to (&ops[2], val, mode, true);
! 	  if (maybe_expand_insn (icode, 3, ops))
! 	    return ops[0].value;
! 	}
! 
!       /* Remove any fence that was inserted since a compare and swap loop is
! 	 already a full memory barrier.  */
!       if (last_insn != get_last_insn ())
! 	delete_insns_since (last_insn);
!     }
  
    /* Otherwise, use a compare-and-swap loop for the exchange.  */
    if (can_compare_and_swap_p (mode))
*************** expand_atomic_load (rtx target, rtx mem,
*** 7489,7498 ****
  /* This function expands the atomic store operation:
     Atomically store VAL in MEM.
     MEMMODEL is the memory model variant to use.
     function returns const0_rtx if a pattern was emitted.  */
  
  rtx
! expand_atomic_store (rtx mem, rtx val, enum memmodel model)
  {
    enum machine_mode mode = GET_MODE (mem);
    enum insn_code icode;
--- 7500,7510 ----
  /* This function expands the atomic store operation:
     Atomically store VAL in MEM.
     MEMMODEL is the memory model variant to use.
+    USE_RELEASE is true if __sync_lock_release can be used as a fall back.
     function returns const0_rtx if a pattern was emitted.  */
  
  rtx
! expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
  {
    enum machine_mode mode = GET_MODE (mem);
    enum insn_code icode;
*************** expand_atomic_store (rtx mem, rtx val, e
*** 7509,7520 ****
        return const0_rtx;
      }
  
    /* If the size of the object is greater than word size on this target,
       a default store will not be atomic, Try a mem_exchange and throw away
       the result.  If that doesn't work, don't do anything.  */
    if (GET_MODE_PRECISION(mode) > BITS_PER_WORD)
      {
!       rtx target = expand_atomic_exchange (NULL_RTX, mem, val, model);
        if (target)
          return const0_rtx;
        else
--- 7521,7550 ----
        return const0_rtx;
      }
  
+   /* If using __sync_lock_release is a viable alternative, try it.  */
+   if (use_release)
+     {
+       icode = direct_optab_handler (sync_lock_release_optab, mode);
+       if (icode != CODE_FOR_nothing)
+ 	{
+ 	  create_fixed_operand (&ops[0], mem);
+ 	  create_input_operand (&ops[1], const0_rtx, mode);
+ 	  if (maybe_expand_insn (icode, 2, ops))
+ 	    {
+ 	      /* lock_release is only a release barrier.  */
+ 	      if (model == MEMMODEL_SEQ_CST)
+ 		expand_builtin_mem_thread_fence (model);
+ 	      return const0_rtx;
+ 	    }
+ 	}
+     }
+ 
    /* If the size of the object is greater than word size on this target,
       a default store will not be atomic, Try a mem_exchange and throw away
       the result.  If that doesn't work, don't do anything.  */
    if (GET_MODE_PRECISION(mode) > BITS_PER_WORD)
      {
!       rtx target = expand_atomic_exchange (NULL_RTX, mem, val, model, false);
        if (target)
          return const0_rtx;
        else
Index: gcc/builtins.c
===================================================================
*** gcc/builtins.c	(revision 181031)
--- gcc/builtins.c	(working copy)
*************** expand_builtin_sync_lock_test_and_set (e
*** 5221,5227 ****
    mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
    val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
  
!   return expand_atomic_exchange (target, mem, val, MEMMODEL_ACQUIRE);
  }
  
  /* Expand the __sync_lock_release intrinsic.  EXP is the CALL_EXPR.  */
--- 5221,5227 ----
    mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
    val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
  
!   return expand_atomic_exchange (target, mem, val, MEMMODEL_ACQUIRE, true);
  }
  
  /* Expand the __sync_lock_release intrinsic.  EXP is the CALL_EXPR.  */
*************** expand_builtin_sync_lock_release (enum m
*** 5234,5240 ****
    /* Expand the operands.  */
    mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
  
!   expand_atomic_store (mem, const0_rtx, MEMMODEL_RELEASE);
  }
  
  /* Given an integer representing an ``enum memmodel'', verify its
--- 5234,5240 ----
    /* Expand the operands.  */
    mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
  
!   expand_atomic_store (mem, const0_rtx, MEMMODEL_RELEASE, true);
  }
  
  /* Given an integer representing an ``enum memmodel'', verify its
*************** expand_builtin_atomic_exchange (enum mac
*** 5285,5291 ****
    mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
    val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
  
!   return expand_atomic_exchange (target, mem, val, model);
  }
  
  /* Expand the __atomic_compare_exchange intrinsic:
--- 5285,5291 ----
    mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
    val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
  
!   return expand_atomic_exchange (target, mem, val, model, false);
  }
  
  /* Expand the __atomic_compare_exchange intrinsic:
*************** expand_builtin_atomic_store (enum machin
*** 5402,5408 ****
    mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
    val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
  
!   return expand_atomic_store (mem, val, model);
  }
  
  /* Expand the __atomic_fetch_XXX intrinsic:
--- 5402,5408 ----
    mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
    val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
  
!   return expand_atomic_store (mem, val, model, false);
  }
  
  /* Expand the __atomic_fetch_XXX intrinsic:
Index: gcc/expr.h
===================================================================
*** gcc/expr.h	(revision 181031)
--- gcc/expr.h	(working copy)
*************** rtx emit_conditional_add (rtx, enum rtx_
*** 215,223 ****
  rtx expand_sync_operation (rtx, rtx, enum rtx_code);
  rtx expand_sync_fetch_operation (rtx, rtx, enum rtx_code, bool, rtx);
  
! rtx expand_atomic_exchange (rtx, rtx, rtx, enum memmodel);
  rtx expand_atomic_load (rtx, rtx, enum memmodel);
! rtx expand_atomic_store (rtx, rtx, enum memmodel);
  rtx expand_atomic_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel, bool);
  void expand_atomic_thread_fence (enum memmodel);
--- 215,223 ----
  rtx expand_sync_operation (rtx, rtx, enum rtx_code);
  rtx expand_sync_fetch_operation (rtx, rtx, enum rtx_code, bool, rtx);
  
! rtx expand_atomic_exchange (rtx, rtx, rtx, enum memmodel, bool);
  rtx expand_atomic_load (rtx, rtx, enum memmodel);
! rtx expand_atomic_store (rtx, rtx, enum memmodel, bool);
  rtx expand_atomic_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel, bool);
  void expand_atomic_thread_fence (enum memmodel);
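
For readers who want the new libstdc++ fall back logic outside of diff context, here is a small sketch that restates it with the builtins called directly.  It is not part of the patch and is illustrative only: the names simple_flag, flag_test_and_set and flag_clear are invented for the example, and the memory order is passed as the __ATOMIC_* constants documented in extend.texi rather than as std::memory_order.

#include <stdbool.h>

/* Illustrative sketch only -- mirrors the fall back pattern added to
   atomic_flag::test_and_set and atomic_flag::clear above.  */

struct simple_flag
{
  bool _M_i;
};

static inline bool
flag_test_and_set (struct simple_flag *f, int order)
{
  /* When the exchange is known to always be lock free, use the new
     __atomic builtin directly.  */
  if (__atomic_always_lock_free (sizeof (f->_M_i), 0))
    return __atomic_exchange_n (&f->_M_i, 1, order);

  /* __sync_lock_test_and_set is only an acquire barrier, so emit a fence
     first for the stronger orderings.  */
  if (order == __ATOMIC_SEQ_CST || order == __ATOMIC_RELEASE
      || order == __ATOMIC_ACQ_REL)
    __atomic_thread_fence (order);
  return __sync_lock_test_and_set (&f->_M_i, 1);
}

static inline void
flag_clear (struct simple_flag *f, int order)
{
  /* Valid orders here are __ATOMIC_RELAXED, __ATOMIC_RELEASE and
     __ATOMIC_SEQ_CST, matching the asserts in atomic_flag::clear.  */
  if (__atomic_always_lock_free (sizeof (f->_M_i), 0))
    {
      __atomic_store_n (&f->_M_i, 0, order);
      return;
    }

  /* __sync_lock_release is only a release barrier; add a trailing fence
     when sequential consistency was requested.  */
  __sync_lock_release (&f->_M_i);
  if (order == __ATOMIC_SEQ_CST)
    __atomic_thread_fence (order);
}

Since __atomic_always_lock_free is evaluated at compile time when given a constant size and a null pointer, one of the two branches folds away entirely, so the generated code contains either the __atomic path or the legacy __sync path, never a runtime check.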