public inbox for gcc-patches@gcc.gnu.org
* [PATCH 2/9] Handle expanding insns with 8 operands.
  2011-10-28  4:08 [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs Richard Henderson
@ 2011-10-28  4:08 ` Richard Henderson
  2011-10-28  4:08 ` [PATCH 1/9] Fix thinko in gen_mem_thread_fence operand Richard Henderson
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2011-10-28  4:08 UTC (permalink / raw)
  To: gcc-patches; +Cc: amacleod

---
 gcc/optabs.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/gcc/optabs.c b/gcc/optabs.c
index b021042..307101b 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -7697,6 +7697,14 @@ maybe_gen_insn (enum insn_code icode, unsigned int nops,
     case 6:
       return GEN_FCN (icode) (ops[0].value, ops[1].value, ops[2].value,
 			      ops[3].value, ops[4].value, ops[5].value);
+    case 7:
+      return GEN_FCN (icode) (ops[0].value, ops[1].value, ops[2].value,
+			      ops[3].value, ops[4].value, ops[5].value,
+			      ops[6].value);
+    case 8:
+      return GEN_FCN (icode) (ops[0].value, ops[1].value, ops[2].value,
+			      ops[3].value, ops[4].value, ops[5].value,
+			      ops[6].value, ops[7].value);
     }
   gcc_unreachable ();
 }
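
For reference, the 7- and 8-operand cases are what the new 8-operand
atomic_compare_and_swap expanders (patches 4/9 and 8/9) need when driven
through the generic expand_operand machinery.  Roughly, and only as an
illustration of the caller side (names follow patch 4/9; the QImode
success output matches the i386 pattern in patch 8/9):

  struct expand_operand ops[8];

  create_output_operand (&ops[0], target_bool, QImode);
  create_output_operand (&ops[1], target_oval, mode);
  create_fixed_operand (&ops[2], mem);
  create_convert_operand_to (&ops[3], expected, mode, true);
  create_convert_operand_to (&ops[4], desired, mode, true);
  create_integer_operand (&ops[5], is_weak);
  create_integer_operand (&ops[6], succ_model);
  create_integer_operand (&ops[7], fail_model);

  /* maybe_expand_insn hands the operand vector to maybe_gen_insn, which
     is where the new "case 7" and "case 8" labels are reached.  */
  if (maybe_expand_insn (icode, 8, ops))
    {
      target_bool = ops[0].value;
      target_oval = ops[1].value;
    }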
-- 
1.7.6.4


* [PATCH 3/9] Introduce and use can_compare_and_swap_p.
  2011-10-28  4:08 [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs Richard Henderson
                   ` (2 preceding siblings ...)
  2011-10-28  4:08 ` [PATCH 7/9] Update omp for new atomic optabs Richard Henderson
@ 2011-10-28  4:08 ` Richard Henderson
  2011-10-28  5:11 ` [PATCH 9/9] Update ChangeLogs Richard Henderson
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2011-10-28  4:08 UTC (permalink / raw)
  To: gcc-patches; +Cc: amacleod

---
 gcc/builtins.c |    7 +++----
 gcc/omp-low.c  |    3 +--
 gcc/optabs.c   |   27 +++++++++++++++++++++++----
 gcc/optabs.h   |    3 +++
 4 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 3367b50..cb9da83 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -5452,12 +5452,11 @@ fold_builtin_atomic_always_lock_free (tree arg)
      of the pattern indicates support is present.  */
   size = INTVAL (expand_normal (arg)) * BITS_PER_UNIT;
   mode = mode_for_size (size, MODE_INT, 0);
-  icode = direct_optab_handler (sync_compare_and_swap_optab, mode);
 
-  if (icode == CODE_FOR_nothing)
+  if (can_compare_and_swap_p (mode))
+    return integer_one_node;
+  else
     return integer_zero_node;
-
-  return integer_one_node;
 }
 
 /* Return true if the first argument to call EXP represents a size of
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 05a3493..5faa084 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -5190,8 +5190,7 @@ expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
   type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
   itype = TREE_TYPE (TREE_TYPE (cmpxchg));
 
-  if (direct_optab_handler (sync_compare_and_swap_optab, TYPE_MODE (itype))
-      == CODE_FOR_nothing)
+  if (!can_compare_and_swap_p (TYPE_MODE (itype)))
     return false;
 
   /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 307101b..9b4d6cf 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6778,6 +6778,27 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
 }
 
 \f
+/* Return true if there is a compare_and_swap pattern.  */
+
+bool
+can_compare_and_swap_p (enum machine_mode mode)
+{
+  enum insn_code icode;
+
+  /* Check for __sync_compare_and_swap.  */
+  icode = direct_optab_handler (sync_compare_and_swap_optab, mode);
+  if (icode != CODE_FOR_nothing)
+    return true;
+
+  /* Check for __atomic_compare_and_swap.  */
+  icode = direct_optab_handler (atomic_compare_and_swap_optab, mode);
+  if (icode != CODE_FOR_nothing)
+    return true;
+
+  /* No inline compare and swap.  */
+  return false;
+}
+
 /* This is an internal subroutine of the other compare_and_swap expanders.
    MEM, OLD_VAL and NEW_VAL are as you'd expect for a compare-and-swap
    operation.  TARGET is an optional place to store the value result of
@@ -7014,8 +7035,7 @@ expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model)
     delete_insns_since (last_insn);
 
   /* Otherwise, use a compare-and-swap loop for the exchange.  */
-  if (direct_optab_handler (sync_compare_and_swap_optab, mode)
-      != CODE_FOR_nothing)
+  if (can_compare_and_swap_p (mode))
     {
       if (!target || !register_operand (target, mode))
 	target = gen_reg_rtx (mode);
@@ -7451,8 +7471,7 @@ expand_atomic_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code,
     }
 
   /* If nothing else has succeeded, default to a compare and swap loop.  */
-  if (direct_optab_handler (sync_compare_and_swap_optab, mode)
-        != CODE_FOR_nothing)
+  if (can_compare_and_swap_p (mode))
     {
       rtx insn;
       rtx t0 = gen_reg_rtx (mode), t1;
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 84e18e6..eabc994 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -952,6 +952,9 @@ extern void expand_float (rtx, rtx, int);
 /* Return the insn_code for a FLOAT_EXPR.  */
 enum insn_code can_float_p (enum machine_mode, enum machine_mode, int);
 
+/* Return true if there is an inline compare and swap pattern.  */
+extern bool can_compare_and_swap_p (enum machine_mode);
+
 /* Generate code for a FIX_EXPR.  */
 extern void expand_fix (rtx, rtx, int);
 
-- 
1.7.6.4


* [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs.
@ 2011-10-28  4:08 Richard Henderson
  2011-10-28  4:08 ` [PATCH 2/9] Handle expanding insns with 8 operands Richard Henderson
                   ` (10 more replies)
  0 siblings, 11 replies; 13+ messages in thread
From: Richard Henderson @ 2011-10-28  4:08 UTC (permalink / raw)
  To: gcc-patches; +Cc: amacleod, jakub

This exposed a wealth of problems in code that has heretofore never
been tested.  The fourth patch makes certain that all expansions of
compare-and-swap go through a single routine.

I've tested the whole series with and without the last patch, so that
both the sync_ and atomic_ paths have been exercised.  I've not
attempted to test the case where both are present; I rather assume
that'll never be the case for any target.

Jakub, in the seventh patch, is there any good reason why OMP is
making the decision of whether or not to generate a compare-and-swap
loop?  Why shouldn't we simply always generate the __sync_fetch_op
builtin and let optabs.c generate the compare-and-swap loop?
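
For reference, the compare-and-swap loop that optabs.c emits corresponds
to the usual source-level pattern; roughly, and purely as an
illustration (not code from this series):

  int
  fetch_and_add_via_cas (int *mem, int val)
  {
    int old = *mem, prev;

    /* Retry until nothing modified *mem between the load and the CAS.  */
    while ((prev = __sync_val_compare_and_swap (mem, old, old + val)) != old)
      old = prev;

    return old;		/* the value before the addition */
  }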

I'm going to commit this to the branch, but an extra set of eyes
on the sync.md changes would be welcome.


r~


Richard Henderson (9):
  Fix thinko in gen_mem_thread_fence operand.
  Handle expanding insns with 8 operands.
  Introduce and use can_compare_and_swap_p.
  Rewrite all compare-and-swap in terms of
    expand_atomic_compare_and_swap.
  Add missing atomic optab initializations.
  Update cppbuiltins for atomic-compare-and-swap.
  Update omp for new atomic optabs.
  Convert i386 backend to new atomic patterns.
  Update ChangeLogs.

 gcc/ChangeLog.mm            |   58 ++++++
 gcc/builtins.c              |   53 ++++--
 gcc/c-family/ChangeLog.mm   |    4 +
 gcc/c-family/c-cppbuiltin.c |   50 ++++--
 gcc/config/i386/i386.md     |    5 +-
 gcc/config/i386/sync.md     |  306 +++++++++++++++++---------------
 gcc/expr.h                  |    4 -
 gcc/genopinit.c             |    6 +
 gcc/omp-low.c               |   58 +++---
 gcc/optabs.c                |  422 +++++++++++++++++--------------------------
 gcc/optabs.h                |   20 ++
 11 files changed, 515 insertions(+), 471 deletions(-)

-- 
1.7.6.4


* [PATCH 7/9] Update omp for new atomic optabs.
  2011-10-28  4:08 [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs Richard Henderson
  2011-10-28  4:08 ` [PATCH 2/9] Handle expanding insns with 8 operands Richard Henderson
  2011-10-28  4:08 ` [PATCH 1/9] Fix thinko in gen_mem_thread_fence operand Richard Henderson
@ 2011-10-28  4:08 ` Richard Henderson
  2011-10-28  4:08 ` [PATCH 3/9] Introduce and use can_compare_and_swap_p Richard Henderson
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2011-10-28  4:08 UTC (permalink / raw)
  To: gcc-patches; +Cc: amacleod, jakub

Cc: jakub@redhat.com
---
 gcc/omp-low.c |   55 +++++++++++------------
 gcc/optabs.c  |  134 +++++++++++++++++++--------------------------------------
 gcc/optabs.h  |   13 ++++++
 3 files changed, 84 insertions(+), 118 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 5faa084..972cb6d 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -5009,13 +5009,16 @@ expand_omp_atomic_fetch_op (basic_block load_bb,
 {
   enum built_in_function oldbase, newbase, tmpbase;
   tree decl, itype, call;
-  direct_optab optab, oldoptab, newoptab;
+  const struct atomic_op_functions *optab;
   tree lhs, rhs;
   basic_block store_bb = single_succ (load_bb);
   gimple_stmt_iterator gsi;
   gimple stmt;
   location_t loc;
   bool need_old, need_new;
+  enum rtx_code r_code;
+  enum machine_mode imode;
+  bool have_old, have_new, have_noval;
 
   /* We expect to find the following sequences:
 
@@ -5053,41 +5056,33 @@ expand_omp_atomic_fetch_op (basic_block load_bb,
     case POINTER_PLUS_EXPR:
       oldbase = BUILT_IN_SYNC_FETCH_AND_ADD_N;
       newbase = BUILT_IN_SYNC_ADD_AND_FETCH_N;
-      optab = sync_add_optab;
-      oldoptab = sync_old_add_optab;
-      newoptab = sync_new_add_optab;
+      r_code = PLUS;
       break;
     case MINUS_EXPR:
       oldbase = BUILT_IN_SYNC_FETCH_AND_SUB_N;
       newbase = BUILT_IN_SYNC_SUB_AND_FETCH_N;
-      optab = sync_add_optab;
-      oldoptab = sync_old_add_optab;
-      newoptab = sync_new_add_optab;
+      r_code = MINUS;
       break;
     case BIT_AND_EXPR:
       oldbase = BUILT_IN_SYNC_FETCH_AND_AND_N;
       newbase = BUILT_IN_SYNC_AND_AND_FETCH_N;
-      optab = sync_and_optab;
-      oldoptab = sync_old_and_optab;
-      newoptab = sync_new_and_optab;
+      r_code = AND;
       break;
     case BIT_IOR_EXPR:
       oldbase = BUILT_IN_SYNC_FETCH_AND_OR_N;
       newbase = BUILT_IN_SYNC_OR_AND_FETCH_N;
-      optab = sync_ior_optab;
-      oldoptab = sync_old_ior_optab;
-      newoptab = sync_new_ior_optab;
+      r_code = IOR;
       break;
     case BIT_XOR_EXPR:
       oldbase = BUILT_IN_SYNC_FETCH_AND_XOR_N;
       newbase = BUILT_IN_SYNC_XOR_AND_FETCH_N;
-      optab = sync_xor_optab;
-      oldoptab = sync_old_xor_optab;
-      newoptab = sync_new_xor_optab;
+      r_code = XOR;
       break;
     default:
       return false;
     }
+  optab = get_atomic_op_for_code (r_code);
+
   /* Make sure the expression is of the proper form.  */
   if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
     rhs = gimple_assign_rhs2 (stmt);
@@ -5103,31 +5098,33 @@ expand_omp_atomic_fetch_op (basic_block load_bb,
   if (decl == NULL_TREE)
     return false;
   itype = TREE_TYPE (TREE_TYPE (decl));
+  imode = TYPE_MODE (itype);
+
+  have_new =
+    (direct_optab_handler (optab->mem_fetch_after, imode) != CODE_FOR_nothing
+     || direct_optab_handler (optab->fetch_after, imode) != CODE_FOR_nothing);
+  have_old =
+    (direct_optab_handler (optab->mem_fetch_before, imode) != CODE_FOR_nothing
+     || direct_optab_handler (optab->fetch_before, imode) != CODE_FOR_nothing);
+  have_noval =
+    (direct_optab_handler (optab->mem_no_result, imode) != CODE_FOR_nothing
+     || direct_optab_handler (optab->no_result, imode) != CODE_FOR_nothing);
 
   if (need_new)
     {
       /* expand_sync_fetch_operation can always compensate when interested
 	 in the new value.  */
-      if (direct_optab_handler (newoptab, TYPE_MODE (itype))
-	  == CODE_FOR_nothing
-	  && direct_optab_handler (oldoptab, TYPE_MODE (itype))
-	     == CODE_FOR_nothing)
+      if (!have_new && !have_old)
 	return false;
     }
   else if (need_old)
     {
       /* When interested in the old value, expand_sync_fetch_operation
-	 can compensate only if the operation is reversible.  AND and OR
-	 are not reversible.  */
-      if (direct_optab_handler (oldoptab, TYPE_MODE (itype))
-	  == CODE_FOR_nothing
-	  && (oldbase == BUILT_IN_SYNC_FETCH_AND_AND_N
-	      || oldbase == BUILT_IN_SYNC_FETCH_AND_OR_N
-	      || direct_optab_handler (newoptab, TYPE_MODE (itype))
-		 == CODE_FOR_nothing))
+	 can compensate only if the operation is reversible.  */
+      if (!have_old && !(have_new && optab->reverse_code != UNKNOWN))
 	return false;
     }
-  else if (direct_optab_handler (optab, TYPE_MODE (itype)) == CODE_FOR_nothing)
+  else if (!have_noval && !have_new && !have_old)
     return false;
 
   gsi = gsi_last_bb (load_bb);
diff --git a/gcc/optabs.c b/gcc/optabs.c
index b7c00be..f594226 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -7172,70 +7172,48 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model)
 
 /* Structure containing the pointers and values required to process the
    various forms of the atomic_fetch_op and atomic_op_fetch builtins.  */
-struct op_functions {
-  struct direct_optab_d *mem_fetch_before;
-  struct direct_optab_d *mem_fetch_after;
-  struct direct_optab_d *mem_no_result;
-  struct direct_optab_d *fetch_before;
-  struct direct_optab_d *fetch_after;
-  struct direct_optab_d *no_result;
-  enum rtx_code reverse_code;
-};
 
-/* Initialize the fields for each supported opcode.  */
-static const struct op_functions add_op = { atomic_fetch_add_optab,
-					    atomic_add_fetch_optab,
-					    atomic_add_optab,
-					    sync_old_add_optab,
-					    sync_new_add_optab,
-					    sync_add_optab,
-					    MINUS
-				    };
-
-static const struct op_functions sub_op = { atomic_fetch_sub_optab,
-					    atomic_sub_fetch_optab,
-					    atomic_sub_optab,
-					    sync_old_sub_optab,
-					    sync_new_sub_optab,
-					    sync_sub_optab,
-					    PLUS
-					  };
-
-static const struct op_functions xor_op = { atomic_fetch_xor_optab,
-					    atomic_xor_fetch_optab,
-					    atomic_xor_optab,
-					    sync_old_xor_optab,
-					    sync_new_xor_optab,
-					    sync_xor_optab,
-					    UNKNOWN
-					  };
-
-static const struct op_functions and_op = { atomic_fetch_and_optab,
-					    atomic_and_fetch_optab,
-					    atomic_and_optab,
-					    sync_old_and_optab,
-					    sync_new_and_optab,
-					    sync_and_optab,
-					    UNKNOWN
-					  };
-
-static const struct op_functions nand_op = { atomic_fetch_nand_optab,
-					     atomic_nand_fetch_optab,
-					     atomic_nand_optab,
-					     sync_old_nand_optab,
-					     sync_new_nand_optab,
-					     sync_nand_optab,
-					     UNKNOWN
-					   };
-
-static const struct op_functions or_op = { atomic_fetch_or_optab,
-					   atomic_or_fetch_optab,
-					   atomic_or_optab,
-					   sync_old_ior_optab,
-					   sync_new_ior_optab,
-					   sync_ior_optab,
-					   UNKNOWN
-					 };
+const struct atomic_op_functions *
+get_atomic_op_for_code (enum rtx_code code)
+{
+  static const struct atomic_op_functions add_op = {
+    atomic_fetch_add_optab, atomic_add_fetch_optab, atomic_add_optab,
+    sync_old_add_optab, sync_new_add_optab, sync_add_optab, MINUS
+  }, sub_op = {
+    atomic_fetch_sub_optab, atomic_sub_fetch_optab, atomic_sub_optab,
+    sync_old_sub_optab, sync_new_sub_optab, sync_sub_optab, PLUS
+  }, xor_op = {
+    atomic_fetch_xor_optab, atomic_xor_fetch_optab, atomic_xor_optab,
+    sync_old_xor_optab, sync_new_xor_optab, sync_xor_optab, XOR
+  }, and_op = {
+    atomic_fetch_and_optab, atomic_and_fetch_optab, atomic_and_optab,
+    sync_old_and_optab, sync_new_and_optab, sync_and_optab, UNKNOWN
+  }, nand_op = {
+    atomic_fetch_nand_optab, atomic_nand_fetch_optab, atomic_nand_optab,
+    sync_old_nand_optab, sync_new_nand_optab, sync_nand_optab, UNKNOWN
+  }, ior_op = {
+    atomic_fetch_or_optab, atomic_or_fetch_optab, atomic_or_optab,
+    sync_old_ior_optab, sync_new_ior_optab, sync_ior_optab, UNKNOWN
+  };
+
+  switch (code)
+    {
+    case PLUS:
+      return &add_op;
+    case MINUS:
+      return &sub_op;
+    case XOR:
+      return &xor_op;
+    case AND:
+      return &and_op;
+    case IOR:
+      return &ior_op;
+    case NOT:
+      return &nand_op;
+    default:
+      gcc_unreachable ();
+    }
+}
 
 /* Try to emit an instruction for a specific operation variation.
    OPTAB contains the OP functions.
@@ -7247,8 +7225,8 @@ static const struct op_functions or_op = { atomic_fetch_or_optab,
    AFTER is true if the returned result is the value after the operation.  */
 
 static rtx 
-maybe_emit_op (const struct op_functions *optab, rtx target, rtx mem, rtx val,
-	    bool use_memmodel, enum memmodel model, bool after)
+maybe_emit_op (const struct atomic_op_functions *optab, rtx target, rtx mem,
+	       rtx val, bool use_memmodel, enum memmodel model, bool after)
 {
   enum machine_mode mode = GET_MODE (mem);
   struct direct_optab_d *this_optab;
@@ -7317,33 +7295,11 @@ expand_atomic_fetch_op (rtx target, rtx mem, rtx val, enum rtx_code code,
 			enum memmodel model, bool after)
 {
   enum machine_mode mode = GET_MODE (mem);
-  const struct op_functions *optab;
+  const struct atomic_op_functions *optab;
   rtx result;
   bool unused_result = (target == const0_rtx);
 
-  switch (code)
-    {
-      case PLUS:
-        optab = &add_op;
-        break;
-      case MINUS:
-        optab = &sub_op;
-        break;
-      case AND:
-        optab = &and_op;
-        break;
-      case XOR:
-        optab = &xor_op;
-        break;
-      case IOR:
-        optab = &or_op;
-        break;
-      case NOT:
-        optab = &nand_op;
-        break;
-      default:
-        gcc_unreachable();
-    }
+  optab = get_atomic_op_for_code (code);
 
   /* Check for the case where the result isn't used and try those patterns.  */
   if (unused_result)
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 2ca0fcd..4487f52 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -955,6 +955,19 @@ enum insn_code can_float_p (enum machine_mode, enum machine_mode, int);
 /* Return true if there is an inline compare and swap pattern.  */
 extern bool can_compare_and_swap_p (enum machine_mode);
 
+struct atomic_op_functions
+{
+  struct direct_optab_d *mem_fetch_before;
+  struct direct_optab_d *mem_fetch_after;
+  struct direct_optab_d *mem_no_result;
+  struct direct_optab_d *fetch_before;
+  struct direct_optab_d *fetch_after;
+  struct direct_optab_d *no_result;
+  enum rtx_code reverse_code;
+};
+
+extern const struct atomic_op_functions *get_atomic_op_for_code (enum rtx_code);
+
 /* Generate code for a compare and swap.  */
 extern bool expand_atomic_compare_and_swap (rtx *, rtx *, rtx, rtx, rtx, bool,
 					    enum memmodel, enum memmodel);
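
For reference, the intent is that a caller probes the __atomic_ optab
(which takes a memory model operand) first and then the legacy __sync_
optab for the same operation.  A sketch of that probing, as an
illustration only (the real logic is split between maybe_emit_op and
expand_atomic_fetch_op; have_fetch_op is a made-up helper name):

  static bool
  have_fetch_op (const struct atomic_op_functions *op,
		 enum machine_mode mode, bool after)
  {
    /* Prefer the __atomic_ pattern, which takes a memory model operand.  */
    struct direct_optab_d *tab = after ? op->mem_fetch_after
				       : op->mem_fetch_before;

    if (direct_optab_handler (tab, mode) != CODE_FOR_nothing)
      return true;

    /* Otherwise fall back to the legacy __sync_ pattern.  */
    tab = after ? op->fetch_after : op->fetch_before;
    return direct_optab_handler (tab, mode) != CODE_FOR_nothing;
  }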
-- 
1.7.6.4


* [PATCH 1/9] Fix thinko in gen_mem_thread_fence operand.
  2011-10-28  4:08 [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs Richard Henderson
  2011-10-28  4:08 ` [PATCH 2/9] Handle expanding insns with 8 operands Richard Henderson
@ 2011-10-28  4:08 ` Richard Henderson
  2011-10-28  4:08 ` [PATCH 7/9] Update omp for new atomic optabs Richard Henderson
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2011-10-28  4:08 UTC (permalink / raw)
  To: gcc-patches; +Cc: amacleod

---
 gcc/builtins.c |    7 ++++---
 1 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/gcc/builtins.c b/gcc/builtins.c
index f47c3b1..3367b50 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -5523,11 +5523,12 @@ expand_builtin_atomic_is_lock_free (tree exp)
 void
 expand_builtin_mem_thread_fence (enum memmodel model)
 {
+  if (model == MEMMODEL_RELAXED)
+    return;
 #ifdef HAVE_mem_thread_fence
-  emit_insn (gen_mem_thread_fence (memmodel));
+  emit_insn (gen_mem_thread_fence (GEN_INT (model)));
 #else
-  if (model != MEMMODEL_RELAXED)
-    expand_builtin_sync_synchronize ();
+  expand_builtin_sync_synchronize ();
 #endif
 }
 
-- 
1.7.6.4


* [PATCH 9/9] Update ChangeLogs.
  2011-10-28  4:08 [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs Richard Henderson
                   ` (3 preceding siblings ...)
  2011-10-28  4:08 ` [PATCH 3/9] Introduce and use can_compare_and_swap_p Richard Henderson
@ 2011-10-28  5:11 ` Richard Henderson
  2011-10-28  5:11 ` [PATCH 6/9] Update cppbuiltins for atomic-compare-and-swap Richard Henderson
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2011-10-28  5:11 UTC (permalink / raw)
  To: gcc-patches; +Cc: amacleod

---
 gcc/ChangeLog.mm          |   58 +++++++++++++++++++++++++++++++++++++++++++++
 gcc/c-family/ChangeLog.mm |    4 +++
 2 files changed, 62 insertions(+), 0 deletions(-)

diff --git a/gcc/ChangeLog.mm b/gcc/ChangeLog.mm
index 498cc15..758b510 100644
--- a/gcc/ChangeLog.mm
+++ b/gcc/ChangeLog.mm
@@ -1,3 +1,61 @@
+2011-10-27  Richard Henderson  <rth@redhat.com>
+
+	* config/i386/i386.md (UNSPECV_CMPXCHG): Split into ...
+	(UNSPECV_CMPXCHG_1, UNSPECV_CMPXCHG_2,
+	UNSPECV_CMPXCHG_3, UNSPECV_CMPXCHG_4): New.
+	* config/i386/sync.md (mem_thread_fence): Rename from memory_barrier.
+	Handle the added memory model parameter.
+	(mfence_nosse): Rename from memory_barrier_nosse.
+	(sync_compare_and_swap<CASMODE>): Split into ...
+	(atomic_compare_and_swap<SWI124>): this and ...
+	(atomic_compare_and_swap<CASMODE>): this.  Handle the new parameters.
+	(atomic_compare_and_swap_single<SWI>): Rename from
+	sync_compare_and_swap<SWI>; rewrite to use split unspecs.
+	(atomic_compare_and_swap_double<DCASMODE>): Rename from
+	sync_double_compare_and_swap<DCASMODE>; rewrite to use split unspecs.
+	(*atomic_compare_and_swap_doubledi_pic): Rename from
+	sync_double_compare_and_swapdi_pic; rewrite to use split unspecs.
+	(atomic_fetch_add<SWI>): Rename from sync_old_add<SWI>; add memory
+	model parameter.
+	(*atomic_fetch_add_cmp<SWI>): Similarly.
+	(atomic_add<SWI>, atomic<any_logic><SWI>): Similarly.
+	(atomic_sub<SWI>): Similarly.  Use x86_maybe_negate_const_int.
+	(sync_lock_test_and_set<SWI>): Merge with ...
+	(atomic_exchange<SWI>): ... this.
+
+	* optabs.c (get_atomic_op_for_code): Split out from ...
+	(maybe_emit_op): ... here.
+	* optabs.h (struct atomic_op_functions): Move from optabs.c and
+	rename from struct op_functions.
+	(get_atomic_op_for_code): Declare.
+	* omp-low.c (expand_omp_atomic_fetch_op): Use get_atomic_op_for_code
+	and test both atomic and sync optabs.
+
+	* genopinit.c (optabs): Add atomic_add, atomic_sub, atomic_and,
+	atomic_nand, atomic_xor, atomic_or.
+
+	* optabs.c (expand_val_compare_and_swap_1): Remove.
+	(expand_val_compare_and_swap, expand_bool_compare_and_swap): Remove.
+	(expand_atomic_compare_and_swap): Rename from
+	expand_atomic_compare_exchange.  Rewrite to return both success and
+	oldval return values; expand via both atomic and sync optabs.
+	(expand_compare_and_swap_loop): Use expand_atomic_compare_and_swap.
+	(expand_atomic_load): Likewise.
+	* builtins.c (expand_builtin_compare_and_swap): Likewise.
+	(expand_builtin_atomic_compare_exchange): Likewise.
+	* expr.h, optabs.h: Update.
+
+	* optabs.c (can_compare_and_swap_p): New.
+	(expand_atomic_exchange, expand_atomic_fetch_op): Use it.
+	* builtins.c (fold_builtin_atomic_always_lock_free): Likewise.
+	* omp-low.c (expand_omp_atomic_pipeline): Likewise.
+	* optabs.h: Update.
+
+	* optabs.c (maybe_gen_insn): Handle 8 operands.
+
+	* builtins.c (expand_builtin_mem_thread_fence): Fixup thinko in
+	mem_thread_fence case.
+
 2011-10-26  Andrew MacLeod  <amacleod@redhat.com>
 
 	* builtins.c (expand_builtin_atomic_fetch_op): External calls for
diff --git a/gcc/c-family/ChangeLog.mm b/gcc/c-family/ChangeLog.mm
index f23e1c0..5845d68 100644
--- a/gcc/c-family/ChangeLog.mm
+++ b/gcc/c-family/ChangeLog.mm
@@ -1,3 +1,7 @@
+2011-10-27  Richard Henderson  <rth@redhat.com>
+
+	* c-cppbuiltin.c (c_cpp_builtins): Test both atomic and sync patterns.
+
 2011-10-25  Andrew MacLeod  <amacleod@redhat.com>
 
 	* c-common.c (get_atomic_generic_size): New.  Find size of generic
-- 
1.7.6.4


* [PATCH 6/9] Update cppbuiltins for atomic-compare-and-swap.
  2011-10-28  4:08 [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs Richard Henderson
                   ` (4 preceding siblings ...)
  2011-10-28  5:11 ` [PATCH 9/9] Update ChangeLogs Richard Henderson
@ 2011-10-28  5:11 ` Richard Henderson
  2011-10-28  5:20 ` [PATCH 8/9] Convert i386 backend to new atomic patterns Richard Henderson
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2011-10-28  5:11 UTC (permalink / raw)
  To: gcc-patches; +Cc: amacleod

---
 gcc/c-family/c-cppbuiltin.c |   50 ++++++++++++++++++++++++++++++-------------
 1 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
index bb9893a..bf83c26 100644
--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -758,30 +758,50 @@ c_cpp_builtins (cpp_reader *pfile)
 
   /* Tell source code if the compiler makes sync_compare_and_swap
      builtins available.  */
-#ifdef HAVE_sync_compare_and_swapqi
-  if (HAVE_sync_compare_and_swapqi)
-    cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
+#ifndef HAVE_sync_compare_and_swapqi
+#define HAVE_sync_compare_and_swapqi 0
+#endif
+#ifndef HAVE_atomic_compare_and_swapqi
+#define HAVE_atomic_compare_and_swapqi 0
 #endif
+  if (HAVE_sync_compare_and_swapqi || HAVE_atomic_compare_and_swapqi)
+    cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
 
-#ifdef HAVE_sync_compare_and_swaphi
-  if (HAVE_sync_compare_and_swaphi)
-    cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
+#ifndef HAVE_sync_compare_and_swaphi
+#define HAVE_sync_compare_and_swaphi 0
 #endif
+#ifndef HAVE_atomic_compare_and_swaphi
+#define HAVE_atomic_compare_and_swaphi 0
+#endif
+  if (HAVE_sync_compare_and_swaphi || HAVE_atomic_compare_and_swaphi)
+    cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
 
-#ifdef HAVE_sync_compare_and_swapsi
-  if (HAVE_sync_compare_and_swapsi)
-    cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
+#ifndef HAVE_sync_compare_and_swapsi
+#define HAVE_sync_compare_and_swapsi 0
+#endif
+#ifndef HAVE_atomic_compare_and_swapsi
+#define HAVE_atomic_compare_and_swapsi 0
 #endif
+  if (HAVE_sync_compare_and_swapsi || HAVE_atomic_compare_and_swapsi)
+    cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
 
-#ifdef HAVE_sync_compare_and_swapdi
-  if (HAVE_sync_compare_and_swapdi)
-    cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
+#ifndef HAVE_sync_compare_and_swapdi
+#define HAVE_sync_compare_and_swapdi 0
 #endif
+#ifndef HAVE_atomic_compare_and_swapdi
+#define HAVE_atomic_compare_and_swapdi 0
+#endif
+  if (HAVE_sync_compare_and_swapdi || HAVE_atomic_compare_and_swapdi)
+    cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
 
-#ifdef HAVE_sync_compare_and_swapti
-  if (HAVE_sync_compare_and_swapti)
-    cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16");
+#ifndef HAVE_sync_compare_and_swapti
+#define HAVE_sync_compare_and_swapti 0
+#endif
+#ifndef HAVE_atomic_compare_and_swapti
+#define HAVE_atomic_compare_and_swapti 0
 #endif
+  if (HAVE_sync_compare_and_swapti || HAVE_atomic_compare_and_swapti)
+    cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16");
 
 #ifdef DWARF2_UNWIND_INFO
   if (dwarf2out_do_cfi_asm ())
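
The visible effect is that code keyed on these macros now also sees them
defined when only the new __atomic_ pattern exists.  A typical consumer
looks like this, as an illustration (acquire_fallback_lock is a made-up
placeholder):

  extern void acquire_fallback_lock (int *);

  void
  acquire (int *lock)
  {
  #ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
    /* A 4-byte compare-and-swap can be inlined on this target.  */
    while (!__sync_bool_compare_and_swap (lock, 0, 1))
      continue;
  #else
    acquire_fallback_lock (lock);
  #endif
  }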
-- 
1.7.6.4


* [PATCH 8/9] Convert i386 backend to new atomic patterns.
  2011-10-28  4:08 [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs Richard Henderson
                   ` (5 preceding siblings ...)
  2011-10-28  5:11 ` [PATCH 6/9] Update cppbuiltins for atomic-compare-and-swap Richard Henderson
@ 2011-10-28  5:20 ` Richard Henderson
  2011-10-28  5:30 ` [PATCH 5/9] Add missing atomic optab initializations Richard Henderson
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2011-10-28  5:20 UTC (permalink / raw)
  To: gcc-patches; +Cc: amacleod, jakub, ubizjak

Cc: jakub@redhat.com
Cc: ubizjak@gmail.com
---
 gcc/config/i386/i386.md |    5 +-
 gcc/config/i386/sync.md |  306 +++++++++++++++++++++++++----------------------
 2 files changed, 167 insertions(+), 144 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a11a71b..7ce57d8 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -262,7 +262,10 @@
   UNSPECV_ALIGN
   UNSPECV_MONITOR
   UNSPECV_MWAIT
-  UNSPECV_CMPXCHG
+  UNSPECV_CMPXCHG_1
+  UNSPECV_CMPXCHG_2
+  UNSPECV_CMPXCHG_3
+  UNSPECV_CMPXCHG_4
   UNSPECV_XCHG
   UNSPECV_LOCK
   UNSPECV_PROLOGUE_USE
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index 1044255..e5579b1 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -18,31 +18,27 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
-(define_mode_iterator CASMODE
-  [QI HI SI (DI "TARGET_64BIT || TARGET_CMPXCHG8B")
-	    (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
-(define_mode_iterator DCASMODE
-  [(DI "!TARGET_64BIT && TARGET_CMPXCHG8B && !flag_pic")
-   (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
-(define_mode_attr doublemodesuffix [(DI "8") (TI "16")])
-(define_mode_attr DCASHMODE [(DI "SI") (TI "DI")])
-
-(define_expand "memory_barrier"
-  [(set (match_dup 0)
-	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+(define_expand "mem_thread_fence"
+  [(match_operand:SI 0 "const_int_operand" "")]		;; model
   ""
 {
-  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
-  MEM_VOLATILE_P (operands[0]) = 1;
+  /* Unless this is a SEQ_CST fence, the i386 memory model is strong
+     enough not to require barriers of any kind.  */
+  if (INTVAL (operands[0]) != MEMMODEL_SEQ_CST)
+    DONE;
 
-  if (!(TARGET_64BIT || TARGET_SSE2))
+  if (TARGET_64BIT || TARGET_SSE2)
+    emit_insn (gen_sse2_mfence ());
+  else
     {
-      emit_insn (gen_memory_barrier_nosse (operands[0]));
-      DONE;
+      rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+      MEM_VOLATILE_P (mem) = 1;
+      emit_insn (gen_mfence_nosse (mem));
     }
+  DONE;
 })
 
-(define_insn "memory_barrier_nosse"
+(define_insn "mfence_nosse"
   [(set (match_operand:BLK 0 "" "")
 	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))
    (clobber (reg:CC FLAGS_REG))]
@@ -50,127 +46,152 @@
   "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}"
   [(set_attr "memory" "unknown")])
 
-;; ??? It would be possible to use cmpxchg8b on pentium for DImode
-;; changes.  It's complicated because the insn uses ecx:ebx as the
-;; new value; note that the registers are reversed from the order
-;; that they'd be in with (reg:DI 2 ecx).  Similarly for TImode
-;; data in 64-bit mode.
+(define_expand "atomic_compare_and_swap<mode>"
+  [(match_operand:QI 0 "register_operand" "")		;; bool success output
+   (match_operand:SWI124 1 "register_operand" "")	;; oldval output
+   (match_operand:SWI124 2 "memory_operand" "")		;; memory
+   (match_operand:SWI124 3 "register_operand" "")	;; expected input
+   (match_operand:SWI124 4 "register_operand" "")	;; newval input
+   (match_operand:SI 5 "const_int_operand" "")		;; is_weak
+   (match_operand:SI 6 "const_int_operand" "")		;; success model
+   (match_operand:SI 7 "const_int_operand" "")]		;; failure model
+  "TARGET_CMPXCHG"
+{
+  emit_insn (gen_atomic_compare_and_swap_single<mode>
+	     (operands[1], operands[2], operands[3], operands[4]));
+  ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
+		     const0_rtx);
+  DONE;
+})
 
-(define_expand "sync_compare_and_swap<mode>"
-  [(parallel
-    [(set (match_operand:CASMODE 0 "register_operand" "")
-	  (match_operand:CASMODE 1 "memory_operand" ""))
-     (set (match_dup 1)
-	  (unspec_volatile:CASMODE
-	    [(match_dup 1)
-	     (match_operand:CASMODE 2 "register_operand" "")
-	     (match_operand:CASMODE 3 "register_operand" "")]
-	    UNSPECV_CMPXCHG))
-   (set (reg:CCZ FLAGS_REG)
-        (compare:CCZ
-          (unspec_volatile:CASMODE
-            [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG)
-          (match_dup 2)))])]
+(define_mode_iterator CASMODE
+  [(DI "TARGET_64BIT || TARGET_CMPXCHG8B")
+   (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
+(define_mode_iterator DCASMODE
+  [(DI "!TARGET_64BIT && TARGET_CMPXCHG8B && !flag_pic")
+   (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
+(define_mode_attr doublemodesuffix [(DI "8") (TI "16")])
+(define_mode_attr DCASHMODE [(DI "SI") (TI "DI")])
+
+(define_expand "atomic_compare_and_swap<mode>"
+  [(match_operand:QI 0 "register_operand" "")		;; bool success output
+   (match_operand:CASMODE 1 "register_operand" "")	;; oldval output
+   (match_operand:CASMODE 2 "memory_operand" "")	;; memory
+   (match_operand:CASMODE 3 "register_operand" "")	;; expected input
+   (match_operand:CASMODE 4 "register_operand" "")	;; newval input
+   (match_operand:SI 5 "const_int_operand" "")		;; is_weak
+   (match_operand:SI 6 "const_int_operand" "")		;; success model
+   (match_operand:SI 7 "const_int_operand" "")]		;; failure model
   "TARGET_CMPXCHG"
 {
-  if ((<MODE>mode == DImode && !TARGET_64BIT) || <MODE>mode == TImode)
+  if (<MODE>mode == DImode && TARGET_64BIT)
+    {
+      emit_insn (gen_atomic_compare_and_swap_singledi
+		 (operands[1], operands[2], operands[3], operands[4]));
+    }
+  else
     {
-      enum machine_mode hmode = <MODE>mode == DImode ? SImode : DImode;
-      rtx low = simplify_gen_subreg (hmode, operands[3], <MODE>mode, 0);
-      rtx high = simplify_gen_subreg (hmode, operands[3], <MODE>mode,
-				      GET_MODE_SIZE (hmode));
-      low = force_reg (hmode, low);
-      high = force_reg (hmode, high);
-      if (<MODE>mode == DImode)
-	{
-	  if (flag_pic && !cmpxchg8b_pic_memory_operand (operands[1], DImode))
-	    operands[1] = replace_equiv_address (operands[1],
-						 force_reg (Pmode,
-							    XEXP (operands[1],
-								  0)));
-	  emit_insn (gen_sync_double_compare_and_swapdi
-		     (operands[0], operands[1], operands[2], low, high));
-	}
-      else if (<MODE>mode == TImode)
-	emit_insn (gen_sync_double_compare_and_swapti
-		   (operands[0], operands[1], operands[2], low, high));
-      else
-	gcc_unreachable ();
-      DONE;
+      enum machine_mode hmode = <DCASHMODE>mode;
+      rtx lo_o, lo_e, lo_n, hi_o, hi_e, hi_n, mem;
+
+      lo_o = operands[1];
+      mem  = operands[2];
+      lo_e = operands[3];
+      lo_n = operands[4];
+      hi_o = gen_highpart (hmode, lo_o);
+      hi_e = gen_highpart (hmode, lo_e);
+      hi_n = gen_highpart (hmode, lo_n);
+      lo_o = gen_lowpart (hmode, lo_o);
+      lo_e = gen_lowpart (hmode, lo_e);
+      lo_n = gen_lowpart (hmode, lo_n);
+
+      if (<MODE>mode == DImode
+	  && !TARGET_64BIT
+	  && flag_pic
+	  && !cmpxchg8b_pic_memory_operand (mem, DImode))
+	mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0)));
+
+      emit_insn (gen_atomic_compare_and_swap_double<mode>
+		 (lo_o, hi_o, mem, lo_e, hi_e, lo_n, hi_n));
     }
+  ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
+		     const0_rtx);
+  DONE;
 })
 
-(define_insn "*sync_compare_and_swap<mode>"
+(define_insn "atomic_compare_and_swap_single<mode>"
   [(set (match_operand:SWI 0 "register_operand" "=a")
-	(match_operand:SWI 1 "memory_operand" "+m"))
-   (set (match_dup 1)
 	(unspec_volatile:SWI
-	  [(match_dup 1)
-	   (match_operand:SWI 2 "register_operand" "a")
+	  [(match_operand:SWI 1 "memory_operand" "+m")
+	   (match_operand:SWI 2 "register_operand" "0")
 	   (match_operand:SWI 3 "register_operand" "<r>")]
-	  UNSPECV_CMPXCHG))
+	  UNSPECV_CMPXCHG_1))
+   (set (match_dup 1)
+	(unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG_2))
    (set (reg:CCZ FLAGS_REG)
-        (compare:CCZ
-          (unspec_volatile:SWI
-            [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG)
-          (match_dup 2)))]
+        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_3))]
   "TARGET_CMPXCHG"
   "lock{%;} cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
 
-(define_insn "sync_double_compare_and_swap<mode>"
-  [(set (match_operand:DCASMODE 0 "register_operand" "=A")
-	(match_operand:DCASMODE 1 "memory_operand" "+m"))
-   (set (match_dup 1)
-	(unspec_volatile:DCASMODE
-	  [(match_dup 1)
-	   (match_operand:DCASMODE 2 "register_operand" "A")
-	   (match_operand:<DCASHMODE> 3 "register_operand" "b")
-	   (match_operand:<DCASHMODE> 4 "register_operand" "c")]
-	  UNSPECV_CMPXCHG))
+;; For double-word compare and swap, we are obliged to play tricks with
+;; the input newval (op5:op6) because the Intel register numbering does
+;; not match the gcc register numbering, so the pair must be CX:BX.
+;; That said, in order to take advantage of possible lower-subreg opts,
+;; treat all of the integral operands in the same way.
+(define_insn "atomic_compare_and_swap_double<mode>"
+  [(set (match_operand:<DCASHMODE> 0 "register_operand" "=a")
+	(unspec_volatile:<DCASHMODE>
+	  [(match_operand:DCASMODE 2 "memory_operand" "+m")
+	   (match_operand:<DCASHMODE> 3 "register_operand" "0")
+	   (match_operand:<DCASHMODE> 4 "register_operand" "1")
+	   (match_operand:<DCASHMODE> 5 "register_operand" "b")
+	   (match_operand:<DCASHMODE> 6 "register_operand" "c")]
+	  UNSPECV_CMPXCHG_1))
+   (set (match_operand:<DCASHMODE> 1 "register_operand" "=d")
+	(unspec_volatile:<DCASHMODE> [(const_int 0)] UNSPECV_CMPXCHG_2))
+   (set (match_dup 2)
+	(unspec_volatile:DCASMODE [(const_int 0)] UNSPECV_CMPXCHG_3))
    (set (reg:CCZ FLAGS_REG)
-        (compare:CCZ
-          (unspec_volatile:DCASMODE
-            [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)]
-	    UNSPECV_CMPXCHG)
-          (match_dup 2)))]
+        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))]
   ""
-  "lock{%;} cmpxchg<doublemodesuffix>b\t%1")
+  "lock{%;} cmpxchg<doublemodesuffix>b\t%2")
 
-;; Theoretically we'd like to use constraint "r" (any reg) for operand
-;; 3, but that includes ecx.  If operand 3 and 4 are the same (like when
-;; the input is -1LL) GCC might chose to allocate operand 3 to ecx, like
-;; operand 4.  This breaks, as the xchg will move the PIC register contents
-;; to %ecx then --> boom.  Operands 3 and 4 really need to be different
-;; registers, which in this case means operand 3 must not be ecx.
-;; Instead of playing tricks with fake early clobbers or the like we
-;; just enumerate all regs possible here, which (as this is !TARGET_64BIT)
+;; Theoretically we'd like to use constraint "r" (any reg) for op5,
+;; but that includes ecx.  If op5 and op6 are the same (like when
+;; the input is -1LL) GCC might choose to allocate op5 to ecx, like
+;; op6.  This breaks, as the xchg will move the PIC register contents
+;; to %ecx then --> boom.  Operands 5 and 6 really need to be different
+;; registers, which in this case means op5 must not be ecx.  Instead
+;; of playing tricks with fake early clobbers or the like we just
+;; enumerate all regs possible here, which (as this is !TARGET_64BIT)
 ;; are just esi and edi.
-(define_insn "*sync_double_compare_and_swapdi_pic"
-  [(set (match_operand:DI 0 "register_operand" "=A")
-	(match_operand:DI 1 "cmpxchg8b_pic_memory_operand" "+m"))
-   (set (match_dup 1)
-	(unspec_volatile:DI
-	  [(match_dup 1)
-	   (match_operand:DI 2 "register_operand" "A")
-	   (match_operand:SI 3 "register_operand" "SD")
-	   (match_operand:SI 4 "register_operand" "c")]
-	  UNSPECV_CMPXCHG))
+(define_insn "*atomic_compare_and_swap_doubledi_pic"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(unspec_volatile:SI
+	  [(match_operand:DI 2 "cmpxchg8b_pic_memory_operand" "+m")
+	   (match_operand:SI 3 "register_operand" "0")
+	   (match_operand:SI 4 "register_operand" "1")
+	   (match_operand:SI 5 "register_operand" "SD")
+	   (match_operand:SI 6 "register_operand" "c")]
+	  UNSPECV_CMPXCHG_1))
+   (set (match_operand:SI 1 "register_operand" "=d")
+	(unspec_volatile:SI [(const_int 0)] UNSPECV_CMPXCHG_2))
+   (set (match_dup 2)
+	(unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG_3))
    (set (reg:CCZ FLAGS_REG)
-	(compare:CCZ
-	  (unspec_volatile:DI
-	    [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)]
-	    UNSPECV_CMPXCHG)
-	  (match_dup 2)))]
+        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))]
   "!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic"
-  "xchg{l}\t%%ebx, %3\;lock{%;} cmpxchg8b\t%1\;xchg{l}\t%%ebx, %3")
+  "xchg{l}\t%%ebx, %5\;lock{%;} cmpxchg8b\t%2\;xchg{l}\t%%ebx, %5")
 
 ;; For operand 2 nonmemory_operand predicate is used instead of
 ;; register_operand to allow combiner to better optimize atomic
 ;; additions of constants.
-(define_insn "sync_old_add<mode>"
+(define_insn "atomic_fetch_add<mode>"
   [(set (match_operand:SWI 0 "register_operand" "=<r>")
 	(unspec_volatile:SWI
-	  [(match_operand:SWI 1 "memory_operand" "+m")] UNSPECV_XCHG))
+	  [(match_operand:SWI 1 "memory_operand" "+m")
+	   (match_operand:SI 3 "const_int_operand" "")]		;; model
+	  UNSPECV_XCHG))
    (set (match_dup 1)
 	(plus:SWI (match_dup 1)
 		  (match_operand:SWI 2 "nonmemory_operand" "0")))
@@ -186,7 +207,9 @@
 	(match_operand:SWI 2 "const_int_operand" ""))
    (parallel [(set (match_dup 0)
 		   (unspec_volatile:SWI
-		     [(match_operand:SWI 1 "memory_operand" "")] UNSPECV_XCHG))
+		     [(match_operand:SWI 1 "memory_operand" "")
+		      (match_operand:SI 4 "const_int_operand" "")]
+		     UNSPECV_XCHG))
 	      (set (match_dup 1)
 		   (plus:SWI (match_dup 1)
 			     (match_dup 0)))
@@ -199,17 +222,19 @@
       == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
    && !reg_overlap_mentioned_p (operands[0], operands[1])"
   [(parallel [(set (reg:CCZ FLAGS_REG)
-		   (compare:CCZ (unspec_volatile:SWI [(match_dup 1)]
-						     UNSPECV_XCHG)
-				(match_dup 3)))
+		   (compare:CCZ
+		     (unspec_volatile:SWI [(match_dup 1) (match_dup 4)]
+					  UNSPECV_XCHG)
+		     (match_dup 3)))
 	      (set (match_dup 1)
 		   (plus:SWI (match_dup 1)
 			     (match_dup 2)))])])
 
-(define_insn "*sync_old_add_cmp<mode>"
+(define_insn "*atomic_fetch_add_cmp<mode>"
   [(set (reg:CCZ FLAGS_REG)
 	(compare:CCZ (unspec_volatile:SWI
-		       [(match_operand:SWI 0 "memory_operand" "+m")]
+		       [(match_operand:SWI 0 "memory_operand" "+m")
+		        (match_operand:SI 3 "const_int_operand" "")]
 		       UNSPECV_XCHG)
 		     (match_operand:SWI 2 "const_int_operand" "i")))
    (set (match_dup 0)
@@ -232,35 +257,25 @@
   return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
 })
 
-(define_expand "atomic_exchange<mode>"
-  [(match_operand:SWI 0 "register_operand" "")		;; output
-   (match_operand:SWI 1 "memory_operand" "")		;; memory
-   (match_operand:SWI 2 "register_operand" "")		;; input
-   (match_operand:SI  3 "const_int_operand" "")]	;; memory model
-  ""
-{
-  /* On i386 the xchg instruction is a full barrier.  Thus we
-     can completely ignore the memory model operand.  */
-  emit_insn (gen_sync_lock_test_and_set<mode>
-		(operands[0], operands[1], operands[2]));
-  DONE;
-})
-
 ;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
-(define_insn "sync_lock_test_and_set<mode>"
-  [(set (match_operand:SWI 0 "register_operand" "=<r>")
+;; In addition, it is always a full barrier, so we can ignore the memory model.
+(define_insn "atomic_exchange<mode>"
+  [(set (match_operand:SWI 0 "register_operand" "=<r>")		;; output
 	(unspec_volatile:SWI
-	  [(match_operand:SWI 1 "memory_operand" "+m")] UNSPECV_XCHG))
+	  [(match_operand:SWI 1 "memory_operand" "+m")		;; memory
+	   (match_operand:SI 3 "const_int_operand" "")]		;; model
+	  UNSPECV_XCHG))
    (set (match_dup 1)
-	(match_operand:SWI 2 "register_operand" "0"))]
+	(match_operand:SWI 2 "register_operand" "0"))]		;; input
   ""
   "xchg{<imodesuffix>}\t{%1, %0|%0, %1}")
 
-(define_insn "sync_add<mode>"
+(define_insn "atomic_add<mode>"
   [(set (match_operand:SWI 0 "memory_operand" "+m")
 	(unspec_volatile:SWI
 	  [(plus:SWI (match_dup 0)
-		     (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))]
+		     (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
+	   (match_operand:SI 2 "const_int_operand" "")]		;; model
 	  UNSPECV_LOCK))
    (clobber (reg:CC FLAGS_REG))]
   ""
@@ -279,11 +294,12 @@
   return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
 })
 
-(define_insn "sync_sub<mode>"
+(define_insn "atomic_sub<mode>"
   [(set (match_operand:SWI 0 "memory_operand" "+m")
 	(unspec_volatile:SWI
 	  [(minus:SWI (match_dup 0)
-		      (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))]
+		      (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
+	   (match_operand:SI 2 "const_int_operand" "")]		;; model
 	  UNSPECV_LOCK))
    (clobber (reg:CC FLAGS_REG))]
   ""
@@ -296,14 +312,18 @@
 	return "lock{%;} inc{<imodesuffix>}\t%0";
     }
 
+  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
+    return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
+
   return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}";
 })
 
-(define_insn "sync_<code><mode>"
+(define_insn "atomic_<code><mode>"
   [(set (match_operand:SWI 0 "memory_operand" "+m")
 	(unspec_volatile:SWI
 	  [(any_logic:SWI (match_dup 0)
-			  (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))]
+			  (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
+	   (match_operand:SI 2 "const_int_operand" "")]		;; model
 	  UNSPECV_LOCK))
    (clobber (reg:CC FLAGS_REG))]
   ""
-- 
1.7.6.4


* [PATCH 5/9] Add missing atomic optab initializations.
  2011-10-28  4:08 [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs Richard Henderson
                   ` (6 preceding siblings ...)
  2011-10-28  5:20 ` [PATCH 8/9] Convert i386 backend to new atomic patterns Richard Henderson
@ 2011-10-28  5:30 ` Richard Henderson
  2011-10-28  5:40 ` [PATCH 4/9] Rewrite all compare-and-swap in terms of expand_atomic_compare_and_swap Richard Henderson
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2011-10-28  5:30 UTC (permalink / raw)
  To: gcc-patches; +Cc: amacleod

---
 gcc/genopinit.c |    6 ++++++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/gcc/genopinit.c b/gcc/genopinit.c
index e658b44..1954783 100644
--- a/gcc/genopinit.c
+++ b/gcc/genopinit.c
@@ -259,6 +259,12 @@ static const char * const optabs[] =
   "set_direct_optab_handler (atomic_fetch_nand_optab, $A, CODE_FOR_$(atomic_fetch_nand$I$a$))",
   "set_direct_optab_handler (atomic_fetch_xor_optab, $A, CODE_FOR_$(atomic_fetch_xor$I$a$))",
   "set_direct_optab_handler (atomic_fetch_or_optab, $A, CODE_FOR_$(atomic_fetch_or$I$a$))",
+  "set_direct_optab_handler (atomic_add_optab, $A, CODE_FOR_$(atomic_add$I$a$))",
+  "set_direct_optab_handler (atomic_sub_optab, $A, CODE_FOR_$(atomic_sub$I$a$))",
+  "set_direct_optab_handler (atomic_and_optab, $A, CODE_FOR_$(atomic_and$I$a$))",
+  "set_direct_optab_handler (atomic_nand_optab, $A, CODE_FOR_$(atomic_nand$I$a$))",
+  "set_direct_optab_handler (atomic_xor_optab, $A, CODE_FOR_$(atomic_xor$I$a$))",
+  "set_direct_optab_handler (atomic_or_optab, $A, CODE_FOR_$(atomic_or$I$a$))",
   "set_optab_handler (vec_set_optab, $A, CODE_FOR_$(vec_set$a$))",
   "set_optab_handler (vec_extract_optab, $A, CODE_FOR_$(vec_extract$a$))",
   "set_optab_handler (vec_extract_even_optab, $A, CODE_FOR_$(vec_extract_even$a$))",
-- 
1.7.6.4


* [PATCH 4/9] Rewrite all compare-and-swap in terms of expand_atomic_compare_and_swap.
  2011-10-28  4:08 [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs Richard Henderson
                   ` (7 preceding siblings ...)
  2011-10-28  5:30 ` [PATCH 5/9] Add missing atomic optab initializations Richard Henderson
@ 2011-10-28  5:40 ` Richard Henderson
  2011-10-28 11:29 ` [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs Jakub Jelinek
  2011-10-29 17:28 ` Andrew MacLeod
  10 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2011-10-28  5:40 UTC (permalink / raw)
  To: gcc-patches; +Cc: amacleod

---
 gcc/builtins.c |   39 +++++---
 gcc/expr.h     |    4 -
 gcc/optabs.c   |  267 ++++++++++++++++++++-----------------------------------
 gcc/optabs.h   |    4 +
 4 files changed, 126 insertions(+), 188 deletions(-)

diff --git a/gcc/builtins.c b/gcc/builtins.c
index cb9da83..756070f 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -5196,10 +5196,13 @@ expand_builtin_compare_and_swap (enum machine_mode mode, tree exp,
   old_val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
   new_val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 2), mode);
 
-  if (is_bool)
-    return expand_bool_compare_and_swap (mem, old_val, new_val, target);
-  else
-    return expand_val_compare_and_swap (mem, old_val, new_val, target);
+  if (!expand_atomic_compare_and_swap ((is_bool ? &target : NULL),
+				       (is_bool ? NULL : &target),
+				       mem, old_val, new_val, false,
+				       MEMMODEL_SEQ_CST, MEMMODEL_SEQ_CST))
+    return NULL_RTX;
+
+  return target;
 }
 
 /* Expand the __sync_lock_test_and_set intrinsic.  Note that the most
@@ -5293,8 +5296,10 @@ static rtx
 expand_builtin_atomic_compare_exchange (enum machine_mode mode, tree exp, 
 					rtx target)
 {
-  rtx expect, desired, mem, weak;
+  rtx expect, desired, mem, oldval;
   enum memmodel success, failure;
+  tree weak;
+  bool is_weak;
 
   success = get_memmodel (CALL_EXPR_ARG (exp, 4));
   failure = get_memmodel (CALL_EXPR_ARG (exp, 5));
@@ -5307,24 +5312,31 @@ expand_builtin_atomic_compare_exchange (enum machine_mode mode, tree exp,
 
   if (failure > success)
     {
-      error ("failure memory model cannot be stronger than success memory model for %<__atomic_compare_exchange%>");
+      error ("failure memory model cannot be stronger than success "
+	     "memory model for %<__atomic_compare_exchange%>");
       return NULL_RTX;
     }
   
   /* Expand the operands.  */
   mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
 
-  expect = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, ptr_mode, 
-			EXPAND_NORMAL);
+  expect = expand_normal (CALL_EXPR_ARG (exp, 1));
   expect = convert_memory_address (Pmode, expect);
-
   desired = expand_expr_force_mode (CALL_EXPR_ARG (exp, 2), mode);
 
-  weak = expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX, ptr_mode,
-		      EXPAND_NORMAL);
+  weak = CALL_EXPR_ARG (exp, 3);
+  is_weak = false;
+  if (host_integerp (weak, 0) && tree_low_cst (weak, 0) != 0)
+    is_weak = true;
 
-  return expand_atomic_compare_exchange (target, mem, expect, desired, weak,
-					 success, failure);
+  oldval = copy_to_reg (gen_rtx_MEM (mode, expect));
+
+  if (!expand_atomic_compare_and_swap (&target, &oldval, mem, oldval,
+				       desired, is_weak, success, failure))
+    return NULL_RTX;
+
+  emit_move_insn (gen_rtx_MEM (mode, expect), oldval);
+  return target;
 }
 
 /* Expand the __atomic_load intrinsic:
@@ -5442,7 +5454,6 @@ fold_builtin_atomic_always_lock_free (tree arg)
 {
   int size;
   enum machine_mode mode;
-  enum insn_code icode;
 
   if (TREE_CODE (arg) != INTEGER_CST)
     return NULL_TREE;
diff --git a/gcc/expr.h b/gcc/expr.h
index 8cae22b..1623ad9 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -212,14 +212,10 @@ int can_conditionally_move_p (enum machine_mode mode);
 rtx emit_conditional_add (rtx, enum rtx_code, rtx, rtx, enum machine_mode,
 			  rtx, rtx, enum machine_mode, int);
 
-rtx expand_val_compare_and_swap (rtx, rtx, rtx, rtx);
-rtx expand_bool_compare_and_swap (rtx, rtx, rtx, rtx);
 rtx expand_sync_operation (rtx, rtx, enum rtx_code);
 rtx expand_sync_fetch_operation (rtx, rtx, enum rtx_code, bool, rtx);
 
 rtx expand_atomic_exchange (rtx, rtx, rtx, enum memmodel);
-rtx expand_atomic_compare_exchange (rtx, rtx, rtx, rtx, rtx, enum memmodel, 
-				      enum memmodel);
 rtx expand_atomic_load (rtx, rtx, enum memmodel);
 rtx expand_atomic_store (rtx, rtx, enum memmodel);
 rtx expand_atomic_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel, 
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 9b4d6cf..b7c00be 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6799,45 +6799,6 @@ can_compare_and_swap_p (enum machine_mode mode)
   return false;
 }
 
-/* This is an internal subroutine of the other compare_and_swap expanders.
-   MEM, OLD_VAL and NEW_VAL are as you'd expect for a compare-and-swap
-   operation.  TARGET is an optional place to store the value result of
-   the operation.  ICODE is the particular instruction to expand.  Return
-   the result of the operation.  */
-
-static rtx
-expand_val_compare_and_swap_1 (rtx mem, rtx old_val, rtx new_val,
-			       rtx target, enum insn_code icode)
-{
-  struct expand_operand ops[4];
-  enum machine_mode mode = GET_MODE (mem);
-
-  create_output_operand (&ops[0], target, mode);
-  create_fixed_operand (&ops[1], mem);
-  /* OLD_VAL and NEW_VAL may have been promoted to a wider mode.
-     Shrink them if so.  */
-  create_convert_operand_to (&ops[2], old_val, mode, true);
-  create_convert_operand_to (&ops[3], new_val, mode, true);
-  if (maybe_expand_insn (icode, 4, ops))
-    return ops[0].value;
-  return NULL_RTX;
-}
-
-/* Expand a compare-and-swap operation and return its value.  */
-
-rtx
-expand_val_compare_and_swap (rtx mem, rtx old_val, rtx new_val, rtx target)
-{
-  enum machine_mode mode = GET_MODE (mem);
-  enum insn_code icode
-    = direct_optab_handler (sync_compare_and_swap_optab, mode);
-
-  if (icode == CODE_FOR_nothing)
-    return NULL_RTX;
-
-  return expand_val_compare_and_swap_1 (mem, old_val, new_val, target, icode);
-}
-
 /* Helper function to find the MODE_CC set in a sync_compare_and_swap
    pattern.  */
 
@@ -6853,58 +6814,6 @@ find_cc_set (rtx x, const_rtx pat, void *data)
     }
 }
 
-/* Expand a compare-and-swap operation and store true into the result if
-   the operation was successful and false otherwise.  Return the result.
-   Unlike other routines, TARGET is not optional.  */
-
-rtx
-expand_bool_compare_and_swap (rtx mem, rtx old_val, rtx new_val, rtx target)
-{
-  enum machine_mode mode = GET_MODE (mem);
-  enum insn_code icode;
-  rtx subtarget, seq, cc_reg;
-
-  /* If the target supports a compare-and-swap pattern that simultaneously
-     sets some flag for success, then use it.  Otherwise use the regular
-     compare-and-swap and follow that immediately with a compare insn.  */
-  icode = direct_optab_handler (sync_compare_and_swap_optab, mode);
-  if (icode == CODE_FOR_nothing)
-    return NULL_RTX;
-
-  do_pending_stack_adjust ();
-  do
-    {
-      start_sequence ();
-      subtarget = expand_val_compare_and_swap_1 (mem, old_val, new_val,
-					         NULL_RTX, icode);
-      cc_reg = NULL_RTX;
-      if (subtarget == NULL_RTX)
-	{
-	  end_sequence ();
-	  return NULL_RTX;
-	}
-
-      if (have_insn_for (COMPARE, CCmode))
-	note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg);
-      seq = get_insns ();
-      end_sequence ();
-
-      /* We might be comparing against an old value.  Try again. :-(  */
-      if (!cc_reg && MEM_P (old_val))
-	{
-	  seq = NULL_RTX;
-	  old_val = force_reg (mode, old_val);
-        }
-    }
-  while (!seq);
-
-  emit_insn (seq);
-  if (cc_reg)
-    return emit_store_flag_force (target, EQ, cc_reg, const0_rtx, VOIDmode, 0, 1);
-  else
-    return emit_store_flag_force (target, EQ, subtarget, old_val, VOIDmode, 1, 1);
-}
-
 /* This is a helper function for the other atomic operations.  This function
    emits a loop that contains SEQ that iterates until a compare-and-swap
    operation at the end succeeds.  MEM is the memory to be modified.  SEQ is
@@ -6918,8 +6827,7 @@ static bool
 expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
 {
   enum machine_mode mode = GET_MODE (mem);
-  enum insn_code icode;
-  rtx label, cmp_reg, subtarget, cc_reg;
+  rtx label, cmp_reg, success, oldval;
 
   /* The loop we want to generate looks like
 
@@ -6927,8 +6835,8 @@ expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
       label:
         old_reg = cmp_reg;
 	seq;
-	cmp_reg = compare-and-swap(mem, old_reg, new_reg)
-	if (cmp_reg != old_reg)
+	if (!success)
+	if (success)
 	  goto label;
 
      Note that we only do the plain load from memory once.  Subsequent
@@ -6943,35 +6851,19 @@ expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
   if (seq)
     emit_insn (seq);
 
-  /* If the target supports a compare-and-swap pattern that simultaneously
-     sets some flag for success, then use it.  Otherwise use the regular
-     compare-and-swap and follow that immediately with a compare insn.  */
-  icode = direct_optab_handler (sync_compare_and_swap_optab, mode);
-  if (icode == CODE_FOR_nothing)
+  success = NULL_RTX;
+  oldval = cmp_reg;
+  if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
+				       new_reg, false, MEMMODEL_SEQ_CST,
+				       MEMMODEL_RELAXED))
     return false;
 
-  subtarget = expand_val_compare_and_swap_1 (mem, old_reg, new_reg,
-					     cmp_reg, icode);
-  if (subtarget == NULL_RTX)
-    return false;
-
-  cc_reg = NULL_RTX;
-  if (have_insn_for (COMPARE, CCmode))
-    note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg);
-  if (cc_reg)
-    {
-      cmp_reg = cc_reg;
-      old_reg = const0_rtx;
-    }
-  else
-    {
-      if (subtarget != cmp_reg)
-	emit_move_insn (cmp_reg, subtarget);
-    }
+  if (oldval != cmp_reg)
+    emit_move_insn (cmp_reg, oldval);
 
   /* ??? Mark this jump predicted not taken?  */
-  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, const0_rtx, GET_MODE (cmp_reg), 1,
-			   label);
+  emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
+			   GET_MODE (success), 1, label);
   return true;
 }
 
@@ -7050,77 +6942,112 @@ expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model)
 
 /* This function expands the atomic compare exchange operation:
 
+   *PTARGET_BOOL is an optional place to store the boolean success/failure.
+   *PTARGET_OVAL is an optional place to store the old value from memory.
+   Both target parameters may be NULL to indicate that we do not care about
+   that return value.  Both target parameters are updated on success to
+   the actual location of the corresponding result.
+
    MEMMODEL is the memory model variant to use.
-   TARGET is an option place to stick the return value.  */
 
-rtx
-expand_atomic_compare_exchange (rtx target, rtx mem, rtx expected, 
-				rtx desired, rtx weak, enum memmodel success, 
-				enum memmodel failure)
+   The return value of the function is true for success.  */
+
+bool
+expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval,
+				rtx mem, rtx expected, rtx desired,
+				bool is_weak, enum memmodel succ_model,
+				enum memmodel fail_model)
 {
   enum machine_mode mode = GET_MODE (mem);
-  enum insn_code icode = CODE_FOR_nothing;
-  rtx expected_val, CAS_val;
+  struct expand_operand ops[8];
+  enum insn_code icode;
+  rtx target_bool, target_oval;
 
-  /* Load '*expected into a register for the compare and swap */
-  expected_val = gen_reg_rtx (mode);
-  emit_move_insn (expected_val, gen_rtx_MEM (mode, expected));
+  /* Load expected into a register for the compare and swap.  */
+  if (MEM_P (expected))
+    expected = copy_to_reg (expected);
+
+  /* Make sure we always have some place to put the return oldval.
+     Further, make sure that place is distinct from the input expected,
+     just in case we need that path down below.  */
+  if (ptarget_oval == NULL
+      || (target_oval = *ptarget_oval) == NULL
+      || reg_overlap_mentioned_p (expected, target_oval))
+    target_oval = gen_reg_rtx (mode);
 
   icode = direct_optab_handler (atomic_compare_and_swap_optab, mode);
-    
   if (icode != CODE_FOR_nothing)
     {
-      struct expand_operand ops[8];
-      rtx original_val = gen_reg_rtx (mode);
-      enum machine_mode target_mode;
-      int is_weak;
-
-      target_mode = insn_data[icode].operand[0].mode;
-
-      /* Either WEAK is explcitly 1, or assume strong to be safe.  */
-      is_weak = (weak == const1_rtx);
+      enum machine_mode bool_mode = insn_data[icode].operand[0].mode;
 
-      if (target == NULL_RTX || target == const0_rtx 
-	  || GET_MODE (target) != target_mode)
-        target = gen_reg_rtx (target_mode);
+      /* Make sure we always have a place for the bool operand.  */
+      if (ptarget_bool == NULL
+	  || (target_bool = *ptarget_bool) == NULL
+	  || GET_MODE (target_bool) != bool_mode)
+	target_bool = gen_reg_rtx (bool_mode);
 
       /* Emit the compare_and_swap.  */
-      create_output_operand (&ops[0], target, target_mode);
-      create_output_operand (&ops[1], original_val, mode);
+      create_output_operand (&ops[0], target_bool, bool_mode);
+      create_output_operand (&ops[1], target_oval, mode);
       create_fixed_operand (&ops[2], mem);
-      create_convert_operand_to (&ops[3], expected_val, mode, true);
+      create_convert_operand_to (&ops[3], expected, mode, true);
       create_convert_operand_to (&ops[4], desired, mode, true);
       create_integer_operand (&ops[5], is_weak);
-      create_integer_operand (&ops[6], success);
-      create_integer_operand (&ops[7], failure);
+      create_integer_operand (&ops[6], succ_model);
+      create_integer_operand (&ops[7], fail_model);
       expand_insn (icode, 8, ops);
 
-      /* Store *expected = original_val  */
-      emit_move_insn (gen_rtx_MEM (mode, expected), original_val);
-
       /* Return success/failure.  */
-      return ops[0].value;
+      target_bool = ops[0].value;
+      target_oval = ops[1].value;
+      goto success;
     }
 
-
-  /* Otherwise fall back to the original __sync_val_compare_and_swap which is
-     always seq-cst.  */
+  /* Otherwise fall back to the original __sync_val_compare_and_swap
+     which is always seq-cst.  */
   icode = direct_optab_handler (sync_compare_and_swap_optab, mode);
-  if (icode == CODE_FOR_nothing)
-      return NULL_RTX;
+  if (icode != CODE_FOR_nothing)
+    {
+      rtx cc_reg;
+
+      create_output_operand (&ops[0], target_oval, mode);
+      create_fixed_operand (&ops[1], mem);
+      create_convert_operand_to (&ops[2], expected, mode, true);
+      create_convert_operand_to (&ops[3], desired, mode, true);
+      if (!maybe_expand_insn (icode, 4, ops))
+	return false;
 
-  /* Issue the compare and swap */
-  CAS_val = expand_val_compare_and_swap_1 (mem, expected_val, desired, NULL_RTX,
-					   icode);
+      target_oval = ops[0].value;
+      target_bool = NULL_RTX;
 
-  /* Always store the result back into 'expected'.  If the result is true, its
-     an extra store, but makes the flow better.  */
-  emit_move_insn (gen_rtx_MEM (mode, expected), CAS_val);
+      /* If the caller isn't interested in the boolean return value,
+	 skip the computation of it.  */
+      if (ptarget_bool == NULL)
+	goto success;
 
-  return emit_store_flag_force (target, EQ, CAS_val, expected_val,
-				VOIDmode, 1, 1);
-}
+      /* Otherwise, work out if the compare-and-swap succeeded.  */
+      cc_reg = NULL_RTX;
+      if (have_insn_for (COMPARE, CCmode))
+	note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg);
 
+      target_bool
+	= (cc_reg
+	   ? emit_store_flag_force (target_bool, EQ, cc_reg,
+				    const0_rtx, VOIDmode, 0, 1)
+	   : emit_store_flag_force (target_bool, EQ, target_oval,
+				    expected, VOIDmode, 1, 1));
+      goto success;
+    }
+  return false;
+
+ success:
+  /* Make sure that the oval output winds up where the caller asked.  */
+  if (ptarget_oval)
+    *ptarget_oval = target_oval;
+  if (ptarget_bool)
+    *ptarget_bool = target_bool;
+  return true;
+}
 
 /* This function expands the atomic load operation:
    return the atomically loaded value in MEM.
@@ -7150,13 +7077,13 @@ expand_atomic_load (rtx target, rtx mem, enum memmodel model)
   /* If there is no load pattern, default to a move with barriers. If the size
      of the object is greater than word size on this target, a default load 
      will not be atomic.  */
-  if (GET_MODE_PRECISION(mode) > BITS_PER_WORD)
+  if (GET_MODE_PRECISION (mode) > BITS_PER_WORD)
     {
-      /* Issue val = compare_and_swap (mem, 0 , 0).
+      /* Issue val = compare_and_swap (mem, 0, 0).
 	 This may cause the occasional harmless store of 0 when the value is
 	 already 0, but do it anyway until its determined to be invalid.  */
-      target = expand_val_compare_and_swap (mem, const0_rtx, const0_rtx, 
-					    target);
+      expand_atomic_compare_and_swap (NULL, &target, mem, const0_rtx,
+				      const0_rtx, false, model, model);
       return target;
     }
 
diff --git a/gcc/optabs.h b/gcc/optabs.h
index eabc994..2ca0fcd 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -955,6 +955,10 @@ enum insn_code can_float_p (enum machine_mode, enum machine_mode, int);
 /* Return true if there is an inline compare and swap pattern.  */
 extern bool can_compare_and_swap_p (enum machine_mode);
 
+/* Generate code for a compare and swap.  */
+extern bool expand_atomic_compare_and_swap (rtx *, rtx *, rtx, rtx, rtx, bool,
+					    enum memmodel, enum memmodel);
+
 /* Generate code for a FIX_EXPR.  */
 extern void expand_fix (rtx, rtx, int);
 
-- 
1.7.6.4
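
For reference, a minimal sketch of a caller that only cares about the
boolean result of the CAS (the variable names are made up, and the
surrounding optabs.c context with rtl.h/optabs.h is assumed):

  /* Sketch only, not part of the patch: expand a strong compare-and-swap
     and keep just the boolean success value, letting the new routine pick
     whichever pattern the target provides.  */
  rtx bool_result = NULL_RTX;
  if (!expand_atomic_compare_and_swap (&bool_result, NULL, mem, expected,
                                       desired, false, MEMMODEL_SEQ_CST,
                                       MEMMODEL_SEQ_CST))
    return NULL_RTX;	/* No inline pattern; the caller must fall back.  */
  return bool_result;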

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs.
  2011-10-28  4:08 [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs Richard Henderson
                   ` (8 preceding siblings ...)
  2011-10-28  5:40 ` [PATCH 4/9] Rewrite all compare-and-swap in terms of expand_atomic_compare_and_swap Richard Henderson
@ 2011-10-28 11:29 ` Jakub Jelinek
  2011-10-28 15:25   ` Richard Henderson
  2011-10-29 17:28 ` Andrew MacLeod
  10 siblings, 1 reply; 13+ messages in thread
From: Jakub Jelinek @ 2011-10-28 11:29 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches, amacleod

On Thu, Oct 27, 2011 at 09:07:29PM -0700, Richard Henderson wrote:
> Jakub, in the seventh patch, is there any good reason why OMP is
> making the decision of whether or not to generate a compare-and-swap
> loop?  Why shouldn't we simply always generate the __sync_fetch_op
> builtin and let optabs.c generate the compare-and-swap loop?

It just wants a guarantee that the builtin will actually be implemented
in hw.  I guess if __sync_fetch_op (new/old) isn't supported but
__sync_compare_and_swap_* is, we could just use the former and let
optabs.c deal with that.  But we have to handle the CAS case anyway
for most of the operations that don't have a __sync_fetch_op defined
(and for the cases where we e.g. VCE floating point data to integer
of the same size for CAS).
For ABI reasons we should keep using GOMP_{start,end}_atomic for the
types that don't have CAS in hw, and shouldn't replace it with some
generic C++11 atomic helper in some library (libgcc or elsewhere?).
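
As a rough user-level sketch of what that VCE-plus-CAS fallback amounts
to for a float (the helper name and the memcpy view-convert are made up
for illustration; the real thing happens on GIMPLE, not on source):

  #include <stdint.h>
  #include <string.h>

  /* Sketch only: fetch-add on a float done as a CAS loop on an integer
     view of the same size.  Strict-aliasing issues are ignored here.  */
  static float
  float_fetch_add (float *mem, float val)
  {
    uint32_t old_bits, new_bits;
    float old_f, new_f;
    do
      {
        old_bits = *(volatile uint32_t *) mem;
        memcpy (&old_f, &old_bits, sizeof old_f);    /* the VCE step */
        new_f = old_f + val;
        memcpy (&new_bits, &new_f, sizeof new_bits);
      }
    while (__sync_val_compare_and_swap ((uint32_t *) mem,
                                        old_bits, new_bits) != old_bits);
    return old_f;
  }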

BTW, I believe we want all #pragma omp atomic ops in the relaxed model
or weaker; I think OpenMP only guarantees that the memory is modified
or loaded atomically (so you don't see half of one value and half of
another), and there is nothing that requires ordering the atomic op
against stores/loads of any other memory location.

	Jakub

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs.
  2011-10-28 11:29 ` [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs Jakub Jelinek
@ 2011-10-28 15:25   ` Richard Henderson
  0 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2011-10-28 15:25 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, amacleod

On 10/28/2011 04:06 AM, Jakub Jelinek wrote:
> It just wants a guarantee that the builtin will actually be implemented
> in hw.  I guess if __sync_fetch_op (new/old) isn't supported but
> __sync_compare_and_swap_* is, we could just use the former and let
> optabs.c deal with that.  But we have to handle the CAS case anyway
> for most of the operations that don't have a __sync_fetch_op defined
> (and for the cases where we e.g. VCE floating point data to integer
> of the same size for CAS).

I was just thinking that the data structure with the 6 optabs that we're
exporting from optabs.c is somewhat over the top, when simply testing
can_compare_and_swap_p is just about equivalent.

On reflection, I think I'll revert that patch and try it with just that
one test...

> BTW, I believe we want all #pragma omp atomic ops in the relaxed model
> or weaker; I think OpenMP only guarantees that the memory is modified
> or loaded atomically (so you don't see half of one value and half of
> another), and there is nothing that requires ordering the atomic op
> against stores/loads of any other memory location.

... possibly with switching to the new builtins in relaxed mode?
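Something along these lines, say (only a sketch of the lowering, with a
made-up wrapper; not what omp-low.c emits today):

  void
  omp_atomic_inc (int *x)
  {
    /* #pragma omp atomic
         *x = *x + 1;  */
    __atomic_fetch_add (x, 1, __ATOMIC_RELAXED);
  }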


r~

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs.
  2011-10-28  4:08 [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs Richard Henderson
                   ` (9 preceding siblings ...)
  2011-10-28 11:29 ` [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs Jakub Jelinek
@ 2011-10-29 17:28 ` Andrew MacLeod
  10 siblings, 0 replies; 13+ messages in thread
From: Andrew MacLeod @ 2011-10-29 17:28 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches, jakub

On 10/28/2011 12:07 AM, Richard Henderson wrote:
> This exposed a wealth of problems in code that has heretofore never
> been tested.  The fourth patch makes certain that all expansions of
> compare-and-swap go through a single routine.
>
> I've tested the whole series with and without the last patch.  So
> that I've tested both the sync_ and atomic_ paths.  I've not attempted
> to test if both are present.  I rather assume that'll never be the
> case for any target.
Excellent.  The code is written to always check first for the new atomic 
patterns, and use them if present.  If it works with either one present, 
it should be fine with both, should it ever happen...   but we can leave 
that until some person actually decides that needs to happen :-)

Thanks for exercising and fixing that untested code path.  Now other 
targets should be easier to convert too.

Andrew

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2011-10-29 15:09 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-10-28  4:08 [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs Richard Henderson
2011-10-28  4:08 ` [PATCH 2/9] Handle expanding insns with 8 operands Richard Henderson
2011-10-28  4:08 ` [PATCH 1/9] Fix thinko in gen_mem_thread_fence operand Richard Henderson
2011-10-28  4:08 ` [PATCH 7/9] Update omp for new atomic optabs Richard Henderson
2011-10-28  4:08 ` [PATCH 3/9] Introduce and use can_compare_and_swap_p Richard Henderson
2011-10-28  5:11 ` [PATCH 9/9] Update ChangeLogs Richard Henderson
2011-10-28  5:11 ` [PATCH 6/9] Update cppbuiltins for atomic-compare-and-swap Richard Henderson
2011-10-28  5:20 ` [PATCH 8/9] Convert i386 backend to new atomic patterns Richard Henderson
2011-10-28  5:30 ` [PATCH 5/9] Add missing atomic optab initializations Richard Henderson
2011-10-28  5:40 ` [PATCH 4/9] Rewrite all compare-and-swap in terms of expand_atomic_compare_and_swap Richard Henderson
2011-10-28 11:29 ` [cxx-mem-model][PATCH 0/9] Convert i386 to new atomic optabs Jakub Jelinek
2011-10-28 15:25   ` Richard Henderson
2011-10-29 17:28 ` Andrew MacLeod
