public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 4/6] Convert to md_asm_adjust
  2015-05-07 21:39 [RFC 0/6] Flags outputs for asms Richard Henderson
  2015-05-07 21:39 ` [PATCH 6/6] i386: Implement asm flag outputs Richard Henderson
@ 2015-05-07 21:39 ` Richard Henderson
  2015-05-08 19:41   ` Jeff Law
  2015-05-07 21:39 ` [PATCH 5/6] i386: Add CCPmode Richard Henderson
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 33+ messages in thread
From: Richard Henderson @ 2015-05-07 21:39 UTC (permalink / raw)
  To: gcc-patches; +Cc: law, peterz, hpa, torvalds, jakub

Using proper vectors instead of lists of trees.
---
 gcc/cfgexpand.c              | 614 ++++++++++++++++++++-----------------------
 gcc/config/cris/cris.c       |  88 +++----
 gcc/config/i386/i386.c       |  24 +-
 gcc/config/mn10300/mn10300.c |  20 +-
 gcc/config/rs6000/rs6000.c   |  17 +-
 gcc/config/visium/visium.c   |  21 +-
 gcc/config/vxworks.c         |   2 +-
 gcc/doc/tm.texi              |  18 +-
 gcc/doc/tm.texi.in           |   2 +-
 gcc/gimple.c                 |   2 +-
 gcc/hooks.c                  |   8 -
 gcc/hooks.h                  |   1 -
 gcc/incpath.c                |   1 +
 gcc/mode-switching.c         |   2 +-
 gcc/system.h                 |   1 +
 gcc/target.def               |  30 ++-
 16 files changed, 401 insertions(+), 450 deletions(-)

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index fbd2101..d73678c 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -2438,14 +2438,12 @@ n_occurrences (int c, const char *s)
    the same number of alternatives.  Return true if so.  */
 
 static bool
-check_operand_nalternatives (tree outputs, tree inputs)
+check_operand_nalternatives (const vec<const char *> &constraints)
 {
-  if (outputs || inputs)
+  unsigned len = constraints.length();
+  if (len > 0)
     {
-      tree tmp = TREE_PURPOSE (outputs ? outputs : inputs);
-      int nalternatives
-	= n_occurrences (',', TREE_STRING_POINTER (TREE_VALUE (tmp)));
-      tree next = inputs;
+      int nalternatives = n_occurrences (',', constraints[0]);
 
       if (nalternatives + 1 > MAX_RECOG_ALTERNATIVES)
 	{
@@ -2453,26 +2451,14 @@ check_operand_nalternatives (tree outputs, tree inputs)
 	  return false;
 	}
 
-      tmp = outputs;
-      while (tmp)
-	{
-	  const char *constraint
-	    = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (tmp)));
-
-	  if (n_occurrences (',', constraint) != nalternatives)
-	    {
-	      error ("operand constraints for %<asm%> differ "
-		     "in number of alternatives");
-	      return false;
-	    }
-
-	  if (TREE_CHAIN (tmp))
-	    tmp = TREE_CHAIN (tmp);
-	  else
-	    tmp = next, next = 0;
-	}
+      for (unsigned i = 1; i < len; ++i)
+	if (n_occurrences (',', constraints[i]) != nalternatives)
+	  {
+	    error ("operand constraints for %<asm%> differ "
+		   "in number of alternatives");
+	    return false;
+	  }
     }
-
   return true;
 }
 
@@ -2524,156 +2510,145 @@ tree_conflicts_with_clobbers_p (tree t, HARD_REG_SET *clobbered_regs)
 static void
 expand_asm_stmt (gasm *stmt)
 {
-  int noutputs, ninputs, nclobbers, nlabels, i;
-  tree string, outputs, inputs, clobbers, labels, tail, t;
-  location_t locus = gimple_location (stmt);
-  basic_block fallthru_bb = NULL;
-
-  /* Meh... convert the gimple asm operands into real tree lists.
-     Eventually we should make all routines work on the vectors instead
-     of relying on TREE_CHAIN.  */
-  outputs = NULL_TREE;
-  noutputs = gimple_asm_noutputs (stmt);
-  if (noutputs > 0)
-    {
-      t = outputs = gimple_asm_output_op (stmt, 0);
-      for (i = 1; i < noutputs; i++)
-	t = TREE_CHAIN (t) = gimple_asm_output_op (stmt, i);
-    }
+  class save_input_location
+  {
+    location_t old;
 
-  inputs = NULL_TREE;
-  ninputs = gimple_asm_ninputs (stmt);
-  if (ninputs > 0)
+  public:
+    explicit save_input_location(location_t where)
     {
-      t = inputs = gimple_asm_input_op (stmt, 0);
-      for (i = 1; i < ninputs; i++)
-	t = TREE_CHAIN (t) = gimple_asm_input_op (stmt, i);
+      old = input_location;
+      input_location = where;
     }
 
-  clobbers = NULL_TREE;
-  nclobbers = gimple_asm_nclobbers (stmt);
-  if (nclobbers > 0)
+    ~save_input_location()
     {
-      t = clobbers = gimple_asm_clobber_op (stmt, 0);
-      for (i = 1; i < nclobbers; i++)
-	t = TREE_CHAIN (t) = gimple_asm_clobber_op (stmt, i);
+      input_location = old;
     }
+  };
 
-  labels = NULL_TREE;
-  nlabels = gimple_asm_nlabels (stmt);
-  if (nlabels > 0)
+  location_t locus = gimple_location (stmt);
+
+  if (gimple_asm_input_p (stmt))
     {
-      edge fallthru = find_fallthru_edge (gimple_bb (stmt)->succs);
-      if (fallthru)
-	fallthru_bb = fallthru->dest;
-      t = labels = gimple_asm_label_op (stmt, 0);
-      for (i = 1; i < nlabels; i++)
-	t = TREE_CHAIN (t) = gimple_asm_label_op (stmt, i);
+      const char *s = gimple_asm_string (stmt);
+      tree string = build_string (strlen (s), s);
+      expand_asm_loc (string, gimple_asm_volatile_p (stmt), locus);
+      return;
     }
 
-  {
-    const char *s = gimple_asm_string (stmt);
-    string = build_string (strlen (s), s);
-  }
+  /* There are some legacy diagnostics in here, and this also avoids a
+     sixth parameter to targetm.md_asm_adjust.  */
+  save_input_location s_i_l(locus);
 
-  if (gimple_asm_input_p (stmt))
+  unsigned noutputs = gimple_asm_noutputs (stmt);
+  unsigned ninputs = gimple_asm_ninputs (stmt);
+  unsigned nlabels = gimple_asm_nlabels (stmt);
+  unsigned i;
+
+  /* ??? Diagnose during gimplification?  */
+  if (ninputs + noutputs + nlabels > MAX_RECOG_OPERANDS)
     {
-      expand_asm_loc (string, gimple_asm_volatile_p (stmt), locus);
+      error ("more than %d operands in %<asm%>", MAX_RECOG_OPERANDS);
       return;
     }
 
-  /* Record the contents of OUTPUTS before it is modified.  */
-  tree *orig_outputs = XALLOCAVEC (tree, noutputs);
-  for (i = 0; i < noutputs; ++i)
-    orig_outputs[i] = TREE_VALUE (gimple_asm_output_op (stmt, i));
+  auto_vec<tree, MAX_RECOG_OPERANDS> output_tvec;
+  auto_vec<tree, MAX_RECOG_OPERANDS> input_tvec;
+  auto_vec<const char *, MAX_RECOG_OPERANDS> constraints;
 
-  rtvec argvec, constraintvec, labelvec;
-  rtx body;
-  int ninout;
-  HARD_REG_SET clobbered_regs;
-  int clobber_conflict_found = 0;
-  /* Vector of RTX's of evaluated output operands.  */
-  rtx *output_rtx = XALLOCAVEC (rtx, noutputs);
-  int *inout_opnum = XALLOCAVEC (int, noutputs);
-  rtx *real_output_rtx = XALLOCAVEC (rtx, noutputs);
-  machine_mode *inout_mode = XALLOCAVEC (machine_mode, noutputs);
-  const char **constraints = XALLOCAVEC (const char *, noutputs + ninputs);
-  int old_generating_concat_p = generating_concat_p;
-  rtx_code_label *fallthru_label = NULL;
+  /* Copy the gimple vectors into new vectors that we can manipulate.  */
 
-  if (! check_operand_nalternatives (outputs, inputs))
-    return;
+  output_tvec.safe_grow (noutputs);
+  input_tvec.safe_grow (ninputs);
+  constraints.safe_grow (noutputs + ninputs);
 
-  /* Collect constraints.  */
-  i = 0;
-  for (t = outputs; t ; t = TREE_CHAIN (t), i++)
-    constraints[i] = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
-  for (t = inputs; t ; t = TREE_CHAIN (t), i++)
-    constraints[i] = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
+  for (i = 0; i < noutputs; ++i)
+    {
+      tree t = gimple_asm_output_op (stmt, i);
+      output_tvec[i] = TREE_VALUE (t);
+      constraints[i] = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
+    }
+  for (i = 0; i < ninputs; i++)
+    {
+      tree t = gimple_asm_input_op (stmt, i);
+      input_tvec[i] = TREE_VALUE (t);
+      constraints[i + noutputs]
+	= TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t)));
+    }
 
-  /* Sometimes we wish to automatically clobber registers across an asm.
-     Case in point is when the i386 backend moved from cc0 to a hard reg --
-     maintaining source-level compatibility means automatically clobbering
-     the flags register.  */
-  clobbers = targetm.md_asm_clobbers (outputs, inputs, clobbers);
+  /* ??? Diagnose during gimplification?  */
+  if (! check_operand_nalternatives (constraints))
+    return;
 
   /* Count the number of meaningful clobbered registers, ignoring what
      we would ignore later.  */
-  nclobbers = 0;
+  auto_vec<rtx> clobber_rvec;
+  HARD_REG_SET clobbered_regs;
   CLEAR_HARD_REG_SET (clobbered_regs);
-  for (tail = clobbers; tail; tail = TREE_CHAIN (tail))
-    {
-      const char *regname;
-      int nregs;
-
-      if (TREE_VALUE (tail) == error_mark_node)
-	return;
-      regname = TREE_STRING_POINTER (TREE_VALUE (tail));
-
-      i = decode_reg_name_and_count (regname, &nregs);
-      if (i == -4)
-	++nclobbers;
-      else if (i == -2)
-	error ("unknown register name %qs in %<asm%>", regname);
 
-      /* Mark clobbered registers.  */
-      if (i >= 0)
-        {
-	  int reg;
+  if (unsigned n = gimple_asm_nclobbers (stmt))
+    {
+      clobber_rvec.reserve (n);
+      for (i = 0; i < n; i++)
+	{
+	  tree t = gimple_asm_clobber_op (stmt, i);
+          const char *regname = TREE_STRING_POINTER (TREE_VALUE (t));
+	  int nregs, j;
 
-	  for (reg = i; reg < i + nregs; reg++)
+	  j = decode_reg_name_and_count (regname, &nregs);
+	  if (j < 0)
 	    {
-	      ++nclobbers;
-
-	      /* Clobbering the PIC register is an error.  */
-	      if (reg == (int) PIC_OFFSET_TABLE_REGNUM)
+	      if (j == -2)
 		{
-		  error ("PIC register clobbered by %qs in %<asm%>", regname);
-		  return;
+		  /* ??? Diagnose during gimplification?  */
+		  error ("unknown register name %qs in %<asm%>", regname);
+		}
+	      else if (j == -4)
+		{
+		  rtx x = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
+		  clobber_rvec.safe_push (x);
+		}
+	      else
+		{
+		  /* Otherwise we should have -1 == empty string
+		     or -3 == cc, which is not a register.  */
+		  gcc_assert (j == -1 || j == -3);
 		}
-
-	      SET_HARD_REG_BIT (clobbered_regs, reg);
 	    }
+	  else
+	    for (int reg = j; reg < j + nregs; reg++)
+	      {
+		/* Clobbering the PIC register is an error.  */
+		if (reg == (int) PIC_OFFSET_TABLE_REGNUM)
+		  {
+		    /* ??? Diagnose during gimplification?  */
+		    error ("PIC register clobbered by %qs in %<asm%>",
+			   regname);
+		    return;
+		  }
+
+	        SET_HARD_REG_BIT (clobbered_regs, reg);
+	        rtx x = gen_rtx_REG (reg_raw_mode[reg], reg);
+		clobber_rvec.safe_push (x);
+	      }
 	}
     }
+  unsigned nclobbers = clobber_rvec.length();
 
   /* First pass over inputs and outputs checks validity and sets
      mark_addressable if needed.  */
+  /* ??? Diagnose during gimplification?  */
 
-  ninout = 0;
-  for (i = 0, tail = outputs; tail; tail = TREE_CHAIN (tail), i++)
+  for (i = 0; i < noutputs; ++i)
     {
-      tree val = TREE_VALUE (tail);
+      tree val = output_tvec[i];
       tree type = TREE_TYPE (val);
       const char *constraint;
       bool is_inout;
       bool allows_reg;
       bool allows_mem;
 
-      /* If there's an erroneous arg, emit no insn.  */
-      if (type == error_mark_node)
-	return;
-
       /* Try to parse the output constraint.  If that fails, there's
 	 no point in going further.  */
       constraint = constraints[i];
@@ -2688,35 +2663,21 @@ expand_asm_stmt (gasm *stmt)
 		  && REG_P (DECL_RTL (val))
 		  && GET_MODE (DECL_RTL (val)) != TYPE_MODE (type))))
 	mark_addressable (val);
-
-      if (is_inout)
-	ninout++;
-    }
-
-  ninputs += ninout;
-  if (ninputs + noutputs + nlabels > MAX_RECOG_OPERANDS)
-    {
-      error ("more than %d operands in %<asm%>", MAX_RECOG_OPERANDS);
-      return;
     }
 
-  for (i = 0, tail = inputs; tail; i++, tail = TREE_CHAIN (tail))
+  for (i = 0; i < ninputs; ++i)
     {
       bool allows_reg, allows_mem;
       const char *constraint;
 
-      /* If there's an erroneous arg, emit no insn, because the ASM_INPUT
-	 would get VOIDmode and that could cause a crash in reload.  */
-      if (TREE_TYPE (TREE_VALUE (tail)) == error_mark_node)
-	return;
-
       constraint = constraints[i + noutputs];
-      if (! parse_input_constraint (&constraint, i, ninputs, noutputs, ninout,
-				    constraints, &allows_mem, &allows_reg))
+      if (! parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
+				    constraints.address (),
+				    &allows_mem, &allows_reg))
 	return;
 
       if (! allows_reg && allows_mem)
-	mark_addressable (TREE_VALUE (tail));
+	mark_addressable (input_tvec[i]);
     }
 
   /* Second pass evaluates arguments.  */
@@ -2724,17 +2685,21 @@ expand_asm_stmt (gasm *stmt)
   /* Make sure stack is consistent for asm goto.  */
   if (nlabels > 0)
     do_pending_stack_adjust ();
+  int old_generating_concat_p = generating_concat_p;
+
+  /* Vector of RTX's of evaluated output operands.  */
+  auto_vec<rtx, MAX_RECOG_OPERANDS> output_rvec;
+  auto_vec<int, MAX_RECOG_OPERANDS> inout_opnum;
+  rtx_insn *after_rtl_seq = NULL, *after_rtl_end = NULL;
+
+  output_rvec.safe_grow (noutputs);
 
-  ninout = 0;
-  for (i = 0, tail = outputs; tail; tail = TREE_CHAIN (tail), i++)
+  for (i = 0; i < noutputs; ++i)
     {
-      tree val = TREE_VALUE (tail);
+      tree val = output_tvec[i];
       tree type = TREE_TYPE (val);
-      bool is_inout;
-      bool allows_reg;
-      bool allows_mem;
+      bool is_inout, allows_reg, allows_mem, ok;
       rtx op;
-      bool ok;
 
       ok = parse_output_constraint (&constraints[i], i, ninputs,
 				    noutputs, &allows_mem, &allows_reg,
@@ -2743,12 +2708,11 @@ expand_asm_stmt (gasm *stmt)
 
       /* If an output operand is not a decl or indirect ref and our constraint
 	 allows a register, make a temporary to act as an intermediate.
-	 Make the asm insn write into that, then our caller will copy it to
+	 Make the asm insn write into that, then we will copy it to
 	 the real output operand.  Likewise for promoted variables.  */
 
       generating_concat_p = 0;
 
-      real_output_rtx[i] = NULL_RTX;
       if ((TREE_CODE (val) == INDIRECT_REF
 	   && allows_mem)
 	  || (DECL_P (val)
@@ -2768,69 +2732,64 @@ expand_asm_stmt (gasm *stmt)
 	  if ((! allows_mem && MEM_P (op))
 	      || GET_CODE (op) == CONCAT)
 	    {
-	      real_output_rtx[i] = op;
+	      rtx old_op = op;
 	      op = gen_reg_rtx (GET_MODE (op));
+
+	      generating_concat_p = old_generating_concat_p;
+
 	      if (is_inout)
-		emit_move_insn (op, real_output_rtx[i]);
+		emit_move_insn (op, old_op);
+
+	      push_to_sequence2 (after_rtl_seq, after_rtl_end);
+	      emit_move_insn (old_op, op);
+	      after_rtl_seq = get_insns ();
+	      after_rtl_end = get_last_insn ();
+	      end_sequence ();
 	    }
 	}
       else
 	{
 	  op = assign_temp (type, 0, 1);
 	  op = validize_mem (op);
-	  if (!MEM_P (op) && TREE_CODE (TREE_VALUE (tail)) == SSA_NAME)
-	    set_reg_attrs_for_decl_rtl (SSA_NAME_VAR (TREE_VALUE (tail)), op);
-	  TREE_VALUE (tail) = make_tree (type, op);
-	}
-      output_rtx[i] = op;
+	  if (!MEM_P (op) && TREE_CODE (val) == SSA_NAME)
+	    set_reg_attrs_for_decl_rtl (SSA_NAME_VAR (val), op);
 
-      generating_concat_p = old_generating_concat_p;
+	  generating_concat_p = old_generating_concat_p;
 
-      if (is_inout)
-	{
-	  inout_mode[ninout] = TYPE_MODE (type);
-	  inout_opnum[ninout++] = i;
+	  push_to_sequence2 (after_rtl_seq, after_rtl_end);
+	  expand_assignment (val, make_tree (type, op), false);
+	  after_rtl_seq = get_insns ();
+	  after_rtl_end = get_last_insn ();
+	  end_sequence ();
 	}
+      output_rvec[i] = op;
 
-      if (tree_conflicts_with_clobbers_p (val, &clobbered_regs))
-	clobber_conflict_found = 1;
+      if (is_inout)
+	inout_opnum.safe_push (i);
     }
 
-  /* Make vectors for the expression-rtx, constraint strings,
-     and named operands.  */
-
-  argvec = rtvec_alloc (ninputs);
-  constraintvec = rtvec_alloc (ninputs);
-  labelvec = rtvec_alloc (nlabels);
+  auto_vec<rtx, MAX_RECOG_OPERANDS> input_rvec;
+  auto_vec<machine_mode, MAX_RECOG_OPERANDS> input_mode;
 
-  body = gen_rtx_ASM_OPERANDS ((noutputs == 0 ? VOIDmode
-				: GET_MODE (output_rtx[0])),
-			       ggc_strdup (TREE_STRING_POINTER (string)),
-			       empty_string, 0, argvec, constraintvec,
-			       labelvec, locus);
+  input_rvec.safe_grow (ninputs);
+  input_mode.safe_grow (ninputs);
 
-  MEM_VOLATILE_P (body) = gimple_asm_volatile_p (stmt);
-
-  /* Eval the inputs and put them into ARGVEC.
-     Put their constraints into ASM_INPUTs and store in CONSTRAINTS.  */
+  generating_concat_p = 0;
 
-  for (i = 0, tail = inputs; tail; tail = TREE_CHAIN (tail), ++i)
+  for (i = 0; i < ninputs; ++i)
     {
-      bool allows_reg, allows_mem;
+      tree val = input_tvec[i];
+      tree type = TREE_TYPE (val);
+      bool allows_reg, allows_mem, ok;
       const char *constraint;
-      tree val, type;
       rtx op;
-      bool ok;
 
       constraint = constraints[i + noutputs];
-      ok = parse_input_constraint (&constraint, i, ninputs, noutputs, ninout,
-				   constraints, &allows_mem, &allows_reg);
+      ok = parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
+				   constraints.address (),
+				   &allows_mem, &allows_reg);
       gcc_assert (ok);
 
-      generating_concat_p = 0;
-
-      val = TREE_VALUE (tail);
-      type = TREE_TYPE (val);
       /* EXPAND_INITIALIZER will not generate code for valid initializer
 	 constants, but will still generate code for other types of operand.
 	 This is the behavior we want for constant constraints.  */
@@ -2861,61 +2820,109 @@ expand_asm_stmt (gasm *stmt)
 	  else
 	    gcc_unreachable ();
 	}
-
-      generating_concat_p = old_generating_concat_p;
-      ASM_OPERANDS_INPUT (body, i) = op;
-
-      ASM_OPERANDS_INPUT_CONSTRAINT_EXP (body, i)
-	= gen_rtx_ASM_INPUT_loc (TYPE_MODE (type),
-				 ggc_strdup (constraints[i + noutputs]),
-				 locus);
-
-      if (tree_conflicts_with_clobbers_p (val, &clobbered_regs))
-	clobber_conflict_found = 1;
+      input_rvec[i] = op;
+      input_mode[i] = TYPE_MODE (type);
     }
 
-  /* Protect all the operands from the queue now that they have all been
-     evaluated.  */
-
-  generating_concat_p = 0;
-
   /* For in-out operands, copy output rtx to input rtx.  */
+  unsigned ninout = inout_opnum.length();
   for (i = 0; i < ninout; i++)
     {
       int j = inout_opnum[i];
-      char buffer[16];
+      rtx o = output_rvec[j];
 
-      ASM_OPERANDS_INPUT (body, ninputs - ninout + i)
-	= output_rtx[j];
+      input_rvec.safe_push (o);
+      input_mode.safe_push (GET_MODE (o));
 
+      char buffer[16];
       sprintf (buffer, "%d", j);
-      ASM_OPERANDS_INPUT_CONSTRAINT_EXP (body, ninputs - ninout + i)
-	= gen_rtx_ASM_INPUT_loc (inout_mode[i], ggc_strdup (buffer), locus);
+      constraints.safe_push (ggc_strdup (buffer));
+    }
+  ninputs += ninout;
+
+  /* Sometimes we wish to automatically clobber registers across an asm.
+     Case in point is when the i386 backend moved from cc0 to a hard reg --
+     maintaining source-level compatibility means automatically clobbering
+     the flags register.  */
+  rtx_insn *after_md_seq = NULL;
+  if (targetm.md_asm_adjust)
+    after_md_seq = targetm.md_asm_adjust (output_rvec, input_rvec,
+					  constraints, clobber_rvec,
+					  clobbered_regs);
+
+  /* Do not allow the hook to change the output and input count,
+     lest it mess up the operand numbering.  */
+  gcc_assert (output_rvec.length() == noutputs);
+  gcc_assert (input_rvec.length() == ninputs);
+  gcc_assert (constraints.length() == noutputs + ninputs);
+
+  /* But it certainly can adjust the clobbers.  */
+  nclobbers = clobber_rvec.length();
+
+  /* Third pass checks for easy conflicts.  */
+  /* ??? Why are we doing this on trees instead of rtx.  */
+
+  bool clobber_conflict_found = 0;
+  for (i = 0; i < noutputs; ++i)
+    if (tree_conflicts_with_clobbers_p (output_tvec[i], &clobbered_regs))
+	clobber_conflict_found = 1;
+  for (i = 0; i < ninputs - ninout; ++i)
+    if (tree_conflicts_with_clobbers_p (input_tvec[i], &clobbered_regs))
+	clobber_conflict_found = 1;
+
+  /* Make vectors for the expression-rtx, constraint strings,
+     and named operands.  */
+
+  rtvec argvec = rtvec_alloc (ninputs);
+  rtvec constraintvec = rtvec_alloc (ninputs);
+  rtvec labelvec = rtvec_alloc (nlabels);
+
+  rtx body = gen_rtx_ASM_OPERANDS ((noutputs == 0 ? VOIDmode
+				    : GET_MODE (output_rvec[0])),
+				   ggc_strdup (gimple_asm_string (stmt)),
+				   empty_string, 0, argvec, constraintvec,
+				   labelvec, locus);
+  MEM_VOLATILE_P (body) = gimple_asm_volatile_p (stmt);
+
+  for (i = 0; i < ninputs; ++i)
+    {
+      ASM_OPERANDS_INPUT (body, i) = input_rvec[i];
+      ASM_OPERANDS_INPUT_CONSTRAINT_EXP (body, i)
+	= gen_rtx_ASM_INPUT_loc (input_mode[i],
+				 constraints[i + noutputs],
+				 locus);
     }
 
   /* Copy labels to the vector.  */
-  for (i = 0, tail = labels; i < nlabels; ++i, tail = TREE_CHAIN (tail))
+  rtx_code_label *fallthru_label = NULL;
+  if (nlabels > 0)
     {
-      rtx r;
-      /* If asm goto has any labels in the fallthru basic block, use
-	 a label that we emit immediately after the asm goto.  Expansion
-	 may insert further instructions into the same basic block after
-	 asm goto and if we don't do this, insertion of instructions on
-	 the fallthru edge might misbehave.  See PR58670.  */
-      if (fallthru_bb
-	  && label_to_block_fn (cfun, TREE_VALUE (tail)) == fallthru_bb)
+      basic_block fallthru_bb = NULL;
+      edge fallthru = find_fallthru_edge (gimple_bb (stmt)->succs);
+      if (fallthru)
+	fallthru_bb = fallthru->dest;
+
+      for (i = 0; i < nlabels; ++i)
 	{
-	  if (fallthru_label == NULL_RTX)
-	    fallthru_label = gen_label_rtx ();
-	  r = fallthru_label;
+	  tree label = TREE_VALUE (gimple_asm_label_op (stmt, i));
+	  rtx r;
+	  /* If asm goto has any labels in the fallthru basic block, use
+	     a label that we emit immediately after the asm goto.  Expansion
+	     may insert further instructions into the same basic block after
+	     asm goto and if we don't do this, insertion of instructions on
+	     the fallthru edge might misbehave.  See PR58670.  */
+	  if (fallthru_bb && label_to_block_fn (cfun, label) == fallthru_bb)
+	    {
+	      if (fallthru_label == NULL_RTX)
+	        fallthru_label = gen_label_rtx ();
+	      r = fallthru_label;
+	    }
+	  else
+	    r = label_rtx (label);
+	  ASM_OPERANDS_LABEL (body, i) = gen_rtx_LABEL_REF (Pmode, r);
 	}
-      else
-	r = label_rtx (TREE_VALUE (tail));
-      ASM_OPERANDS_LABEL (body, i) = gen_rtx_LABEL_REF (Pmode, r);
     }
 
-  generating_concat_p = old_generating_concat_p;
-
   /* Now, for each output, construct an rtx
      (set OUTPUT (asm_operands INSN OUTPUTCONSTRAINT OUTPUTNUMBER
 			       ARGVEC CONSTRAINTS OPNAMES))
@@ -2933,8 +2940,8 @@ expand_asm_stmt (gasm *stmt)
     }
   else if (noutputs == 1 && nclobbers == 0)
     {
-      ASM_OPERANDS_OUTPUT_CONSTRAINT (body) = ggc_strdup (constraints[0]);
-      emit_insn (gen_rtx_SET (VOIDmode, output_rtx[0], body));
+      ASM_OPERANDS_OUTPUT_CONSTRAINT (body) = constraints[0];
+      emit_insn (gen_rtx_SET (VOIDmode, output_rvec[0], body));
     }
   else
     {
@@ -2947,88 +2954,52 @@ expand_asm_stmt (gasm *stmt)
       body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num + nclobbers));
 
       /* For each output operand, store a SET.  */
-      for (i = 0, tail = outputs; tail; tail = TREE_CHAIN (tail), i++)
+      for (i = 0; i < noutputs; ++i)
 	{
-	  XVECEXP (body, 0, i)
-	    = gen_rtx_SET (VOIDmode,
-			   output_rtx[i],
-			   gen_rtx_ASM_OPERANDS
-			   (GET_MODE (output_rtx[i]),
-			    ggc_strdup (TREE_STRING_POINTER (string)),
-			    ggc_strdup (constraints[i]),
-			    i, argvec, constraintvec, labelvec, locus));
-
-	  MEM_VOLATILE_P (SET_SRC (XVECEXP (body, 0, i)))
-	    = gimple_asm_volatile_p (stmt);
+	  rtx src, o = output_rvec[i];
+	  if (i == 0)
+	    {
+	      ASM_OPERANDS_OUTPUT_CONSTRAINT (obody) = constraints[0];
+	      src = obody;
+	    }
+	  else
+	    {
+	      src = gen_rtx_ASM_OPERANDS (GET_MODE (o),
+					  ASM_OPERANDS_TEMPLATE (obody),
+					  constraints[i], i, argvec,
+					  constraintvec, labelvec, locus);
+	      MEM_VOLATILE_P (src) = gimple_asm_volatile_p (stmt);
+	    }
+	  XVECEXP (body, 0, i) = gen_rtx_SET (VOIDmode, o, src);
 	}
 
       /* If there are no outputs (but there are some clobbers)
 	 store the bare ASM_OPERANDS into the PARALLEL.  */
-
       if (i == 0)
 	XVECEXP (body, 0, i++) = obody;
 
       /* Store (clobber REG) for each clobbered register specified.  */
-
-      for (tail = clobbers; tail; tail = TREE_CHAIN (tail))
+      for (unsigned j = 0; j < nclobbers; ++j)
 	{
-	  const char *regname = TREE_STRING_POINTER (TREE_VALUE (tail));
-	  int reg, nregs;
-	  int j = decode_reg_name_and_count (regname, &nregs);
-	  rtx clobbered_reg;
+	  rtx clobbered_reg = clobber_rvec[j];
 
-	  if (j < 0)
+	  /* Do sanity check for overlap between clobbers and respectively
+	     input and outputs that hasn't been handled.  Such overlap
+	     should have been detected and reported above.  */
+	  if (!clobber_conflict_found && REG_P (clobbered_reg))
 	    {
-	      if (j == -3)	/* `cc', which is not a register */
-		continue;
-
-	      if (j == -4)	/* `memory', don't cache memory across asm */
-		{
-		  XVECEXP (body, 0, i++)
-		    = gen_rtx_CLOBBER (VOIDmode,
-				       gen_rtx_MEM
-				       (BLKmode,
-					gen_rtx_SCRATCH (VOIDmode)));
-		  continue;
-		}
-
-	      /* Ignore unknown register, error already signaled.  */
-	      continue;
+	      /* We test the old body (obody) contents to avoid
+		 tripping over the under-construction body.  */
+	      for (unsigned k = 0; k < noutputs; ++k)
+		if (reg_overlap_mentioned_p (clobbered_reg, output_rvec[k]))
+		  internal_error ("asm clobber conflict with output operand");
+
+	      for (unsigned k = 0; k < ninputs - ninout; ++k)
+		if (reg_overlap_mentioned_p (clobbered_reg, input_rvec[k]))
+		  internal_error ("asm clobber conflict with input operand");
 	    }
 
-	  for (reg = j; reg < j + nregs; reg++)
-	    {
-	      /* Use QImode since that's guaranteed to clobber just
-	       * one reg.  */
-	      clobbered_reg = gen_rtx_REG (QImode, reg);
-
-	      /* Do sanity check for overlap between clobbers and
-		 respectively input and outputs that hasn't been
-		 handled.  Such overlap should have been detected and
-		 reported above.  */
-	      if (!clobber_conflict_found)
-		{
-		  int opno;
-
-		  /* We test the old body (obody) contents to avoid
-		     tripping over the under-construction body.  */
-		  for (opno = 0; opno < noutputs; opno++)
-		    if (reg_overlap_mentioned_p (clobbered_reg,
-						 output_rtx[opno]))
-		      internal_error
-			("asm clobber conflict with output operand");
-
-		  for (opno = 0; opno < ninputs - ninout; opno++)
-		    if (reg_overlap_mentioned_p (clobbered_reg,
-						 ASM_OPERANDS_INPUT (obody,
-								     opno)))
-		      internal_error
-			("asm clobber conflict with input operand");
-		}
-
-	      XVECEXP (body, 0, i++)
-		= gen_rtx_CLOBBER (VOIDmode, clobbered_reg);
-	    }
+	  XVECEXP (body, 0, i++) = gen_rtx_CLOBBER (VOIDmode, clobbered_reg);
 	}
 
       if (nlabels > 0)
@@ -3037,31 +3008,18 @@ expand_asm_stmt (gasm *stmt)
 	emit_insn (body);
     }
 
+  generating_concat_p = old_generating_concat_p;
+
   if (fallthru_label)
     emit_label (fallthru_label);
 
-  /* For any outputs that needed reloading into registers, spill them
-     back to where they belong.  */
-  for (i = 0; i < noutputs; ++i)
-    if (real_output_rtx[i])
-      emit_move_insn (real_output_rtx[i], output_rtx[i]);
-
-  /* Copy all the intermediate outputs into the specified outputs.  */
-  for (i = 0, tail = outputs; tail; tail = TREE_CHAIN (tail), i++)
-    {
-      if (orig_outputs[i] != TREE_VALUE (tail))
-	{
-	  expand_assignment (orig_outputs[i], TREE_VALUE (tail), false);
-	  free_temp_slots ();
+  if (after_md_seq)
+    emit_insn (after_md_seq);
+  if (after_rtl_seq)
+    emit_insn (after_rtl_seq);
 
-	  /* Restore the original value so that it's correct the next
-	     time we expand this function.  */
-	  TREE_VALUE (tail) = orig_outputs[i];
-	}
-    }
-
-  crtl->has_asm_statement = 1;
   free_temp_slots ();
+  crtl->has_asm_statement = 1;
 }
 
 /* Emit code to jump to the address
diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c
index 650402e..7d88a22 100644
--- a/gcc/config/cris/cris.c
+++ b/gcc/config/cris/cris.c
@@ -179,7 +179,9 @@ static rtx cris_function_incoming_arg (cumulative_args_t,
 				       machine_mode, const_tree, bool);
 static void cris_function_arg_advance (cumulative_args_t, machine_mode,
 				       const_tree, bool);
-static tree cris_md_asm_clobbers (tree, tree, tree);
+static rtx_insn *cris_md_asm_adjust (vec<rtx> &, vec<rtx> &,
+				     vec<const char *> &,
+				     vec<rtx> &, HARD_REG_SET &);
 static bool cris_cannot_force_const_mem (machine_mode, rtx);
 
 static void cris_option_override (void);
@@ -283,8 +285,8 @@ int cris_cpu_version = CRIS_DEFAULT_CPU_VERSION;
 #define TARGET_FUNCTION_INCOMING_ARG cris_function_incoming_arg
 #undef TARGET_FUNCTION_ARG_ADVANCE
 #define TARGET_FUNCTION_ARG_ADVANCE cris_function_arg_advance
-#undef TARGET_MD_ASM_CLOBBERS
-#define TARGET_MD_ASM_CLOBBERS cris_md_asm_clobbers
+#undef TARGET_MD_ASM_ADJUST
+#define TARGET_MD_ASM_ADJUST cris_md_asm_adjust
 
 #undef TARGET_CANNOT_FORCE_CONST_MEM
 #define TARGET_CANNOT_FORCE_CONST_MEM cris_cannot_force_const_mem
@@ -4219,55 +4221,41 @@ cris_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
   ca->regs += (3 + CRIS_FUNCTION_ARG_SIZE (mode, type)) / 4;
 }
 
-/* Worker function for TARGET_MD_ASM_CLOBBERS.  */
+/* Worker function for TARGET_MD_ASM_ADJUST.  */
 
-static tree
-cris_md_asm_clobbers (tree outputs, tree inputs, tree in_clobbers)
+static rtx_insn *
+cris_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
+		    vec<const char *> &constraints,
+		    vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
 {
-  HARD_REG_SET mof_set;
-  tree clobbers;
-  tree t;
-
-  CLEAR_HARD_REG_SET (mof_set);
-  SET_HARD_REG_BIT (mof_set, CRIS_MOF_REGNUM);
-
-  /* For the time being, all asms clobber condition codes.  Revisit when
-     there's a reasonable use for inputs/outputs that mention condition
-     codes.  */
-  clobbers
-    = tree_cons (NULL_TREE,
-		 build_string (strlen (reg_names[CRIS_CC0_REGNUM]),
-			       reg_names[CRIS_CC0_REGNUM]),
-		 in_clobbers);
-
-  for (t = outputs; t != NULL; t = TREE_CHAIN (t))
-    {
-      tree val = TREE_VALUE (t);
-
-      /* The constraint letter for the singleton register class of MOF
-	 is 'h'.  If it's mentioned in the constraints, the asm is
-	 MOF-aware and adding it to the clobbers would cause it to have
-	 impossible constraints.  */
-      if (strchr (TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t))),
-		  'h') != NULL
-	  || tree_overlaps_hard_reg_set (val, &mof_set) != NULL_TREE)
-	return clobbers;
-    }
-
-  for (t = inputs; t != NULL; t = TREE_CHAIN (t))
-    {
-      tree val = TREE_VALUE (t);
-
-      if (strchr (TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t))),
-		  'h') != NULL
-	  || tree_overlaps_hard_reg_set (val, &mof_set) != NULL_TREE)
-	return clobbers;
-    }
-
-  return tree_cons (NULL_TREE,
-		    build_string (strlen (reg_names[CRIS_MOF_REGNUM]),
-				  reg_names[CRIS_MOF_REGNUM]),
-		    clobbers);
+  /* For the time being, all asms clobber condition codes.
+     Revisit when there's a reasonable use for inputs/outputs
+     that mention condition codes.  */
+  clobbers.safe_push (gen_rtx_REG (CCmode, CRIS_CC0_REGNUM));
+  SET_HARD_REG_BIT (clobbered_regs, CRIS_CC0_REGNUM);
+
+  /* Determine if the source is using MOF.  If it is, automatically
+     clobbering MOF would cause it to have impossible constraints.  */
+
+  /* Look for a use of the MOF constraint letter: h.  */
+  for (unsigned i = 0, n = constraints.length(); i < n; ++i)
+    if (strchr (constraints[i], 'h') != NULL)
+      return NULL;
+
+  /* Look for an output or an input that touches MOF.  */
+  rtx mof_reg = gen_rtx_REG (SImode, CRIS_MOF_REGNUM);
+  for (unsigned i = 0, n = outputs.length(); i < n; ++i)
+    if (reg_overlap_mentioned_p (mof_reg, outputs[i]))
+      return NULL;
+  for (unsigned i = 0, n = inputs.length(); i < n; ++i)
+    if (reg_overlap_mentioned_p (mof_reg, inputs[i]))
+      return NULL;
+
+  /* No direct reference to MOF or its constraint.
+     Clobber it for backward compatibility.  */
+  clobbers.safe_push (mof_reg);
+  SET_HARD_REG_BIT (clobbered_regs, CRIS_MOF_REGNUM);
+  return NULL;
 }
 
 /* Implement TARGET_FRAME_POINTER_REQUIRED.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 627ef65..c47134e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -45428,19 +45428,23 @@ ix86_c_mode_for_suffix (char suffix)
   return VOIDmode;
 }
 
-/* Worker function for TARGET_MD_ASM_CLOBBERS.
+/* Worker function for TARGET_MD_ASM_ADJUST.
 
    We do this in the new i386 backend to maintain source compatibility
    with the old cc0-based compiler.  */
 
-static tree
-ix86_md_asm_clobbers (tree, tree, tree clobbers)
+static rtx_insn *
+ix86_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
+		    vec<const char *> &/*constraints*/,
+		    vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
 {
-  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
-			clobbers);
-  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
-			clobbers);
-  return clobbers;
+  clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
+  clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
+
+  SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
+  SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
+
+  return NULL;
 }
 
 /* Implements target vector targetm.asm.encode_section_info.  */
@@ -51995,8 +51999,8 @@ ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
 #undef TARGET_EXPAND_BUILTIN_VA_START
 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
 
-#undef TARGET_MD_ASM_CLOBBERS
-#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
+#undef TARGET_MD_ASM_ADJUST
+#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
 
 #undef TARGET_PROMOTE_PROTOTYPES
 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
diff --git a/gcc/config/mn10300/mn10300.c b/gcc/config/mn10300/mn10300.c
index 58c97e8..34d577d 100644
--- a/gcc/config/mn10300/mn10300.c
+++ b/gcc/config/mn10300/mn10300.c
@@ -2881,18 +2881,18 @@ mn10300_conditional_register_usage (void)
     call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
 }
 
-/* Worker function for TARGET_MD_ASM_CLOBBERS.
+/* Worker function for TARGET_MD_ASM_ADJUST.
    We do this in the mn10300 backend to maintain source compatibility
    with the old cc0-based compiler.  */
 
-static tree
-mn10300_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
-                         tree inputs ATTRIBUTE_UNUSED,
-                         tree clobbers)
+static rtx_insn *
+mn10300_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
+		       vec<const char *> &/*constraints*/,
+		       vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
 {
-  clobbers = tree_cons (NULL_TREE, build_string (5, "EPSW"),
-                        clobbers);
-  return clobbers;
+  clobbers.safe_push (gen_rtx_REG (CCmode, CC_REG));
+  SET_HARD_REG_BIT (clobbered_regs, CC_REG);
+  return NULL;
 }
 \f
 /* A helper function for splitting cbranch patterns after reload.  */
@@ -3442,8 +3442,8 @@ mn10300_reorg (void)
 #undef  TARGET_CONDITIONAL_REGISTER_USAGE
 #define TARGET_CONDITIONAL_REGISTER_USAGE mn10300_conditional_register_usage
 
-#undef TARGET_MD_ASM_CLOBBERS
-#define TARGET_MD_ASM_CLOBBERS  mn10300_md_asm_clobbers
+#undef TARGET_MD_ASM_ADJUST
+#define TARGET_MD_ASM_ADJUST mn10300_md_asm_adjust
 
 #undef  TARGET_FLAGS_REGNUM
 #define TARGET_FLAGS_REGNUM  CC_REG
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index c74c7d1..79ff0ea 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1597,8 +1597,8 @@ static const struct attribute_spec rs6000_attribute_table[] =
 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
 
-#undef TARGET_MD_ASM_CLOBBERS
-#define TARGET_MD_ASM_CLOBBERS rs6000_md_asm_clobbers
+#undef TARGET_MD_ASM_ADJUST
+#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
 
 #undef TARGET_OPTION_OVERRIDE
 #define TARGET_OPTION_OVERRIDE rs6000_option_override
@@ -3209,17 +3209,20 @@ rs6000_builtin_mask_calculate (void)
 	  | ((TARGET_LONG_DOUBLE_128)	    ? RS6000_BTM_LDBL128 : 0));
 }
 
-/* Implement TARGET_MD_ASM_CLOBBERS.  All asm statements are considered
+/* Implement TARGET_MD_ASM_ADJUST.  All asm statements are considered
    to clobber the XER[CA] bit because clobbering that bit without telling
    the compiler worked just fine with versions of GCC before GCC 5, and
    breaking a lot of older code in ways that are hard to track down is
    not such a great idea.  */
 
-static tree
-rs6000_md_asm_clobbers (tree, tree, tree clobbers)
+static rtx_insn *
+rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
+		      vec<const char *> &/*constraints*/,
+		      vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
 {
-  tree s = build_string (strlen (reg_names[CA_REGNO]), reg_names[CA_REGNO]);
-  return tree_cons (NULL_TREE, s, clobbers);
+  clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
+  SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
+  return NULL;
 }
 
 /* Override command line options.  Mostly we process the processor type and
diff --git a/gcc/config/visium/visium.c b/gcc/config/visium/visium.c
index e656af9..164d4d2 100644
--- a/gcc/config/visium/visium.c
+++ b/gcc/config/visium/visium.c
@@ -172,7 +172,9 @@ static bool visium_frame_pointer_required (void);
 
 static tree visium_build_builtin_va_list (void);
 
-static tree visium_md_asm_clobbers (tree, tree, tree);
+static rtx_insn *visium_md_asm_adjust (vec<rtx> &, vec<rtx> &,
+				       vec<const char *> &,
+				       vec<rtx> &, HARD_REG_SET &);
 
 static bool visium_legitimate_constant_p (enum machine_mode, rtx);
 
@@ -299,8 +301,8 @@ static unsigned int visium_reorg (void);
 #undef  TARGET_TRAMPOLINE_INIT
 #define TARGET_TRAMPOLINE_INIT visium_trampoline_init
 
-#undef TARGET_MD_ASM_CLOBBERS
-#define TARGET_MD_ASM_CLOBBERS visium_md_asm_clobbers
+#undef TARGET_MD_ASM_ADJUST
+#define TARGET_MD_ASM_ADJUST visium_md_asm_adjust
 
 #undef TARGET_FLAGS_REGNUM
 #define TARGET_FLAGS_REGNUM FLAGS_REGNUM
@@ -720,13 +722,14 @@ visium_conditional_register_usage (void)
    an asm   We do this for the FLAGS to maintain source compatibility with
    the original cc0-based compiler.  */
 
-static tree
-visium_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
-			tree inputs ATTRIBUTE_UNUSED,
-			tree clobbers)
+static rtx_insn *
+visium_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
+		      vec<const char *> &/*constraints*/,
+		      vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
 {
-  const char *flags = reg_names[FLAGS_REGNUM];
-  return tree_cons (NULL_TREE, build_string (strlen (flags), flags), clobbers);
+  clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REGNUM));
+  SET_HARD_REG_BIT (clobbered_regs, FLAGS_REGNUM);
+  return NULL;
 }
 
 /* Return true if X is a legitimate constant for a MODE immediate operand.
diff --git a/gcc/config/vxworks.c b/gcc/config/vxworks.c
index 4e4cfc8..e29dc69 100644
--- a/gcc/config/vxworks.c
+++ b/gcc/config/vxworks.c
@@ -21,13 +21,13 @@ along with GCC; see the file COPYING3.  If not see
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
+#include "vec.h"
 #include "target.h"
 #include "diagnostic-core.h"
 #include "output.h"
 #include "tm.h"
 #include "hash-set.h"
 #include "machmode.h"
-#include "vec.h"
 #include "double-int.h"
 #include "input.h"
 #include "alias.h"
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index dd1fd22..9886f4a 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -10835,15 +10835,15 @@ from shared libraries (DLLs).
 You need not define this macro if it would always evaluate to zero.
 @end defmac
 
-@deftypefn {Target Hook} tree TARGET_MD_ASM_CLOBBERS (tree @var{outputs}, tree @var{inputs}, tree @var{clobbers})
-This target hook should add to @var{clobbers} @code{STRING_CST} trees for
-any hard regs the port wishes to automatically clobber for an asm.
-It should return the result of the last @code{tree_cons} used to add a
-clobber.  The @var{outputs}, @var{inputs} and @var{clobber} lists are the
-corresponding parameters to the asm and may be inspected to avoid
-clobbering a register that is an input or output of the asm.  You can use
-@code{tree_overlaps_hard_reg_set}, declared in @file{tree.h}, to test
-for overlap with regards to asm-declared registers.
+@deftypefn {Target Hook} {rtx_insn *} TARGET_MD_ASM_ADJUST (vec<rtx>& @var{outputs}, vec<rtx>& @var{inputs}, vec<const char *>& @var{constraints}, vec<rtx>& @var{clobbers}, HARD_REG_SET& @var{clobbered_regs})
+This target hook may add @dfn{clobbers} to @var{clobbers} and
+@var{clobbered_regs} for any hard regs the port wishes to automatically
+clobber for an asm.  The @var{outputs} and @var{inputs} may be inspected
+to avoid clobbering a register that is already used by the asm.
+
+It may modify the @var{outputs}, @var{inputs}, and @var{constraints}
+as necessary for other pre-processing.  In this case the return value is
+a sequence of insns to emit after the asm.
 @end deftypefn
 
 @defmac MATH_LIBRARY
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 8680967..707d610 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7903,7 +7903,7 @@ from shared libraries (DLLs).
 You need not define this macro if it would always evaluate to zero.
 @end defmac
 
-@hook TARGET_MD_ASM_CLOBBERS
+@hook TARGET_MD_ASM_ADJUST
 
 @defmac MATH_LIBRARY
 Define this macro as a C string constant for the linker argument to link
diff --git a/gcc/gimple.c b/gcc/gimple.c
index a5c1192..8b3b322 100644
--- a/gcc/gimple.c
+++ b/gcc/gimple.c
@@ -23,10 +23,10 @@ along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"
-#include "target.h"
 #include "hash-set.h"
 #include "machmode.h"
 #include "vec.h"
+#include "target.h"
 #include "double-int.h"
 #include "input.h"
 #include "alias.h"
diff --git a/gcc/hooks.c b/gcc/hooks.c
index 824aeb0..92a2141 100644
--- a/gcc/hooks.c
+++ b/gcc/hooks.c
@@ -378,14 +378,6 @@ hook_uint_mode_0 (machine_mode m ATTRIBUTE_UNUSED)
   return 0;
 }
 
-/* Generic hook that takes three trees and returns the last one as is.  */
-tree
-hook_tree_tree_tree_tree_3rd_identity (tree a ATTRIBUTE_UNUSED,
-				       tree b ATTRIBUTE_UNUSED, tree c)
-{
-  return c;
-}
-
 /* Generic hook that takes no arguments and returns a NULL const string.  */
 const char *
 hook_constcharptr_void_null (void)
diff --git a/gcc/hooks.h b/gcc/hooks.h
index 8c929e8..02c24ce 100644
--- a/gcc/hooks.h
+++ b/gcc/hooks.h
@@ -93,7 +93,6 @@ extern tree hook_tree_const_tree_null (const_tree);
 
 extern tree hook_tree_tree_tree_null (tree, tree);
 extern tree hook_tree_tree_tree_tree_null (tree, tree, tree);
-extern tree hook_tree_tree_tree_tree_3rd_identity (tree, tree, tree);
 extern tree hook_tree_tree_int_treep_bool_null (tree, int, tree *, bool);
 
 extern unsigned hook_uint_void_0 (void);
diff --git a/gcc/incpath.c b/gcc/incpath.c
index 6c54ca6..c4e0574 100644
--- a/gcc/incpath.c
+++ b/gcc/incpath.c
@@ -21,6 +21,7 @@
 #include "system.h"
 #include "coretypes.h"
 #include "machmode.h"
+#include "vec.h"
 #include "target.h"
 #include "tm.h"
 #include "cpplib.h"
diff --git a/gcc/mode-switching.c b/gcc/mode-switching.c
index c0f865c..9af7911 100644
--- a/gcc/mode-switching.c
+++ b/gcc/mode-switching.c
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"
+#include "vec.h"
 #include "target.h"
 #include "rtl.h"
 #include "regs.h"
@@ -29,7 +30,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "insn-config.h"
 #include "recog.h"
 #include "predict.h"
-#include "vec.h"
 #include "hashtab.h"
 #include "hash-set.h"
 #include "machmode.h"
diff --git a/gcc/system.h b/gcc/system.h
index 94d8138..448a7fe 100644
--- a/gcc/system.h
+++ b/gcc/system.h
@@ -964,6 +964,7 @@ extern void fancy_abort (const char *, int, const char *) ATTRIBUTE_NORETURN;
 	TARGET_HANDLE_PRAGMA_EXTERN_PREFIX \
 	TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN \
 	TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD \
+	TARGET_MD_ASM_CLOBBERS
 
 /* Arrays that were deleted in favor of a functional interface.  */
  #pragma GCC poison built_in_decls implicit_built_in_decls
diff --git a/gcc/target.def b/gcc/target.def
index 329ea04..eba9921 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -3742,20 +3742,22 @@ machines.  One reason you may need to define this target hook is if\n\
  rtx, (void),
  default_builtin_setjmp_frame_value)
 
-/* This target hook should add STRING_CST trees for any hard regs
-   the port wishes to automatically clobber for an asm.  */
-DEFHOOK
-(md_asm_clobbers,
- "This target hook should add to @var{clobbers} @code{STRING_CST} trees for\n\
-any hard regs the port wishes to automatically clobber for an asm.\n\
-It should return the result of the last @code{tree_cons} used to add a\n\
-clobber.  The @var{outputs}, @var{inputs} and @var{clobber} lists are the\n\
-corresponding parameters to the asm and may be inspected to avoid\n\
-clobbering a register that is an input or output of the asm.  You can use\n\
-@code{tree_overlaps_hard_reg_set}, declared in @file{tree.h}, to test\n\
-for overlap with regards to asm-declared registers.",
- tree, (tree outputs, tree inputs, tree clobbers),
- hook_tree_tree_tree_tree_3rd_identity)
+/* This target hook should manipulate the outputs, inputs, constraints,
+   and clobbers the port wishes for pre-processing the asm.  */
+DEFHOOK
+(md_asm_adjust,
+ "This target hook may add @dfn{clobbers} to @var{clobbers} and\n\
+@var{clobbered_regs} for any hard regs the port wishes to automatically\n\
+clobber for an asm.  The @var{outputs} and @var{inputs} may be inspected\n\
+to avoid clobbering a register that is already used by the asm.\n\
+\n\
+It may modify the @var{outputs}, @var{inputs}, and @var{constraints}\n\
+as necessary for other pre-processing.  In this case the return value is\n\
+a sequence of insns to emit after the asm.",
+ rtx_insn *,
+ (vec<rtx>& outputs, vec<rtx>& inputs, vec<const char *>& constraints,
+  vec<rtx>& clobbers, HARD_REG_SET& clobbered_regs),
+ NULL)
 
 /* This target hook allows the backend to specify a calling convention
    in the debug information.  This function actually returns an
-- 
2.1.0

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 6/6] i386: Implement asm flag outputs
  2015-05-07 21:39 [RFC 0/6] Flags outputs for asms Richard Henderson
@ 2015-05-07 21:39 ` Richard Henderson
  2015-05-08 18:40   ` H. Peter Anvin
  2015-05-15 15:46   ` [PATCH v2 " Richard Henderson
  2015-05-07 21:39 ` [PATCH 4/6] Convert to md_asm_adjust Richard Henderson
                   ` (5 subsequent siblings)
  6 siblings, 2 replies; 33+ messages in thread
From: Richard Henderson @ 2015-05-07 21:39 UTC (permalink / raw)
  To: gcc-patches; +Cc: law, peterz, hpa, torvalds, jakub

All j<cc> mnemonics implemented as =@cc<cc>
to make it easy for someone reading the manual
to figure out what condition is desired.
---
 gcc/config/i386/constraints.md |   5 ++
 gcc/config/i386/i386.c         | 132 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 130 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 2271bd1..d16e728 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -146,10 +146,15 @@
  "@internal Lower SSE register when avoiding REX prefix and all SSE registers otherwise.")
 
 ;; We use the B prefix to denote any number of internal operands:
+;;  f  FLAGS_REG
 ;;  s  Sibcall memory operand, not valid for TARGET_X32
 ;;  w  Call memory operand, not valid for TARGET_X32
 ;;  z  Constant call address operand.
 
+(define_constraint "Bf"
+  "@internal Flags register operand."
+  (match_operand 0 "flags_reg_operand"))
+
 (define_constraint "Bs"
   "@internal Sibcall memory operand."
   (and (not (match_test "TARGET_X32"))
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7cbb465..352884d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -45433,21 +45433,139 @@ ix86_c_mode_for_suffix (char suffix)
 
 /* Worker function for TARGET_MD_ASM_ADJUST.
 
-   We do this in the new i386 backend to maintain source compatibility
+   We implement asm flag outputs, and maintain source compatibility
    with the old cc0-based compiler.  */
 
 static rtx_insn *
-ix86_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
-		    vec<const char *> &/*constraints*/,
+ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
+		    vec<const char *> &constraints,
 		    vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
 {
-  clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
   clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
-
-  SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
   SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
 
-  return NULL;
+  bool saw_asm_flag = false;
+
+  start_sequence ();
+  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
+    {
+      const char *con = constraints[i];
+      if (strncmp (con, "=@cc", 4) != 0)
+	continue;
+      con += 4;
+      if (strchr (con, ',') != NULL)
+	{
+	  error ("alternatives not allowed in asm flag output");
+	  continue;
+	}
+
+      bool invert = false;
+      if (con[0] == 'n')
+	invert = true, con++;
+
+      machine_mode mode = CCmode;
+      rtx_code code = UNKNOWN;
+
+      switch (con[0])
+	{
+	case 'a':
+	  if (con[1] == 0)
+	    mode = CCAmode, code = EQ;
+	  else if (con[1] == 'e' && con[2] == 0)
+	    mode = CCCmode, code = EQ;
+	  break;
+	case 'b':
+	  if (con[1] == 0)
+	    mode = CCCmode, code = EQ;
+	  else if (con[1] == 'e' && con[2] == 0)
+	    mode = CCAmode, code = NE;
+	  break;
+	case 'c':
+	  if (con[1] == 0)
+	    mode = CCCmode, code = EQ;
+	  break;
+	case 'e':
+	  if (con[1] == 0)
+	    mode = CCZmode, code = EQ;
+	  break;
+	case 'g':
+	  if (con[1] == 0)
+	    mode = CCGCmode, code = GT;
+	  else if (con[1] == 'e' && con[2] == 0)
+	    mode = CCGCmode, code = GE;
+	  break;
+	case 'l':
+	  if (con[1] == 0)
+	    mode = CCGCmode, code = LT;
+	  else if (con[1] == 'e' && con[2] == 0)
+	    mode = CCGCmode, code = LE;
+	  break;
+	case 'o':
+	  if (con[1] == 0)
+	    mode = CCOmode, code = EQ;
+	  break;
+	case 'p':
+	  if (con[1] == 0)
+	    mode = CCPmode, code = EQ;
+	  break;
+	case 's':
+	  if (con[1] == 0)
+	    mode = CCSmode, code = EQ;
+	  break;
+	case 'z':
+	  if (con[1] == 0)
+	    mode = CCZmode, code = EQ;
+	  break;
+	}
+      if (code == UNKNOWN)
+	{
+	  error ("unknown asm flag output %qs", constraints[i]);
+	  continue;
+	}
+      if (invert)
+	code = reverse_condition (code);
+
+      rtx dest = outputs[i];
+      if (!saw_asm_flag)
+	{
+	  /* This is the first asm flag output.  Here we put the flags
+	     register in as the real output and adjust the condition to
+	     allow it.  */
+	  constraints[i] = "=Bf";
+	  outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
+	  saw_asm_flag = true;
+	}
+      else
+	{
+	  /* We don't need the flags register as output twice.  */
+	  constraints[i] = "=X";
+	  outputs[i] = gen_rtx_SCRATCH (SImode);
+	}
+
+      rtx x = gen_rtx_REG (mode, FLAGS_REG);
+      x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
+
+      machine_mode dest_mode = GET_MODE (dest);
+      if (dest_mode != QImode)
+	{
+	  rtx destqi = gen_reg_rtx (QImode);
+	  emit_insn (gen_rtx_SET (VOIDmode, destqi, x));
+	  x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
+	}
+      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+    }
+  rtx_insn *seq = get_insns ();
+  end_sequence ();
+
+  if (saw_asm_flag)
+    return seq;
+  else
+    {
+      /* If we had no asm flag outputs, clobber the flags.  */
+      clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
+      SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
+      return NULL;
+    }
 }
 
 /* Implements target vector targetm.asm.encode_section_info.  */
-- 
2.1.0

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 5/6] i386: Add CCPmode
  2015-05-07 21:39 [RFC 0/6] Flags outputs for asms Richard Henderson
  2015-05-07 21:39 ` [PATCH 6/6] i386: Implement asm flag outputs Richard Henderson
  2015-05-07 21:39 ` [PATCH 4/6] Convert to md_asm_adjust Richard Henderson
@ 2015-05-07 21:39 ` Richard Henderson
  2015-05-08 18:16   ` Jeff Law
  2015-05-07 21:39 ` [PATCH 3/6] Canonicalize asm volatility earlier Richard Henderson
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 33+ messages in thread
From: Richard Henderson @ 2015-05-07 21:39 UTC (permalink / raw)
  To: gcc-patches; +Cc: law, peterz, hpa, torvalds, jakub

For testing parity coming out of asm flags.
---
 gcc/config/i386/i386-modes.def |  2 ++
 gcc/config/i386/i386.c         | 19 +++++++++++--------
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index 0b6a1f1..714c138 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -55,6 +55,7 @@ ADJUST_ALIGNMENT (XF, TARGET_128BIT_LONG_DOUBLE ? 16 : 4);
    Add CCA to indicate that only the Above flag is valid.
    Add CCC to indicate that only the Carry flag is valid.
    Add CCO to indicate that only the Overflow flag is valid.
+   Add CCP to indicate that only the Parity flag is valid.
    Add CCS to indicate that only the Sign flag is valid.
    Add CCZ to indicate that only the Zero flag is valid.  */
 
@@ -64,6 +65,7 @@ CC_MODE (CCNO);
 CC_MODE (CCA);
 CC_MODE (CCC);
 CC_MODE (CCO);
+CC_MODE (CCP);
 CC_MODE (CCS);
 CC_MODE (CCZ);
 CC_MODE (CCFP);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c47134e..7cbb465 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -15001,21 +15001,21 @@ put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
 	case CCAmode:
 	  suffix = "a";
 	  break;
-
 	case CCCmode:
 	  suffix = "c";
 	  break;
-
 	case CCOmode:
 	  suffix = "o";
 	  break;
-
+	case CCPmode:
+	  suffix = "p";
+	  break;
 	case CCSmode:
 	  suffix = "s";
 	  break;
-
 	default:
 	  suffix = "e";
+	  break;
 	}
       break;
     case NE:
@@ -15024,21 +15024,21 @@ put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
 	case CCAmode:
 	  suffix = "na";
 	  break;
-
 	case CCCmode:
 	  suffix = "nc";
 	  break;
-
 	case CCOmode:
 	  suffix = "no";
 	  break;
-
+	case CCPmode:
+	  suffix = "np";
+	  break;
 	case CCSmode:
 	  suffix = "ns";
 	  break;
-
 	default:
 	  suffix = "ne";
+	  break;
 	}
       break;
     case GT:
@@ -19769,6 +19769,7 @@ ix86_match_ccmode (rtx insn, machine_mode req_mode)
     case CCAmode:
     case CCCmode:
     case CCOmode:
+    case CCPmode:
     case CCSmode:
       if (set_mode != req_mode)
 	return false;
@@ -19917,6 +19918,7 @@ ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
     case CCAmode:
     case CCCmode:
     case CCOmode:
+    case CCPmode:
     case CCSmode:
     case CCZmode:
       switch (m2)
@@ -19931,6 +19933,7 @@ ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
 	case CCAmode:
 	case CCCmode:
 	case CCOmode:
+	case CCPmode:
 	case CCSmode:
 	case CCZmode:
 	  return CCmode;
-- 
2.1.0

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 3/6] Canonicalize asm volatility earlier
  2015-05-07 21:39 [RFC 0/6] Flags outputs for asms Richard Henderson
                   ` (2 preceding siblings ...)
  2015-05-07 21:39 ` [PATCH 5/6] i386: Add CCPmode Richard Henderson
@ 2015-05-07 21:39 ` Richard Henderson
  2015-05-08 18:12   ` Jeff Law
  2015-05-07 21:39 ` [PATCH 1/6] Only resolve_asm_operand_names once Richard Henderson
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 33+ messages in thread
From: Richard Henderson @ 2015-05-07 21:39 UTC (permalink / raw)
  To: gcc-patches; +Cc: law, peterz, hpa, torvalds, jakub

If gimple_asm_volatile_p is correct, no point re-checking.
This is also done by the C and C++ front ends, but not Ada.
So we can't yet trust ASM_VOLATILE_P from the front end.
---
 gcc/cfgexpand.c | 11 +++--------
 gcc/gimplify.c  |  2 +-
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index a5de512..fbd2101 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -2587,8 +2587,6 @@ expand_asm_stmt (gasm *stmt)
   for (i = 0; i < noutputs; ++i)
     orig_outputs[i] = TREE_VALUE (gimple_asm_output_op (stmt, i));
 
-  bool vol = gimple_asm_volatile_p (stmt);
-
   rtvec argvec, constraintvec, labelvec;
   rtx body;
   int ninout;
@@ -2603,10 +2601,6 @@ expand_asm_stmt (gasm *stmt)
   int old_generating_concat_p = generating_concat_p;
   rtx_code_label *fallthru_label = NULL;
 
-  /* An ASM with no outputs needs to be treated as volatile, for now.  */
-  if (noutputs == 0)
-    vol = 1;
-
   if (! check_operand_nalternatives (outputs, inputs))
     return;
 
@@ -2815,7 +2809,7 @@ expand_asm_stmt (gasm *stmt)
 			       empty_string, 0, argvec, constraintvec,
 			       labelvec, locus);
 
-  MEM_VOLATILE_P (body) = vol;
+  MEM_VOLATILE_P (body) = gimple_asm_volatile_p (stmt);
 
   /* Eval the inputs and put them into ARGVEC.
      Put their constraints into ASM_INPUTs and store in CONSTRAINTS.  */
@@ -2964,7 +2958,8 @@ expand_asm_stmt (gasm *stmt)
 			    ggc_strdup (constraints[i]),
 			    i, argvec, constraintvec, labelvec, locus));
 
-	  MEM_VOLATILE_P (SET_SRC (XVECEXP (body, 0, i))) = vol;
+	  MEM_VOLATILE_P (SET_SRC (XVECEXP (body, 0, i)))
+	    = gimple_asm_volatile_p (stmt);
 	}
 
       /* If there are no outputs (but there are some clobbers)
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 9ce3dd9..623d33d 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -5272,7 +5272,7 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
       stmt = gimple_build_asm_vec (TREE_STRING_POINTER (ASM_STRING (expr)),
 				   inputs, outputs, clobbers, labels);
 
-      gimple_asm_set_volatile (stmt, ASM_VOLATILE_P (expr));
+      gimple_asm_set_volatile (stmt, ASM_VOLATILE_P (expr) || noutputs == 0);
       gimple_asm_set_input (stmt, ASM_INPUT_P (expr));
 
       gimplify_seq_add_stmt (pre_p, stmt);
-- 
2.1.0

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/6] Only resolve_asm_operand_names once
  2015-05-07 21:39 [RFC 0/6] Flags outputs for asms Richard Henderson
                   ` (3 preceding siblings ...)
  2015-05-07 21:39 ` [PATCH 3/6] Canonicalize asm volatility earlier Richard Henderson
@ 2015-05-07 21:39 ` Richard Henderson
  2015-05-08 18:11   ` Jeff Law
  2015-05-07 21:39 ` [PATCH 2/6] Merge expand_asm_operands into expand_asm_stmt Richard Henderson
  2015-05-08  1:15 ` [RFC 0/6] Flags outputs for asms H. Peter Anvin
  6 siblings, 1 reply; 33+ messages in thread
From: Richard Henderson @ 2015-05-07 21:39 UTC (permalink / raw)
  To: gcc-patches; +Cc: law, peterz, hpa, torvalds, jakub

We do it in the front end already; no need to repeat.
---
 gcc/cfgexpand.c | 2 --
 gcc/stmt.c      | 7 ++++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 5905ddb..c77a4ac 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -2554,8 +2554,6 @@ expand_asm_operands (tree string, tree outputs, tree inputs,
   if (! check_operand_nalternatives (outputs, inputs))
     return;
 
-  string = resolve_asm_operand_names (string, outputs, inputs, labels);
-
   /* Collect constraints.  */
   i = 0;
   for (t = outputs; t ; t = TREE_CHAIN (t), i++)
diff --git a/gcc/stmt.c b/gcc/stmt.c
index 6c62a12..e63179f 100644
--- a/gcc/stmt.c
+++ b/gcc/stmt.c
@@ -562,9 +562,10 @@ check_unique_operand_names (tree outputs, tree inputs, tree labels)
   return false;
 }
 
-/* A subroutine of expand_asm_operands.  Resolve the names of the operands
-   in *POUTPUTS and *PINPUTS to numbers, and replace the name expansions in
-   STRING and in the constraints to those numbers.  */
+/* Resolve the names of the operands in *POUTPUTS and *PINPUTS to numbers,
+   and replace the name expansions in STRING and in the constraints to
+   those numbers.  This is generally done in the front end while creating
+   the ASM_EXPR generic tree that eventually becomes the GIMPLE_ASM.  */
 
 tree
 resolve_asm_operand_names (tree string, tree outputs, tree inputs, tree labels)
-- 
2.1.0

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [RFC 0/6] Flags outputs for asms
@ 2015-05-07 21:39 Richard Henderson
  2015-05-07 21:39 ` [PATCH 6/6] i386: Implement asm flag outputs Richard Henderson
                   ` (6 more replies)
  0 siblings, 7 replies; 33+ messages in thread
From: Richard Henderson @ 2015-05-07 21:39 UTC (permalink / raw)
  To: gcc-patches; +Cc: law, peterz, hpa, torvalds, jakub

Here's a prototype for i386 only, which stands up to light testing.
I'd rather post this tonight than wait until tomorrow, when I
can write more proper dejagnu tests.

I've tested the intermediate patches via config-list.mk, so despite
mucking around with vec.h vs target.h, all targets still compile.

That said, quite a bit of cleanup in expand_asm_stmt was required
in order to make the target hook not be completely unintelligible,
so despite full regression testing on x86_64 and ppc64, I could
well have broken something.


r~


void bar(void);

#define DO1(C) \
int s##C(void) { int r; asm("" : "=@cc"#C(r)); return r; } \
int j##C(void) { int r; asm("" : "=@cc"#C(r)); if (!r) bar(); }

#define DO2(C) DO1(C) DO1(n##C)

DO2(a)
DO2(b)
DO2(c)
DO2(e)
DO2(g)
DO2(l)
DO2(o)
DO2(p)
DO2(s)
DO2(z)

DO2(ae)
DO2(be)
DO2(ge)
DO2(le)

---
Richard Henderson (6):
  Only resolve_asm_operand_names once
  Merge expand_asm_operands into expand_asm_stmt
  Canonicalize asm volatility earlier
  Convert to md_asm_adjust
  i386: Add CCPmode
  i386: Implement asm flag outputs

 gcc/cfgexpand.c                | 674 ++++++++++++++++++-----------------------
 gcc/config/cris/cris.c         |  88 +++---
 gcc/config/i386/constraints.md |   5 +
 gcc/config/i386/i386-modes.def |   2 +
 gcc/config/i386/i386.c         | 163 ++++++++--
 gcc/config/mn10300/mn10300.c   |  20 +-
 gcc/config/rs6000/rs6000.c     |  17 +-
 gcc/config/visium/visium.c     |  21 +-
 gcc/config/vxworks.c           |   2 +-
 gcc/doc/tm.texi                |  18 +-
 gcc/doc/tm.texi.in             |   2 +-
 gcc/gimple.c                   |   2 +-
 gcc/gimplify.c                 |   2 +-
 gcc/hooks.c                    |   8 -
 gcc/hooks.h                    |   1 -
 gcc/incpath.c                  |   1 +
 gcc/mode-switching.c           |   2 +-
 gcc/stmt.c                     |   7 +-
 gcc/system.h                   |   1 +
 gcc/target.def                 |  30 +-
 20 files changed, 558 insertions(+), 508 deletions(-)

-- 
2.1.0

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 2/6] Merge expand_asm_operands into expand_asm_stmt
  2015-05-07 21:39 [RFC 0/6] Flags outputs for asms Richard Henderson
                   ` (4 preceding siblings ...)
  2015-05-07 21:39 ` [PATCH 1/6] Only resolve_asm_operand_names once Richard Henderson
@ 2015-05-07 21:39 ` Richard Henderson
  2015-05-08 18:54   ` Jeff Law
  2015-05-08  1:15 ` [RFC 0/6] Flags outputs for asms H. Peter Anvin
  6 siblings, 1 reply; 33+ messages in thread
From: Richard Henderson @ 2015-05-07 21:39 UTC (permalink / raw)
  To: gcc-patches; +Cc: law, peterz, hpa, torvalds, jakub

Preparatory to converting from tree chains to vectors.
---
 gcc/cfgexpand.c | 167 ++++++++++++++++++++++++--------------------------------
 1 file changed, 72 insertions(+), 95 deletions(-)

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index c77a4ac..a5de512 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -2522,22 +2522,78 @@ tree_conflicts_with_clobbers_p (tree t, HARD_REG_SET *clobbered_regs)
    VOL nonzero means the insn is volatile; don't optimize it.  */
 
 static void
-expand_asm_operands (tree string, tree outputs, tree inputs,
-		     tree clobbers, tree labels, basic_block fallthru_bb,
-		     int vol, location_t locus)
+expand_asm_stmt (gasm *stmt)
 {
+  int noutputs, ninputs, nclobbers, nlabels, i;
+  tree string, outputs, inputs, clobbers, labels, tail, t;
+  location_t locus = gimple_location (stmt);
+  basic_block fallthru_bb = NULL;
+
+  /* Meh... convert the gimple asm operands into real tree lists.
+     Eventually we should make all routines work on the vectors instead
+     of relying on TREE_CHAIN.  */
+  outputs = NULL_TREE;
+  noutputs = gimple_asm_noutputs (stmt);
+  if (noutputs > 0)
+    {
+      t = outputs = gimple_asm_output_op (stmt, 0);
+      for (i = 1; i < noutputs; i++)
+	t = TREE_CHAIN (t) = gimple_asm_output_op (stmt, i);
+    }
+
+  inputs = NULL_TREE;
+  ninputs = gimple_asm_ninputs (stmt);
+  if (ninputs > 0)
+    {
+      t = inputs = gimple_asm_input_op (stmt, 0);
+      for (i = 1; i < ninputs; i++)
+	t = TREE_CHAIN (t) = gimple_asm_input_op (stmt, i);
+    }
+
+  clobbers = NULL_TREE;
+  nclobbers = gimple_asm_nclobbers (stmt);
+  if (nclobbers > 0)
+    {
+      t = clobbers = gimple_asm_clobber_op (stmt, 0);
+      for (i = 1; i < nclobbers; i++)
+	t = TREE_CHAIN (t) = gimple_asm_clobber_op (stmt, i);
+    }
+
+  labels = NULL_TREE;
+  nlabels = gimple_asm_nlabels (stmt);
+  if (nlabels > 0)
+    {
+      edge fallthru = find_fallthru_edge (gimple_bb (stmt)->succs);
+      if (fallthru)
+	fallthru_bb = fallthru->dest;
+      t = labels = gimple_asm_label_op (stmt, 0);
+      for (i = 1; i < nlabels; i++)
+	t = TREE_CHAIN (t) = gimple_asm_label_op (stmt, i);
+    }
+
+  {
+    const char *s = gimple_asm_string (stmt);
+    string = build_string (strlen (s), s);
+  }
+
+  if (gimple_asm_input_p (stmt))
+    {
+      expand_asm_loc (string, gimple_asm_volatile_p (stmt), locus);
+      return;
+    }
+
+  /* Record the contents of OUTPUTS before it is modified.  */
+  tree *orig_outputs = XALLOCAVEC (tree, noutputs);
+  for (i = 0; i < noutputs; ++i)
+    orig_outputs[i] = TREE_VALUE (gimple_asm_output_op (stmt, i));
+
+  bool vol = gimple_asm_volatile_p (stmt);
+
   rtvec argvec, constraintvec, labelvec;
   rtx body;
-  int ninputs = list_length (inputs);
-  int noutputs = list_length (outputs);
-  int nlabels = list_length (labels);
   int ninout;
-  int nclobbers;
   HARD_REG_SET clobbered_regs;
   int clobber_conflict_found = 0;
-  tree tail;
-  tree t;
-  int i;
   /* Vector of RTX's of evaluated output operands.  */
   rtx *output_rtx = XALLOCAVEC (rtx, noutputs);
   int *inout_opnum = XALLOCAVEC (int, noutputs);
@@ -2995,101 +3051,22 @@ expand_asm_operands (tree string, tree outputs, tree inputs,
     if (real_output_rtx[i])
       emit_move_insn (real_output_rtx[i], output_rtx[i]);
 
-  crtl->has_asm_statement = 1;
-  free_temp_slots ();
-}
-
-
-static void
-expand_asm_stmt (gasm *stmt)
-{
-  int noutputs;
-  tree outputs, tail, t;
-  tree *o;
-  size_t i, n;
-  const char *s;
-  tree str, out, in, cl, labels;
-  location_t locus = gimple_location (stmt);
-  basic_block fallthru_bb = NULL;
-
-  /* Meh... convert the gimple asm operands into real tree lists.
-     Eventually we should make all routines work on the vectors instead
-     of relying on TREE_CHAIN.  */
-  out = NULL_TREE;
-  n = gimple_asm_noutputs (stmt);
-  if (n > 0)
-    {
-      t = out = gimple_asm_output_op (stmt, 0);
-      for (i = 1; i < n; i++)
-	t = TREE_CHAIN (t) = gimple_asm_output_op (stmt, i);
-    }
-
-  in = NULL_TREE;
-  n = gimple_asm_ninputs (stmt);
-  if (n > 0)
-    {
-      t = in = gimple_asm_input_op (stmt, 0);
-      for (i = 1; i < n; i++)
-	t = TREE_CHAIN (t) = gimple_asm_input_op (stmt, i);
-    }
-
-  cl = NULL_TREE;
-  n = gimple_asm_nclobbers (stmt);
-  if (n > 0)
-    {
-      t = cl = gimple_asm_clobber_op (stmt, 0);
-      for (i = 1; i < n; i++)
-	t = TREE_CHAIN (t) = gimple_asm_clobber_op (stmt, i);
-    }
-
-  labels = NULL_TREE;
-  n = gimple_asm_nlabels (stmt);
-  if (n > 0)
-    {
-      edge fallthru = find_fallthru_edge (gimple_bb (stmt)->succs);
-      if (fallthru)
-	fallthru_bb = fallthru->dest;
-      t = labels = gimple_asm_label_op (stmt, 0);
-      for (i = 1; i < n; i++)
-	t = TREE_CHAIN (t) = gimple_asm_label_op (stmt, i);
-    }
-
-  s = gimple_asm_string (stmt);
-  str = build_string (strlen (s), s);
-
-  if (gimple_asm_input_p (stmt))
-    {
-      expand_asm_loc (str, gimple_asm_volatile_p (stmt), locus);
-      return;
-    }
-
-  outputs = out;
-  noutputs = gimple_asm_noutputs (stmt);
-  /* o[I] is the place that output number I should be written.  */
-  o = (tree *) alloca (noutputs * sizeof (tree));
-
-  /* Record the contents of OUTPUTS before it is modified.  */
-  for (i = 0, tail = outputs; tail; tail = TREE_CHAIN (tail), i++)
-    o[i] = TREE_VALUE (tail);
-
-  /* Generate the ASM_OPERANDS insn; store into the TREE_VALUEs of
-     OUTPUTS some trees for where the values were actually stored.  */
-  expand_asm_operands (str, outputs, in, cl, labels, fallthru_bb,
-		       gimple_asm_volatile_p (stmt), locus);
-
   /* Copy all the intermediate outputs into the specified outputs.  */
   for (i = 0, tail = outputs; tail; tail = TREE_CHAIN (tail), i++)
     {
-      if (o[i] != TREE_VALUE (tail))
+      if (orig_outputs[i] != TREE_VALUE (tail))
 	{
-	  expand_assignment (o[i], TREE_VALUE (tail), false);
+	  expand_assignment (orig_outputs[i], TREE_VALUE (tail), false);
 	  free_temp_slots ();
 
 	  /* Restore the original value so that it's correct the next
 	     time we expand this function.  */
-	  TREE_VALUE (tail) = o[i];
+	  TREE_VALUE (tail) = orig_outputs[i];
 	}
     }
+
+  crtl->has_asm_statement = 1;
+  free_temp_slots ();
 }
 
 /* Emit code to jump to the address
-- 
2.1.0

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [RFC 0/6] Flags outputs for asms
  2015-05-07 21:39 [RFC 0/6] Flags outputs for asms Richard Henderson
                   ` (5 preceding siblings ...)
  2015-05-07 21:39 ` [PATCH 2/6] Merge expand_asm_operands into expand_asm_stmt Richard Henderson
@ 2015-05-08  1:15 ` H. Peter Anvin
  2015-05-08  1:20   ` H. Peter Anvin
  2015-05-08 15:54   ` Richard Henderson
  6 siblings, 2 replies; 33+ messages in thread
From: H. Peter Anvin @ 2015-05-08  1:15 UTC (permalink / raw)
  To: Richard Henderson, gcc-patches; +Cc: law, peterz, torvalds, jakub

[-- Attachment #1: Type: text/plain, Size: 989 bytes --]

On 05/07/2015 02:38 PM, Richard Henderson wrote:
> Here's a prototype for i386 only, which stands up to light testing.
> I'd rather post this tonight rather than wait until tomorrow when I
> can write more proper dejagnu tests.
> 
> I've tested the intermediate patches via config-list.mk, so despite
> mucking around with vec.h vs target.h, all targets still compile.
> 
> That said, quite a bit of cleanup in expand_asm_stmt was required
> in order to make the target hook not be completely unintelligible,
> so despite full regression testing on x86_64 and ppc64, I could
> well have broken something.

Hi!  I took this for a spin, and:

a) It seems to work correctly; haven't been able to break it yet.
b) It seems very easy to provoke it into producing pretty bad code.

(b) is obviously not at all unexpected, this is working impressively
well for a first RFC.  Here is a piece of test code I used for this.

I'm very impressed to see this happen so quickly, thank you!

	-hpa








[-- Attachment #2: flags.c --]
[-- Type: text/x-csrc, Size: 1185 bytes --]

extern void alpha(void);
extern void beta(void);

/* This case really should produce good code in both cases */

void good1(int x, int y)
{
  _Bool pf;

  asm("cmpl %2,%1"
      : "=@ccp" (pf)
      : "r" (x), "g" (y));

  if (pf)
    beta();
}

void bad1(int x, int y)
{
  _Bool le, pf;

  asm("cmpl %3,%2"
      : "=@ccle" (le), "=@ccp" (pf)
      : "r" (x), "g" (y));

  if (le)
    alpha();
  else if (pf)
    beta();
}

/* This case really is too much to ask... */

_Bool good2(int x, int y)
{
  _Bool le;

  asm("cmpl %2,%1"
      : "=@ccle" (le)
      : "r" (x), "g" (y));

  return le;
}

_Bool bad2(int x, int y)
{
  _Bool zf, of, sf;

  asm("cmpl %4,%3"
      : "=@ccz" (zf), "=@cco" (of), "=@ccs" (sf)
      : "r" (x), "g" (y));

  return zf | (sf ^ of);
}

/* One should expect this shouldn't produce *worse* code than the above... */

int good3(int x, int y, int a, int b)
{
  _Bool le;

  asm("cmpl %2,%1"
      : "=@ccle" (le)
      : "r" (x), "g" (y));

  return le ? b : a;
}

int bad3(int x, int y, int a, int b)
{
  _Bool zf, of, sf;

  asm("cmpl %4,%3"
      : "=@ccz" (zf), "=@cco" (of), "=@ccs" (sf)
      : "r" (x), "g" (y));

  return zf | (sf ^ of) ? b : a;
}

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [RFC 0/6] Flags outputs for asms
  2015-05-08  1:15 ` [RFC 0/6] Flags outputs for asms H. Peter Anvin
@ 2015-05-08  1:20   ` H. Peter Anvin
  2015-05-08 15:24     ` Richard Henderson
  2015-05-08 15:54   ` Richard Henderson
  1 sibling, 1 reply; 33+ messages in thread
From: H. Peter Anvin @ 2015-05-08  1:20 UTC (permalink / raw)
  To: Richard Henderson, gcc-patches; +Cc: law, peterz, torvalds, jakub

This is a separate issue which really shouldn't have anything to do with
this, but is there a specific reason why:

void good1(int x, int y)
{
  _Bool pf;

  asm("cmpl %2,%1"
      : "=@ccp" (pf)
      : "r" (x), "g" (y));

  if (pf)
    beta();
}

... ends up generating a jump to a jump?

0000000000000000 <good1>:
   0:   39 f7                   cmp    %esi,%edi
   2:   7a 0c                   jp     10 <good1+0x10>
   4:   f3 c3                   repz retq
   6:   66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
   d:   00 00 00
  10:   e9 00 00 00 00          jmpq   15 <good1+0x15>
                        11: R_X86_64_PC32       beta-0x4
  15:   66 66 2e 0f 1f 84 00    data32 nopw %cs:0x0(%rax,%rax,1)
  1c:   00 00 00 00

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [RFC 0/6] Flags outputs for asms
  2015-05-08  1:20   ` H. Peter Anvin
@ 2015-05-08 15:24     ` Richard Henderson
  2015-05-08 15:37       ` Jay Foad
  2015-05-08 15:39       ` Jeff Law
  0 siblings, 2 replies; 33+ messages in thread
From: Richard Henderson @ 2015-05-08 15:24 UTC (permalink / raw)
  To: H. Peter Anvin, gcc-patches; +Cc: law, peterz, torvalds, jakub

On 05/07/2015 06:20 PM, H. Peter Anvin wrote:
> This is a separate issue which really shouldn't have anything to do with
> this, but is there a specific reason why:
> 
> void good1(int x, int y)
> {
>   _Bool pf;
> 
>   asm("cmpl %2,%1"
>       : "=@ccp" (pf)
>       : "r" (x), "g" (y));
> 
>   if (pf)
>     beta();
> }
> 
> ... ends up generating a jump to a jump?
> 
> 0000000000000000 <good1>:
>    0:   39 f7                   cmp    %esi,%edi
>    2:   7a 0c                   jp     10 <good1+0x10>
>    4:   f3 c3                   repz retq
>    6:   66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
>    d:   00 00 00
>   10:   e9 00 00 00 00          jmpq   15 <good1+0x15>
>                         11: R_X86_64_PC32       beta-0x4
>   15:   66 66 2e 0f 1f 84 00    data32 nopw %cs:0x0(%rax,%rax,1)
>   1c:   00 00 00 00
> 

Yes, the i386 backend has not implemented conditional sibcalls.  AFAIK the only
targets that have done that are ones with predication: ia64 and maybe arm32.

It could certainly be done; I've no idea off hand how difficult it might be.  I
suspect that some new code has to be written generically in order to enable it.


r~

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [RFC 0/6] Flags outputs for asms
  2015-05-08 15:24     ` Richard Henderson
@ 2015-05-08 15:37       ` Jay Foad
  2015-05-08 15:39       ` Jeff Law
  1 sibling, 0 replies; 33+ messages in thread
From: Jay Foad @ 2015-05-08 15:37 UTC (permalink / raw)
  To: Richard Henderson
  Cc: H. Peter Anvin, GCC Patches, law, peterz, Linus Torvalds, jakub

On 8 May 2015 at 16:23, Richard Henderson <rth@redhat.com> wrote:
> Yes, the i386 backend has not implemented conditional sibcalls.

See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60159

Jay.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [RFC 0/6] Flags outputs for asms
  2015-05-08 15:24     ` Richard Henderson
  2015-05-08 15:37       ` Jay Foad
@ 2015-05-08 15:39       ` Jeff Law
  1 sibling, 0 replies; 33+ messages in thread
From: Jeff Law @ 2015-05-08 15:39 UTC (permalink / raw)
  To: Richard Henderson, H. Peter Anvin, gcc-patches; +Cc: peterz, torvalds, jakub

On 05/08/2015 09:23 AM, Richard Henderson wrote:
> On 05/07/2015 06:20 PM, H. Peter Anvin wrote:
>> This is a separate issue which really shouldn't have anything to do with
>> this, but is there a specific reason why:
>>
>> void good1(int x, int y)
>> {
>>    _Bool pf;
>>
>>    asm("cmpl %2,%1"
>>        : "=@ccp" (pf)
>>        : "r" (x), "g" (y));
>>
>>    if (pf)
>>      beta();
>> }
>>
>> ... ends up generating a jump to a jump?
>>
>> 0000000000000000 <good1>:
>>     0:   39 f7                   cmp    %esi,%edi
>>     2:   7a 0c                   jp     10 <good1+0x10>
>>     4:   f3 c3                   repz retq
>>     6:   66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
>>     d:   00 00 00
>>    10:   e9 00 00 00 00          jmpq   15 <good1+0x15>
>>                          11: R_X86_64_PC32       beta-0x4
>>    15:   66 66 2e 0f 1f 84 00    data32 nopw %cs:0x0(%rax,%rax,1)
>>    1c:   00 00 00 00
>>
>
> Yes, the i386 backend has not implemented conditional sibcalls.  AFAIK the only
> targets that have done that are ones with predication: ia64 and maybe arm32.
>
> It could certainly be done; I've no idea off hand how difficult it might be.  I
> suspect that some new code has to be written generically in order to enable it.
Kai looked at this last year, it's possible, but rather tedious to do in 
GCC due to the separation of JUMP_INSN vs CALL_INSN and the need to 
duplicate the conditional jumps as conditional sibcalls.

There's a BZ about this, I'm not sure if Kai put all his thoughts on the 
topic into the BZ or not.

jeff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [RFC 0/6] Flags outputs for asms
  2015-05-08  1:15 ` [RFC 0/6] Flags outputs for asms H. Peter Anvin
  2015-05-08  1:20   ` H. Peter Anvin
@ 2015-05-08 15:54   ` Richard Henderson
  2015-05-08 17:46     ` H. Peter Anvin
  2015-05-08 20:15     ` Richard Henderson
  1 sibling, 2 replies; 33+ messages in thread
From: Richard Henderson @ 2015-05-08 15:54 UTC (permalink / raw)
  To: H. Peter Anvin, gcc-patches; +Cc: law, peterz, torvalds, jakub

On 05/07/2015 06:15 PM, H. Peter Anvin wrote:
> /* This case really should produce good code in both cases */
> 
> void good1(int x, int y)
> {
>   _Bool pf;
> 
>   asm("cmpl %2,%1"
>       : "=@ccp" (pf)
>       : "r" (x), "g" (y));
> 
>   if (pf)
>     beta();
> }
> 
> void bad1(int x, int y)
> {
>   _Bool le, pf;
> 
>   asm("cmpl %3,%2"
>       : "=@ccle" (le), "=@ccp" (pf)
>       : "r" (x), "g" (y));
> 
>   if (le)
>     alpha();
>   else if (pf)
>     beta();
> }

I have a feeling I know why these didn't get merged.

The global optimizers aren't allowed to operate on hard registers lest they
extend the lifetime of the hard register such that it creates an impossible
situation for the register allocator.  Think what would happen if EAX were
suddenly live across the entire function.

Similarly, combine is allowed to merge insns with hard registers if the insns
are sequential.  But if the insns aren't sequential, we're lengthening the
lifetime of the hard register.  Now, I thought this didn't apply to fixed
registers like esp or flags, but perhaps not.

Note what happens if you swap the order of le and pf in the asm:

  asm("cmpl %3,%2" : "=@ccp" (pf), "=@ccle" (le) : "r" (x), "g" (y));

the order of the two setcc insns is reversed, and then the setle is in fact
merged with the branch.

Anyway, I'll look into whether the branch around alpha can be optimized, but
I'd be shocked if I'd be able to do anything about the branch around beta.
True, there's nothing in between that will clobber the flags so it would be an
excellent improvement, but combine doesn't work across basic blocks and
changing that would be a major task.


> /* This case really is too much to ask... */
> 
> _Bool good2(int x, int y)
> {
>   _Bool le;
> 
>   asm("cmpl %2,%1"
>       : "=@ccle" (le)
>       : "r" (x), "g" (y));
> 
>   return le;
> }
> 
> _Bool bad2(int x, int y)
> {
>   _Bool zf, of, sf;
> 
>   asm("cmpl %4,%3"
>       : "=@ccz" (zf), "=@cco" (of), "=@ccs" (sf)
>       : "r" (x), "g" (y));
> 
>   return zf | (sf ^ of);
> }

Haha, yes.

> /* One should expect this shouldn't produce *worse* code than the above... */
> 
> int good3(int x, int y, int a, int b)
> {
>   _Bool le;
> 
>   asm("cmpl %2,%1"
>       : "=@ccle" (le)
>       : "r" (x), "g" (y));
> 
>   return le ? b : a;
> }
> 
> int bad3(int x, int y, int a, int b)
> {
>   _Bool zf, of, sf;
> 
>   asm("cmpl %4,%3"
>       : "=@ccz" (zf), "=@cco" (of), "=@ccs" (sf)
>       : "r" (x), "g" (y));
> 
>   return zf | (sf ^ of) ? b : a;
> }

This is a case of the optimizers thinking they're helping you by not folding
too much computation into a condition.

If you use -mbranch-cost=4 you'll get the cmovne that you expect.


r~

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [RFC 0/6] Flags outputs for asms
  2015-05-08 15:54   ` Richard Henderson
@ 2015-05-08 17:46     ` H. Peter Anvin
  2015-05-08 20:15     ` Richard Henderson
  1 sibling, 0 replies; 33+ messages in thread
From: H. Peter Anvin @ 2015-05-08 17:46 UTC (permalink / raw)
  To: Richard Henderson, gcc-patches; +Cc: law, peterz, torvalds, jakub

On 05/08/2015 08:54 AM, Richard Henderson wrote:
> 
> Anyway, I'll look into whether the branch around alpha can be optimized, but
> I'd be shocked if I'd be able to do anything about the branch around beta.
> True, there's nothing in between that will clobber the flags so it would be an
> excellent improvement, but combine doesn't work across basic blocks and
> changing that would be a major task.
> 

Either way... optimization is something that can be done gradually.
Once we start using the feature we can figure out where it makes sense
to do further optimizations.

	-hpa


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 1/6] Only resolve_asm_operand_names once
  2015-05-07 21:39 ` [PATCH 1/6] Only resolve_asm_operand_names once Richard Henderson
@ 2015-05-08 18:11   ` Jeff Law
  0 siblings, 0 replies; 33+ messages in thread
From: Jeff Law @ 2015-05-08 18:11 UTC (permalink / raw)
  To: Richard Henderson, gcc-patches; +Cc: peterz, hpa, torvalds, jakub

On 05/07/2015 03:38 PM, Richard Henderson wrote:
> We do it in the front end already; no need to repeat.
> ---
>   gcc/cfgexpand.c | 2 --
>   gcc/stmt.c      | 7 ++++---
>   2 files changed, 4 insertions(+), 5 deletions(-)
Any reason this shouldn't go into the tree immediately?  Seems like it 
stands on its own.

jeff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 3/6] Canonicalize asm volatility earlier
  2015-05-07 21:39 ` [PATCH 3/6] Canonicalize asm volatility earlier Richard Henderson
@ 2015-05-08 18:12   ` Jeff Law
  0 siblings, 0 replies; 33+ messages in thread
From: Jeff Law @ 2015-05-08 18:12 UTC (permalink / raw)
  To: Richard Henderson, gcc-patches; +Cc: peterz, hpa, torvalds, jakub

On 05/07/2015 03:38 PM, Richard Henderson wrote:
> If gimple_asm_volatile_p is correct, no point re-checking.
> This is also done by the C and C++ front ends, but not Ada.
> So we can't yet trust ASM_VOLATILE_P from the front end.
> ---
>   gcc/cfgexpand.c | 11 +++--------
>   gcc/gimplify.c  |  2 +-
>   2 files changed, 4 insertions(+), 9 deletions(-)
Also seems like it ought to be able to go forward independently now 
rather than waiting.

jeff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] i386: Add CCPmode
  2015-05-07 21:39 ` [PATCH 5/6] i386: Add CCPmode Richard Henderson
@ 2015-05-08 18:16   ` Jeff Law
  0 siblings, 0 replies; 33+ messages in thread
From: Jeff Law @ 2015-05-08 18:16 UTC (permalink / raw)
  To: Richard Henderson, gcc-patches; +Cc: peterz, hpa, torvalds, jakub

On 05/07/2015 03:38 PM, Richard Henderson wrote:
> For testing parity coming out of asm flags.
> ---
>   gcc/config/i386/i386-modes.def |  2 ++
>   gcc/config/i386/i386.c         | 19 +++++++++++--------
>   2 files changed, 13 insertions(+), 8 deletions(-)
Seems like it ought to move forward now.

Oh yea, I guess you should consider my prior messages as reviews with 
the following note.

Need ChangeLog entry and testing.  With those good for the trunk.

jeff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 6/6] i386: Implement asm flag outputs
  2015-05-07 21:39 ` [PATCH 6/6] i386: Implement asm flag outputs Richard Henderson
@ 2015-05-08 18:40   ` H. Peter Anvin
  2015-05-08 18:45     ` Jeff Law
  2015-05-15 15:46   ` [PATCH v2 " Richard Henderson
  1 sibling, 1 reply; 33+ messages in thread
From: H. Peter Anvin @ 2015-05-08 18:40 UTC (permalink / raw)
  To: Richard Henderson, gcc-patches; +Cc: law, peterz, torvalds, jakub

On 05/07/2015 02:39 PM, Richard Henderson wrote:
> All j<cc> mnemonics implemented as =@cc<cc>
> to make it easy for someone reading the manual
> to figure out what condition is desired.

One request: would it be possible to get a cpp symbol for this (e.g.
__GCC_X86_INLINE_ASM_CC__) so we don't have to do explicit gcc version
checks?

	-hpa


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 6/6] i386: Implement asm flag outputs
  2015-05-08 18:40   ` H. Peter Anvin
@ 2015-05-08 18:45     ` Jeff Law
  0 siblings, 0 replies; 33+ messages in thread
From: Jeff Law @ 2015-05-08 18:45 UTC (permalink / raw)
  To: H. Peter Anvin, Richard Henderson, gcc-patches; +Cc: peterz, torvalds, jakub

On 05/08/2015 12:40 PM, H. Peter Anvin wrote:
> On 05/07/2015 02:39 PM, Richard Henderson wrote:
>> All j<cc> mnemonics implemented as =@cc<cc>
>> to make it easy for someone reading the manual
>> to figure out what condition is desired.
>
> One request: would it be possible to get a cpp symbol for this (e.g.
> __GCC_X86_INLINE_ASM_CC__) so we don't have to do explicit gcc version
> checks?
Probably wise.  I hate version # checks.
jeff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 2/6] Merge expand_asm_operands into expand_asm_stmt
  2015-05-07 21:39 ` [PATCH 2/6] Merge expand_asm_operands into expand_asm_stmt Richard Henderson
@ 2015-05-08 18:54   ` Jeff Law
  0 siblings, 0 replies; 33+ messages in thread
From: Jeff Law @ 2015-05-08 18:54 UTC (permalink / raw)
  To: Richard Henderson, gcc-patches; +Cc: peterz, hpa, torvalds, jakub

On 05/07/2015 03:38 PM, Richard Henderson wrote:
> Preparatory to converting from tree chains to vectors.
> ---
>   gcc/cfgexpand.c | 167 ++++++++++++++++++++++++--------------------------------
>   1 file changed, 72 insertions(+), 95 deletions(-)
OK with a ChangeLog and the usual testing.

jeff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 4/6] Convert to md_asm_adjust
  2015-05-07 21:39 ` [PATCH 4/6] Convert to md_asm_adjust Richard Henderson
@ 2015-05-08 19:41   ` Jeff Law
  2015-05-08 19:56     ` Richard Henderson
  0 siblings, 1 reply; 33+ messages in thread
From: Jeff Law @ 2015-05-08 19:41 UTC (permalink / raw)
  To: Richard Henderson, gcc-patches; +Cc: peterz, hpa, torvalds, jakub

On 05/07/2015 03:38 PM, Richard Henderson wrote:
> Using proper vectors instead of lists of trees.
> ---
>   gcc/cfgexpand.c              | 614 ++++++++++++++++++++-----------------------
>   gcc/config/cris/cris.c       |  88 +++----
>   gcc/config/i386/i386.c       |  24 +-
>   gcc/config/mn10300/mn10300.c |  20 +-
>   gcc/config/rs6000/rs6000.c   |  17 +-
>   gcc/config/visium/visium.c   |  21 +-
>   gcc/config/vxworks.c         |   2 +-
>   gcc/doc/tm.texi              |  18 +-
>   gcc/doc/tm.texi.in           |   2 +-
>   gcc/gimple.c                 |   2 +-
>   gcc/hooks.c                  |   8 -
>   gcc/hooks.h                  |   1 -
>   gcc/incpath.c                |   1 +
>   gcc/mode-switching.c         |   2 +-
>   gcc/system.h                 |   1 +
>   gcc/target.def               |  30 ++-
>   16 files changed, 401 insertions(+), 450 deletions(-)
ChangeLog and the usual testing.

One less use of TREE_LIST, is goodness.

I'm going to assume the include header order juggling/additions were all 
necessary.

Presumably we don't have a symbolic return value from decode_reg_name. 
   That'd be a good cleanup for someone.


Much of the cfgexpand bits were painful to read.  I did my best, things 
look reasonable, but that's largely from reading the comments and 
scanning the relevant code -- trying to match up old behaviour and new 
behaviour across the board was tough.  I realize this mess isn't of your 
making :-)

With the usual testing, this is fine.

jeff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 4/6] Convert to md_asm_adjust
  2015-05-08 19:41   ` Jeff Law
@ 2015-05-08 19:56     ` Richard Henderson
  0 siblings, 0 replies; 33+ messages in thread
From: Richard Henderson @ 2015-05-08 19:56 UTC (permalink / raw)
  To: Jeff Law, gcc-patches; +Cc: peterz, hpa, torvalds, jakub

On 05/08/2015 12:41 PM, Jeff Law wrote:
> I'm going to assume the include header order juggling/additions were all
> necessary.

Correct -- target.h now uses vec<>, which means that vec.h must be included
first.  In the olden days I'd have just put the vec.h include at the top of
target.h.  ;-)

> Presumably we don't have a symbolic return value from decode_reg_name.   That'd
> be a good cleanup for someone.

Correct.  Since the beginning of time, it would seem.

> Much of the cfgexpand bits were painful to read.  I did my best, things look
> reasonable, but that's largely from reading the comments and scanning the
> relevant code -- trying to match up old behaviour and new behaviour across the
> board was tough.  I realize this mess isn't of your making :-)

Yeah.  Sadly there was no smaller step; you can't half-convert a variable from
lists to vectors.  And since the existing target hook used the lists, they had
to be done at the same time.



r~

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [RFC 0/6] Flags outputs for asms
  2015-05-08 15:54   ` Richard Henderson
  2015-05-08 17:46     ` H. Peter Anvin
@ 2015-05-08 20:15     ` Richard Henderson
  2015-05-08 21:14       ` Segher Boessenkool
                         ` (2 more replies)
  1 sibling, 3 replies; 33+ messages in thread
From: Richard Henderson @ 2015-05-08 20:15 UTC (permalink / raw)
  To: H. Peter Anvin, law
  Cc: gcc-patches, peterz, torvalds, jakub, Segher Boessenkool

[-- Attachment #1: Type: text/plain, Size: 4446 bytes --]

On 05/08/2015 08:54 AM, Richard Henderson wrote:
> I have a feeling I know why these didn't get merged.
> 
> The global optimizers aren't allowed to operate on hard registers lest they
> extend the lifetime of the hard register such that it creates an impossible
> situation for the register allocator.  Think what would happen if EAX were
> suddenly live across the entire function.
> 
> Similarly, combine is allowed to merge insns with hard registers if the insns
> are sequential.  But if the insns aren't sequential, we're lengthening the
> lifetime of the hard register.  Now, I thought this didn't apply to fixed
> registers like esp or flags, but perhaps not.
> 
> Note what happens if you swap the order of le and pf in the asm:
> 
>   asm("cmpl %3,%2" : "=@ccp" (pf), "=@ccle" (le) : "r" (x), "g" (y));
> 
> the order of the two setcc insns is reversed, and then the setle is in fact
> merged with the branch.
> 
> Anyway, I'll look into whether the branch around alpha can be optimized, but
> I'd be shocked if I'd be able to do anything about the branch around beta.
> True, there's nothing in between that will clobber the flags so it would be an
> excellent improvement, but combine doesn't work across basic blocks and
> changing that would be a major task.

My feeling was wrong.

(insn 9 11 10 2 (set (reg:QI 91 [ le ])
        (le:QI (reg:CCGC 17 flags)
            (const_int 0 [0]))) z.c:8 601 {*setcc_qi}
     (nil))
(insn 10 9 12 2 (set (reg:QI 92 [ pf ])
        (eq:QI (reg:CCP 17 flags)
            (const_int 0 [0]))) z.c:8 601 {*setcc_qi}
     (expr_list:REG_DEAD (reg:CCP 17 flags)
        (nil)))
(insn 12 10 13 2 (set (reg:CCZ 17 flags)
        (compare:CCZ (reg:QI 91 [ le ])
            (const_int 0 [0]))) z.c:12 1 {*cmpqi_ccno_1}
     (expr_list:REG_DEAD (reg:QI 91 [ le ])
        (nil)))
(jump_insn 13 12 14 2 (set (pc)
        (if_then_else (eq (reg:CCZ 17 flags)
                (const_int 0 [0]))
            (label_ref 17)
            (pc))) z.c:12 606 {*jcc_1}
     (expr_list:REG_DEAD (reg:CCZ 17 flags)
        (int_list:REG_BR_PROB 7929 (nil)))

Combine won't allow a combination of insn 9->12->13 in one step, because one of
the inputs to insn 9, the flags, is clobbered in between insn 9 and insn 13.
Namely, by insn 12.  Combine has a special-case for allowing this when all
insns are consecutive, but not when there's something in the middle.

But it *does* try to match an intermediate pattern,

(set (reg:CCGC 17 flags)
    (compare:CCGC (reg:CCGC 17 flags)
        (const_int 0 [0])))

which can be considered a no-op move.  If I add the attached pattern, then the
combination happens in two steps -- 9->12, 12->13 -- and we get what we hoped:

(note 9 11 10 2 NOTE_INSN_DELETED)
(insn 10 9 12 2 (set (reg:QI 92 [ pf ])
        (eq:QI (reg:CCP 17 flags)
            (const_int 0 [0]))) z.c:8 601 {*setcc_qi}
     (nil))
(note 12 10 13 2 NOTE_INSN_DELETED)
(jump_insn 13 12 14 2 (set (pc)
        (if_then_else (gt (reg:CCGC 17 flags)
                (const_int 0 [0]))
            (label_ref 17)
            (pc))) z.c:12 606 {*jcc_1}
     (int_list:REG_BR_PROB 7929 (expr_list:REG_DEAD (reg:CCZ 17 flags)
            (nil)))
 -> 17)


Jeff or Segher, is it worth complicating the can_combine_p test near line 1958

      /* Make sure that the value that is to be substituted for the register
         does not use any registers whose values alter in between.  However,
         If the insns are adjacent, a use can't cross a set even though we
         think it might (this can happen for a sequence of insns each setting
         the same destination; last_set of that register might point to
         a NOTE).  If INSN has a REG_EQUIV note, the register is always
         equivalent to the memory so the substitution is valid even if there
         are intervening stores.  Also, don't move a volatile asm or
         UNSPEC_VOLATILE across any other insns.  */
      || (! all_adjacent
          && (((!MEM_P (src)
                || ! find_reg_note (insn, REG_EQUIV, src))
               && use_crosses_set_p (src, DF_INSN_LUID (insn)))
              || (GET_CODE (src) == ASM_OPERANDS && MEM_VOLATILE_P (src))
              || GET_CODE (src) == UNSPEC_VOLATILE))

to notice that the set is one of SUCC or SUCC2, and is thus included in the
insns being combined?  That does seem cleaner and more general than the hacky
i386 nop_cmp pattern, but would certainly require tons more testing...



r~



[-- Attachment #2: z --]
[-- Type: text/plain, Size: 847 bytes --]

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index fc320f6..ffa5c46 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1320,6 +1320,22 @@
   [(set_attr "type" "icmp")
    (set_attr "mode" "QI")])
 
+;; This helps combine fold away a chain of setcc insns.
+
+(define_insn_and_split "*nop_cmp"
+  [(set (reg FLAGS_REG)
+	(compare (match_operand 0 "flags_reg_operand")
+		 (const_int 0)))]
+  "ix86_match_ccmode (insn, GET_MODE (operands[0]))"
+  "#"
+  ""
+  [(const_int 0)]
+{
+  /* No-op move.  Can't split to nothing; emit something.  */
+  emit_note (NOTE_INSN_DELETED);
+  DONE;
+})
+
 ;; These implement float point compares.
 ;; %%% See if we can get away with VOIDmode operands on the actual insns,
 ;; which would allow mix and match FP modes on the compares.  Which is what

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [RFC 0/6] Flags outputs for asms
  2015-05-08 20:15     ` Richard Henderson
@ 2015-05-08 21:14       ` Segher Boessenkool
  2015-05-08 22:11         ` Richard Henderson
  2015-05-08 21:33       ` Jeff Law
  2015-05-11 13:19       ` Segher Boessenkool
  2 siblings, 1 reply; 33+ messages in thread
From: Segher Boessenkool @ 2015-05-08 21:14 UTC (permalink / raw)
  To: Richard Henderson
  Cc: H. Peter Anvin, law, gcc-patches, peterz, torvalds, jakub

Hi Richard,

On Fri, May 08, 2015 at 01:15:25PM -0700, Richard Henderson wrote:
> But it *does* try to match an intermediate pattern,
> 
> (set (reg:CCGC 17 flags)
>     (compare:CCGC (reg:CCGC 17 flags)
>         (const_int 0 [0])))
> 
> which can be considered a no-op move.

Maybe we should teach combine this is a no-op, then?  Then everything
should work as-is?  Combine knows about no-op moves (they don't cost,
and it deletes them itself).

> Jeff or Segher, is it worth complicating the can_combine_p test near line 1958
> 
>       /* Make sure that the value that is to be substituted for the register
>          does not use any registers whose values alter in between.  However,
>          If the insns are adjacent, a use can't cross a set even though we
>          think it might (this can happen for a sequence of insns each setting
>          the same destination; last_set of that register might point to
>          a NOTE).  If INSN has a REG_EQUIV note, the register is always
>          equivalent to the memory so the substitution is valid even if there
>          are intervening stores.  Also, don't move a volatile asm or
>          UNSPEC_VOLATILE across any other insns.  */
>       || (! all_adjacent
>           && (((!MEM_P (src)
>                 || ! find_reg_note (insn, REG_EQUIV, src))
>                && use_crosses_set_p (src, DF_INSN_LUID (insn)))
>               || (GET_CODE (src) == ASM_OPERANDS && MEM_VOLATILE_P (src))
>               || GET_CODE (src) == UNSPEC_VOLATILE))
> 
> to notice that the set is one of SUCC or SUCC2, and is thus included in the
> insns being combined?  That does seem cleaner and more general than the hacky
> i386 nop_cmp pattern, but would certainly require tons more testing...

"Cleaner"?  In this code?  Heh.

use_crosses_set_p often estimates pessimistically.  Is that what is
happening here?  Using real dataflow in combine would fix that (and many
other problems).  Not that that helps you right now ;-)

I'll build with your patches tomorrow and investigate.


Segher

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [RFC 0/6] Flags outputs for asms
  2015-05-08 20:15     ` Richard Henderson
  2015-05-08 21:14       ` Segher Boessenkool
@ 2015-05-08 21:33       ` Jeff Law
  2015-05-08 21:55         ` Richard Henderson
  2015-05-08 22:10         ` Segher Boessenkool
  2015-05-11 13:19       ` Segher Boessenkool
  2 siblings, 2 replies; 33+ messages in thread
From: Jeff Law @ 2015-05-08 21:33 UTC (permalink / raw)
  To: Richard Henderson, H. Peter Anvin
  Cc: gcc-patches, peterz, torvalds, jakub, Segher Boessenkool

On 05/08/2015 02:15 PM, Richard Henderson wrote:
>
> But it *does* try to match an intermediate pattern,
>
> (set (reg:CCGC 17 flags)
>      (compare:CCGC (reg:CCGC 17 flags)
>          (const_int 0 [0])))
>
> which can be considered a no-op move.  If I add the attached pattern, then the
> combination happens in two steps -- 9->12, 12->13 -- and we get what we hoped:
So what happens if that pattern is actually recognized as a nop-move by 
set_noop_p?  That would allow recog_for_combine to see it as a nop and 
"recognize" it as valid.



>
>
> Jeff or Segher, is it worth complicating the can_combine_p test near line 1958
>
>        /* Make sure that the value that is to be substituted for the register
>           does not use any registers whose values alter in between.  However,
>           If the insns are adjacent, a use can't cross a set even though we
>           think it might (this can happen for a sequence of insns each setting
>           the same destination; last_set of that register might point to
>           a NOTE).  If INSN has a REG_EQUIV note, the register is always
>           equivalent to the memory so the substitution is valid even if there
>           are intervening stores.  Also, don't move a volatile asm or
>           UNSPEC_VOLATILE across any other insns.  */
>        || (! all_adjacent
>            && (((!MEM_P (src)
>                  || ! find_reg_note (insn, REG_EQUIV, src))
>                 && use_crosses_set_p (src, DF_INSN_LUID (insn)))
>                || (GET_CODE (src) == ASM_OPERANDS && MEM_VOLATILE_P (src))
>                || GET_CODE (src) == UNSPEC_VOLATILE))
>
> to notice that the set is one of SUCC or SUCC2, and is thus included in the
> insns being combined?  That does seem cleaner and more general than the hacky
> i386 nop_cmp pattern, but would certainly require tons more testing...
See above -- feels like we'd be better off letting combine know about 
this other form of a nop move and hopefully if we do that, all the right 
things happen.

jeff

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [RFC 0/6] Flags outputs for asms
  2015-05-08 21:33       ` Jeff Law
@ 2015-05-08 21:55         ` Richard Henderson
  2015-05-08 22:10         ` Segher Boessenkool
  1 sibling, 0 replies; 33+ messages in thread
From: Richard Henderson @ 2015-05-08 21:55 UTC (permalink / raw)
  To: Jeff Law, H. Peter Anvin
  Cc: gcc-patches, peterz, torvalds, jakub, Segher Boessenkool

On 05/08/2015 02:32 PM, Jeff Law wrote:
> On 05/08/2015 02:15 PM, Richard Henderson wrote:
>>
>> But it *does* try to match an intermediate pattern,
>>
>> (set (reg:CCGC 17 flags)
>>      (compare:CCGC (reg:CCGC 17 flags)
>>          (const_int 0 [0])))
>>
>> which can be considered a no-op move.  If I add the attached pattern, then the
>> combination happens in two steps -- 9->12, 12->13 -- and we get what we hoped:
> So what happens if that pattern is actually recognized as a nop-move by
> set_noop_p?  That would allow recog_for_combine to see it as a nop and
> "recognize" it as valid.


Interesting suggestion -- I hadn't thought of that.  It might be easier than
playing with use_crosses_set_p, and certainly better than the nop_cmp pattern.

I'll have a go at this later.


r~

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [RFC 0/6] Flags outputs for asms
  2015-05-08 21:33       ` Jeff Law
  2015-05-08 21:55         ` Richard Henderson
@ 2015-05-08 22:10         ` Segher Boessenkool
  1 sibling, 0 replies; 33+ messages in thread
From: Segher Boessenkool @ 2015-05-08 22:10 UTC (permalink / raw)
  To: Jeff Law
  Cc: Richard Henderson, H. Peter Anvin, gcc-patches, peterz, torvalds, jakub

On Fri, May 08, 2015 at 03:32:58PM -0600, Jeff Law wrote:
> On 05/08/2015 02:15 PM, Richard Henderson wrote:
> >
> >But it *does* try to match an intermediate pattern,
> >
> >(set (reg:CCGC 17 flags)
> >     (compare:CCGC (reg:CCGC 17 flags)
> >         (const_int 0 [0])))
> >
> >which can be considered a no-op move.  If I add the attached pattern, then 
> >the
> >combination happens in two steps -- 9->12, 12->13 -- and we get what we 
> >hoped:
> So what happens if that pattern is actually recognized as a nop-move by 
> set_noop_p?  That would allow recog_for_combine to see it as a nop and 
> "recognize" it as valid.

It's not valid RTL though, so it is a bit more work than that.


Segher

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [RFC 0/6] Flags outputs for asms
  2015-05-08 21:14       ` Segher Boessenkool
@ 2015-05-08 22:11         ` Richard Henderson
  0 siblings, 0 replies; 33+ messages in thread
From: Richard Henderson @ 2015-05-08 22:11 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: H. Peter Anvin, law, gcc-patches, peterz, torvalds, jakub

On 05/08/2015 02:14 PM, Segher Boessenkool wrote:
> "Cleaner"?  In this code?  Heh.

Heh.

> use_crosses_set_p often estimates pessimistically.  Is that what is
> happening here?  Using real dataflow in combine would fix that (and many
> other problems).  Not that that helps you right now ;-)

Yes, I think so.  Proper data flow would fix this.  But...

My thought is that the use_crosses_set_p could grow another parameter,
ignore_luid, so that if it matches DF_INSN_LUID (rsp->last_set) we don't fail
the test.

Then can_combine_p has to adjust its use like so:

  use_crosses_set_p (src, DF_INSN_LUID (insn),
		     succ ? DF_INSN_LUID (succ) : -1)

which at least handles the 3-insn combine case, if not the 4-insn combine case.

I'll try out both this and Law's set_noop_p suggestion soon.



r~

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [RFC 0/6] Flags outputs for asms
  2015-05-08 20:15     ` Richard Henderson
  2015-05-08 21:14       ` Segher Boessenkool
  2015-05-08 21:33       ` Jeff Law
@ 2015-05-11 13:19       ` Segher Boessenkool
  2 siblings, 0 replies; 33+ messages in thread
From: Segher Boessenkool @ 2015-05-11 13:19 UTC (permalink / raw)
  To: Richard Henderson
  Cc: H. Peter Anvin, law, gcc-patches, peterz, torvalds, jakub

On Fri, May 08, 2015 at 01:15:25PM -0700, Richard Henderson wrote:
> But it *does* try to match an intermediate pattern,
> 
> (set (reg:CCGC 17 flags)
>     (compare:CCGC (reg:CCGC 17 flags)
>         (const_int 0 [0])))

Patch posted at <http://gcc.gnu.org/ml/gcc-patches/2015-05/msg00918.html>.


Segher

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH v2 6/6] i386: Implement asm flag outputs
  2015-05-07 21:39 ` [PATCH 6/6] i386: Implement asm flag outputs Richard Henderson
  2015-05-08 18:40   ` H. Peter Anvin
@ 2015-05-15 15:46   ` Richard Henderson
  2015-05-20 16:26     ` Jeff Law
  1 sibling, 1 reply; 33+ messages in thread
From: Richard Henderson @ 2015-05-15 15:46 UTC (permalink / raw)
  To: gcc-patches; +Cc: hpa, law, torvalds

Version 2 includes proper test cases and documentation.
Hopefully the documentation even makes sense.  Suggestions
and improvements there gratefully appreciated.


r~
---
 gcc/config/i386/constraints.md             |   5 ++
 gcc/config/i386/i386.c                     | 137 +++++++++++++++++++++++++++--
 gcc/doc/extend.texi                        |  76 ++++++++++++++++
 gcc/testsuite/gcc.target/i386/asm-flag-0.c |  15 ++++
 gcc/testsuite/gcc.target/i386/asm-flag-1.c |  18 ++++
 gcc/testsuite/gcc.target/i386/asm-flag-2.c |  16 ++++
 gcc/testsuite/gcc.target/i386/asm-flag-3.c |  22 +++++
 gcc/testsuite/gcc.target/i386/asm-flag-4.c |  20 +++++
 gcc/testsuite/gcc.target/i386/asm-flag-5.c |  19 ++++
 9 files changed, 321 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-0.c
 create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-5.c

diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 2271bd1..d16e728 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -146,10 +146,15 @@
  "@internal Lower SSE register when avoiding REX prefix and all SSE registers otherwise.")
 
 ;; We use the B prefix to denote any number of internal operands:
+;;  f  FLAGS_REG
 ;;  s  Sibcall memory operand, not valid for TARGET_X32
 ;;  w  Call memory operand, not valid for TARGET_X32
 ;;  z  Constant call address operand.
 
+(define_constraint "Bf"
+  "@internal Flags register operand."
+  (match_operand 0 "flags_reg_operand"))
+
 (define_constraint "Bs"
   "@internal Sibcall memory operand."
   (and (not (match_test "TARGET_X32"))
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index de333d8..868316a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -45447,21 +45447,144 @@ ix86_c_mode_for_suffix (char suffix)
 
 /* Worker function for TARGET_MD_ASM_ADJUST.
 
-   We do this in the new i386 backend to maintain source compatibility
+   We implement asm flag outputs, and maintain source compatibility
    with the old cc0-based compiler.  */
 
 static rtx_insn *
-ix86_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
-		    vec<const char *> &/*constraints*/,
+ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
+		    vec<const char *> &constraints,
 		    vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
 {
-  clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
   clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
-
-  SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
   SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
 
-  return NULL;
+  bool saw_asm_flag = false;
+
+  start_sequence ();
+  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
+    {
+      const char *con = constraints[i];
+      if (strncmp (con, "=@cc", 4) != 0)
+	continue;
+      con += 4;
+      if (strchr (con, ',') != NULL)
+	{
+	  error ("alternatives not allowed in asm flag output");
+	  continue;
+	}
+
+      bool invert = false;
+      if (con[0] == 'n')
+	invert = true, con++;
+
+      machine_mode mode = CCmode;
+      rtx_code code = UNKNOWN;
+
+      switch (con[0])
+	{
+	case 'a':
+	  if (con[1] == 0)
+	    mode = CCAmode, code = EQ;
+	  else if (con[1] == 'e' && con[2] == 0)
+	    mode = CCCmode, code = EQ;
+	  break;
+	case 'b':
+	  if (con[1] == 0)
+	    mode = CCCmode, code = EQ;
+	  else if (con[1] == 'e' && con[2] == 0)
+	    mode = CCAmode, code = NE;
+	  break;
+	case 'c':
+	  if (con[1] == 0)
+	    mode = CCCmode, code = EQ;
+	  break;
+	case 'e':
+	  if (con[1] == 0)
+	    mode = CCZmode, code = EQ;
+	  break;
+	case 'g':
+	  if (con[1] == 0)
+	    mode = CCGCmode, code = GT;
+	  else if (con[1] == 'e' && con[2] == 0)
+	    mode = CCGCmode, code = GE;
+	  break;
+	case 'l':
+	  if (con[1] == 0)
+	    mode = CCGCmode, code = LT;
+	  else if (con[1] == 'e' && con[2] == 0)
+	    mode = CCGCmode, code = LE;
+	  break;
+	case 'o':
+	  if (con[1] == 0)
+	    mode = CCOmode, code = EQ;
+	  break;
+	case 'p':
+	  if (con[1] == 0)
+	    mode = CCPmode, code = EQ;
+	  break;
+	case 's':
+	  if (con[1] == 0)
+	    mode = CCSmode, code = EQ;
+	  break;
+	case 'z':
+	  if (con[1] == 0)
+	    mode = CCZmode, code = EQ;
+	  break;
+	}
+      if (code == UNKNOWN)
+	{
+	  error ("unknown asm flag output %qs", constraints[i]);
+	  continue;
+	}
+      if (invert)
+	code = reverse_condition (code);
+
+      rtx dest = outputs[i];
+      if (!saw_asm_flag)
+	{
+	  /* This is the first asm flag output.  Here we put the flags
+	     register in as the real output and adjust the condition to
+	     allow it.  */
+	  constraints[i] = "=Bf";
+	  outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
+	  saw_asm_flag = true;
+	}
+      else
+	{
+	  /* We don't need the flags register as output twice.  */
+	  constraints[i] = "=X";
+	  outputs[i] = gen_rtx_SCRATCH (SImode);
+	}
+
+      rtx x = gen_rtx_REG (mode, FLAGS_REG);
+      x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
+
+      machine_mode dest_mode = GET_MODE (dest);
+      if (!SCALAR_INT_MODE_P (dest_mode))
+	{
+	  error ("invalid type for asm flag output");
+	  continue;
+	}
+      if (dest_mode != QImode)
+	{
+	  rtx destqi = gen_reg_rtx (QImode);
+	  emit_insn (gen_rtx_SET (destqi, x));
+	  x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
+	}
+      emit_insn (gen_rtx_SET (dest, x));
+    }
+  rtx_insn *seq = get_insns ();
+  end_sequence ();
+
+  if (saw_asm_flag)
+    return seq;
+  else
+    {
+      /* If we had no asm flag outputs, clobber the flags.  */
+      clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
+      SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
+      return NULL;
+    }
 }
 
 /* Implements target vector targetm.asm.encode_section_info.  */
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 6004681..4221634 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -7578,6 +7578,82 @@ pointed to by @code{e}
 in a register, you can enable it to choose the best location
 for @code{d} by specifying both constraints.
 
+@anchor{FlagOutputOperands}
+@subsubsection Flag Output Operands
+@cindex @code{asm} flag output operands
+
+Some targets have a special register that holds the ``flags'' for the
+result of an operation or comparison.  Normally, the contents of that
+register are either unmodified by the asm, or the asm is considered to
+clobber the contents.
+
+On some targets, a special form of output operand exists by which
+conditions in the flags register may be outputs of the asm.  The set of
+conditions supported is target specific, but the general rule is that
+the output variable must be a scalar integer, and the value will be boolean.
+
+Because of the special nature of the flag output operands, the constraint
+may not include alternatives.
+
+Most often, the target has only one flags register, which is thus an implied
+operand of many instructions.  In this case, the operand should not be
+referenced within the assembler template via @code{%0} etc, as there's
+no corresponding text in the assembly language.
+
+@table @asis
+@item x86 family
+The flag output constraints for the x86 family are of the form
+@samp{=@@cc@var{cond}} where @var{cond} is one of the standard
+conditions defined in the ISA manual for @code{j@var{cc}} or
+@code{set@var{cc}}.
+
+@table @code
+@item a
+``above'' or unsigned greater than
+@item ae
+``above or equal'' or unsigned greater than or equal
+@item b
+``below'' or unsigned less than
+@item be
+``below or equal'' or unsigned less than or equal
+@item c
+carry flag set
+@item e
+@itemx z
+``equal'' or zero flag set
+@item g
+signed greater than
+@item ge
+signed greater than or equal
+@item l
+signed less than
+@item le
+signed less than or equal
+@item o
+overflow flag set
+@item p
+parity flag set
+@item s
+sign flag set
+@item na
+@itemx nae
+@itemx nb
+@itemx nbe
+@itemx nc
+@itemx ne
+@itemx ng
+@itemx nge
+@itemx nl
+@itemx nle
+@itemx no
+@itemx np
+@itemx ns
+@itemx nz
+``not'' @var{flag}, or inverted versions of those above
+@end table
+
+@end table
+
 @anchor{InputOperands}
 @subsubsection Input Operands
 @cindex @code{asm} input operands
diff --git a/gcc/testsuite/gcc.target/i386/asm-flag-0.c b/gcc/testsuite/gcc.target/i386/asm-flag-0.c
new file mode 100644
index 0000000..b0c0523
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/asm-flag-0.c
@@ -0,0 +1,15 @@
+/* Test error conditions of asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+void a(void)
+{
+  char x;
+  asm("" : "=@cca,@ccc"(x));  /* { dg-error "alternatives not allowed" } */
+}
+
+void b(void)
+{
+  char x;
+  asm("" : "=@ccbad"(x)); /* { dg-error "unknown asm flag output" } */
+}
diff --git a/gcc/testsuite/gcc.target/i386/asm-flag-1.c b/gcc/testsuite/gcc.target/i386/asm-flag-1.c
new file mode 100644
index 0000000..bcc4952
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/asm-flag-1.c
@@ -0,0 +1,18 @@
+/* Test some of the valid @cc<cc> asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+void f(char *out)
+{
+  asm("" : "=@cca"(out[0]), "=@ccc"(out[1]), "=@cce"(out[2]),
+           "=@ccg"(out[3]), "=@cco"(out[4]), "=@ccp"(out[5]),
+           "=@ccs"(out[6]));
+}
+
+/* { dg-final { scan-assembler "seta" } } */
+/* { dg-final { scan-assembler "setc" } } */
+/* { dg-final { scan-assembler "sete" } } */
+/* { dg-final { scan-assembler "setg" } } */
+/* { dg-final { scan-assembler "seto" } } */
+/* { dg-final { scan-assembler "setp" } } */
+/* { dg-final { scan-assembler "sets" } } */
diff --git a/gcc/testsuite/gcc.target/i386/asm-flag-2.c b/gcc/testsuite/gcc.target/i386/asm-flag-2.c
new file mode 100644
index 0000000..5f8fa13
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/asm-flag-2.c
@@ -0,0 +1,16 @@
+/* Test some of the valid @cc<cc> asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+void f(char *out)
+{
+  asm("" : "=@ccb"(out[0]), "=@ccl"(out[1]), "=@ccz"(out[2]),
+           "=@ccbe"(out[4]), "=@ccge"(out[5]), "=@ccle"(out[6]));
+}
+
+/* { dg-final { scan-assembler "setc" } } */
+/* { dg-final { scan-assembler "setl" } } */
+/* { dg-final { scan-assembler "sete" } } */
+/* { dg-final { scan-assembler "setna" } } */
+/* { dg-final { scan-assembler "setge" } } */
+/* { dg-final { scan-assembler "setle" } } */
diff --git a/gcc/testsuite/gcc.target/i386/asm-flag-3.c b/gcc/testsuite/gcc.target/i386/asm-flag-3.c
new file mode 100644
index 0000000..220c07c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/asm-flag-3.c
@@ -0,0 +1,22 @@
+/* Test some of the valid @cc<cc> asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#define DO(C) \
+void f##C(int *y) { char x; asm("" : "=@cc"#C(x)); if (!x) *y = 0; }
+
+DO(a)
+DO(c)
+DO(e)
+DO(g)
+DO(o)
+DO(p)
+DO(s)
+
+/* { dg-final { scan-assembler "ja" } } */
+/* { dg-final { scan-assembler "jc" } } */
+/* { dg-final { scan-assembler "je" } } */
+/* { dg-final { scan-assembler "jg" } } */
+/* { dg-final { scan-assembler "jo" } } */
+/* { dg-final { scan-assembler "jp" } } */
+/* { dg-final { scan-assembler "js" } } */
diff --git a/gcc/testsuite/gcc.target/i386/asm-flag-4.c b/gcc/testsuite/gcc.target/i386/asm-flag-4.c
new file mode 100644
index 0000000..b84b7df
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/asm-flag-4.c
@@ -0,0 +1,20 @@
+/* Test some of the valid @cc<cc> asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#define DO(C) \
+void f##C(int *y) { char x; asm("" : "=@cc"#C(x)); if (!x) *y = 0; }
+
+DO(b)
+DO(l)
+DO(z)
+DO(be)
+DO(ge)
+DO(le)
+
+/* { dg-final { scan-assembler "jc" } } */
+/* { dg-final { scan-assembler "jl" } } */
+/* { dg-final { scan-assembler "je" } } */
+/* { dg-final { scan-assembler "jna" } } */
+/* { dg-final { scan-assembler "jge" } } */
+/* { dg-final { scan-assembler "jle" } } */
diff --git a/gcc/testsuite/gcc.target/i386/asm-flag-5.c b/gcc/testsuite/gcc.target/i386/asm-flag-5.c
new file mode 100644
index 0000000..59fb72a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/asm-flag-5.c
@@ -0,0 +1,19 @@
+/* Test error conditions of asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+#define DO(T) \
+void f_##T(void) { T x; asm("" : "=@ccc"(x)); }
+
+DO(_Bool)
+DO(char)
+DO(short)
+DO(int)
+DO(long)
+
+DO(float)	/* { dg-error invalid type } */
+DO(double)	/* { dg-error invalid type } */
+
+typedef struct { int x[3]; } S;
+
+DO(S)		/* { dg-error invalid type } */
-- 
2.1.0

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 6/6] i386: Implement asm flag outputs
  2015-05-15 15:46   ` [PATCH v2 " Richard Henderson
@ 2015-05-20 16:26     ` Jeff Law
  2015-05-20 16:36       ` H. Peter Anvin
  2015-05-20 17:28       ` Richard Henderson
  0 siblings, 2 replies; 33+ messages in thread
From: Jeff Law @ 2015-05-20 16:26 UTC (permalink / raw)
  To: Richard Henderson, gcc-patches; +Cc: hpa, torvalds

On 05/15/2015 09:37 AM, Richard Henderson wrote:
> Version 2 includes proper test cases and documentation.
> Hopefully the documentation even makes sense.  Suggestions
> and improvements there gratefully appreciated.
>
>
> r~
> ---
>   gcc/config/i386/constraints.md             |   5 ++
>   gcc/config/i386/i386.c                     | 137 +++++++++++++++++++++++++++--
>   gcc/doc/extend.texi                        |  76 ++++++++++++++++
>   gcc/testsuite/gcc.target/i386/asm-flag-0.c |  15 ++++
>   gcc/testsuite/gcc.target/i386/asm-flag-1.c |  18 ++++
>   gcc/testsuite/gcc.target/i386/asm-flag-2.c |  16 ++++
>   gcc/testsuite/gcc.target/i386/asm-flag-3.c |  22 +++++
>   gcc/testsuite/gcc.target/i386/asm-flag-4.c |  20 +++++
>   gcc/testsuite/gcc.target/i386/asm-flag-5.c |  19 ++++
>   9 files changed, 321 insertions(+), 7 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-0.c
>   create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-1.c
>   create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-2.c
>   create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-3.c
>   create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-4.c
>   create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-5.c
It all seems to make sense.  Obviously you'll need a ChangeLog and the 
usual testing before committing.

I won't stress much if this needs a bit of further tweaking as the 
kernel folks start to exploit the capability and we find weaknesses in 
the implementation.

What I don't see is any way to know if the target supports asm flag 
outputs.  Are we expecting the kernel folks to do some kind of test then 
enable/disable based on the result?

I'm going to assume the mapping of the constraints to the actual modes 
and codes is correct.


Jeff


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 6/6] i386: Implement asm flag outputs
  2015-05-20 16:26     ` Jeff Law
@ 2015-05-20 16:36       ` H. Peter Anvin
  2015-05-20 17:28       ` Richard Henderson
  1 sibling, 0 replies; 33+ messages in thread
From: H. Peter Anvin @ 2015-05-20 16:36 UTC (permalink / raw)
  To: Jeff Law, Richard Henderson, gcc-patches; +Cc: torvalds

Well, these kinds of asm are inherently target specific, but I did already ask for a cpp symbol to indicate this faculty is available.

On May 20, 2015 9:21:07 AM PDT, Jeff Law <law@redhat.com> wrote:
>On 05/15/2015 09:37 AM, Richard Henderson wrote:
>> Version 2 includes proper test cases and documentation.
>> Hopefully the documentation even makes sense.  Suggestions
>> and improvements there gratefully appreciated.
>>
>>
>> r~
>> ---
>>   gcc/config/i386/constraints.md             |   5 ++
>>   gcc/config/i386/i386.c                     | 137
>+++++++++++++++++++++++++++--
>>   gcc/doc/extend.texi                        |  76 ++++++++++++++++
>>   gcc/testsuite/gcc.target/i386/asm-flag-0.c |  15 ++++
>>   gcc/testsuite/gcc.target/i386/asm-flag-1.c |  18 ++++
>>   gcc/testsuite/gcc.target/i386/asm-flag-2.c |  16 ++++
>>   gcc/testsuite/gcc.target/i386/asm-flag-3.c |  22 +++++
>>   gcc/testsuite/gcc.target/i386/asm-flag-4.c |  20 +++++
>>   gcc/testsuite/gcc.target/i386/asm-flag-5.c |  19 ++++
>>   9 files changed, 321 insertions(+), 7 deletions(-)
>>   create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-0.c
>>   create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-1.c
>>   create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-2.c
>>   create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-3.c
>>   create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-4.c
>>   create mode 100644 gcc/testsuite/gcc.target/i386/asm-flag-5.c
>It all seems to make sense.  Obviously you'll need a ChangeLog and the 
>usual testing before committing.
>
>I won't stress much if this needs a bit of further tweaking as the 
>kernel folks start to exploit the capability and we find weaknesses in 
>the implementation.
>
>What I don't see is any way to know if the target supports asm flag 
>outputs.  Are we expecting the kernel folks to do some kind of test
>then 
>enable/disable based on the result?
>
>I'm going to assume the mapping of the constraints to the actual modes 
>and codes is correct.
>
>
>Jeff

-- 
Sent from my mobile phone.  Please pardon brevity and lack of formatting.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v2 6/6] i386: Implement asm flag outputs
  2015-05-20 16:26     ` Jeff Law
  2015-05-20 16:36       ` H. Peter Anvin
@ 2015-05-20 17:28       ` Richard Henderson
  1 sibling, 0 replies; 33+ messages in thread
From: Richard Henderson @ 2015-05-20 17:28 UTC (permalink / raw)
  To: Jeff Law, gcc-patches; +Cc: hpa, torvalds

On 05/20/2015 09:21 AM, Jeff Law wrote:
> What I don't see is any way to know if the target supports asm flag outputs. 
> Are we expecting the kernel folks to do some kind of test then enable/disable
> based on the result?

I'd forgotten that we'd talked about a cpp symbol.
I'll add that.


r~

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2015-05-20 17:27 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-07 21:39 [RFC 0/6] Flags outputs for asms Richard Henderson
2015-05-07 21:39 ` [PATCH 6/6] i386: Implement asm flag outputs Richard Henderson
2015-05-08 18:40   ` H. Peter Anvin
2015-05-08 18:45     ` Jeff Law
2015-05-15 15:46   ` [PATCH v2 " Richard Henderson
2015-05-20 16:26     ` Jeff Law
2015-05-20 16:36       ` H. Peter Anvin
2015-05-20 17:28       ` Richard Henderson
2015-05-07 21:39 ` [PATCH 4/6] Convert to md_asm_adjust Richard Henderson
2015-05-08 19:41   ` Jeff Law
2015-05-08 19:56     ` Richard Henderson
2015-05-07 21:39 ` [PATCH 5/6] i386: Add CCPmode Richard Henderson
2015-05-08 18:16   ` Jeff Law
2015-05-07 21:39 ` [PATCH 3/6] Canonicalize asm volatility earlier Richard Henderson
2015-05-08 18:12   ` Jeff Law
2015-05-07 21:39 ` [PATCH 1/6] Only resolve_asm_operand_names once Richard Henderson
2015-05-08 18:11   ` Jeff Law
2015-05-07 21:39 ` [PATCH 2/6] Merge expand_asm_operands into expand_asm_stmt Richard Henderson
2015-05-08 18:54   ` Jeff Law
2015-05-08  1:15 ` [RFC 0/6] Flags outputs for asms H. Peter Anvin
2015-05-08  1:20   ` H. Peter Anvin
2015-05-08 15:24     ` Richard Henderson
2015-05-08 15:37       ` Jay Foad
2015-05-08 15:39       ` Jeff Law
2015-05-08 15:54   ` Richard Henderson
2015-05-08 17:46     ` H. Peter Anvin
2015-05-08 20:15     ` Richard Henderson
2015-05-08 21:14       ` Segher Boessenkool
2015-05-08 22:11         ` Richard Henderson
2015-05-08 21:33       ` Jeff Law
2015-05-08 21:55         ` Richard Henderson
2015-05-08 22:10         ` Segher Boessenkool
2015-05-11 13:19       ` Segher Boessenkool

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).