public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 9/9] ifcvt: Also pass reversed cc comparison.
  2019-08-02 15:10 [PATCH 0/9] Improve icvt "convert multiple" Robin Dapp
                   ` (4 preceding siblings ...)
  2019-08-02 15:10 ` [PATCH 8/9] ifcvt: Handle swap-style idioms differently Robin Dapp
@ 2019-08-02 15:10 ` Robin Dapp
  2019-10-27 15:27   ` Richard Sandiford
  2019-08-02 15:10 ` [PATCH 3/9] ifcvt: Only created temporaries as needed Robin Dapp
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 25+ messages in thread
From: Robin Dapp @ 2019-08-02 15:10 UTC (permalink / raw)
  To: gcc-patches; +Cc: james.greenhalgh, rdapp

When then and else are reversed, we would swap new_val and old_val.
The same has to be done for our new code paths.
Also, emit_conditional_move may perform swapping.  In case we need to
swap, the cc comparison also needs to be swapped and for this we pass
the reversed cc comparison directly.  An alternative would be to pass
the constituents of the compare directly, but the functions have more
than enough parameters already.
---
 gcc/ifcvt.c  | 37 ++++++++++++++++++++++++++-----------
 gcc/optabs.c |  4 +++-
 gcc/optabs.h |  2 +-
 3 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 09bf443656c..d3959b870f6 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -777,7 +777,7 @@ static int noce_try_addcc (struct noce_if_info *);
 static int noce_try_store_flag_constants (struct noce_if_info *);
 static int noce_try_store_flag_mask (struct noce_if_info *);
 static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,
-			    rtx, rtx, rtx, rtx = NULL);
+			    rtx, rtx, rtx, rtx = NULL, rtx = NULL);
 static int noce_try_cmove (struct noce_if_info *);
 static int noce_try_cmove_arith (struct noce_if_info *);
 static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
@@ -1660,7 +1660,8 @@ noce_try_store_flag_mask (struct noce_if_info *if_info)
 
 static rtx
 noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
-		 rtx cmp_a, rtx cmp_b, rtx vfalse, rtx vtrue, rtx cc_cmp)
+		 rtx cmp_a, rtx cmp_b, rtx vfalse, rtx vtrue, rtx cc_cmp,
+		 rtx rev_cc_cmp)
 {
   rtx target ATTRIBUTE_UNUSED;
   int unsignedp ATTRIBUTE_UNUSED;
@@ -1708,7 +1709,7 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
 
   target = emit_conditional_move (x, code, cmp_a, cmp_b, VOIDmode,
 				  vtrue, vfalse, GET_MODE (x),
-				  unsignedp, cc_cmp);
+				  unsignedp, cc_cmp, rev_cc_cmp);
   if (target)
     return target;
 
@@ -3197,6 +3198,7 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
   rtx_code cond_code = GET_CODE (cond);
 
   rtx cc_cmp = noce_get_compare (jump, false);
+  rtx rev_cc_cmp = noce_get_compare (jump, if_info->then_else_reversed);
 
   /* The true targets for a conditional move.  */
   auto_vec<rtx> targets;
@@ -3313,7 +3315,7 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
       */
 
       rtx_insn *seq, *seq1 = 0, *seq2 = 0;
-      rtx temp_dest, temp_dest1, temp_dest2;
+      rtx temp_dest, temp_dest1 = NULL_RTX, temp_dest2 = NULL_RTX;
       unsigned int cost1 = 0, cost2 = 0;
       bool speed_p = if_info->speed_p;
       push_topmost_sequence ();
@@ -3324,7 +3326,13 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
 	  temp_dest1 = noce_emit_cmove (if_info, temp, cond_code,
 	      x, y, new_val, old_val, NULL_RTX);
 	else
-	  noce_emit_move_insn (target, new_val);
+	  {
+	    temp_dest1 = target;
+	    if (if_info->then_else_reversed)
+	      noce_emit_move_insn (target, old_val);
+	    else
+	      noce_emit_move_insn (target, new_val);
+	  }
 
 	/* If we failed to expand the conditional move, drop out and don't
 	   try to continue.  */
@@ -3352,9 +3360,15 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
 	start_sequence ();
 	if (!need_no_cmovs.get (insn))
 	  temp_dest2 = noce_emit_cmove (if_info, target, cond_code,
-	      x, y, new_val, old_val, cc_cmp);
+	      x, y, new_val, old_val, cc_cmp, rev_cc_cmp);
 	else
-	  noce_emit_move_insn (target, new_val);
+	  {
+	    temp_dest2 = target;
+	    if (if_info->then_else_reversed)
+	      noce_emit_move_insn (target, old_val);
+	    else
+	      noce_emit_move_insn (target, new_val);
+	  }
 
 	/* If we failed to expand the conditional move, drop out and don't
 	   try to continue.  */
@@ -3386,7 +3400,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
 	  /* We do not require a temp register, so remove it from the list.  */
 	  seq = seq2;
 	  temp_dest = temp_dest2;
-	  temps_created.remove (temp);
+	  if (temps_created.get (temp))
+	    temps_created.remove (temp);
 	}
       else
 	{
@@ -3436,7 +3451,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
   /* Mark all our temporaries and targets as used.  */
   for (int i = 0; i < count; i++)
     {
-      set_used_flags (temporaries[i]);
+      if (temps_created.get(targets[i]))
+	set_used_flags (temporaries[i]);
       set_used_flags (targets[i]);
     }
 
@@ -3941,7 +3957,7 @@ check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)
 
   FOR_BB_INSNS (bb, insn)
     {
-      rtx set, src, dest;
+      rtx set, dest;
 
       if (!active_insn_p (insn))
 	continue;
@@ -3950,7 +3966,6 @@ check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)
       if (set == NULL_RTX)
 	continue;
 
-      src = SET_SRC (set);
       dest = SET_DEST (set);
 
       /* noce_emit_cmove will emit the condition check every time it is called
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 3ce6f8cdd30..b1d88fd58b4 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -4325,7 +4325,8 @@ emit_indirect_jump (rtx loc)
 rtx
 emit_conditional_move (rtx target, enum rtx_code code, rtx op0, rtx op1,
 		       machine_mode cmode, rtx op2, rtx op3,
-		       machine_mode mode, int unsignedp, rtx cc_cmp)
+		       machine_mode mode, int unsignedp, rtx cc_cmp,
+		       rtx rev_cc_cmp)
 {
   rtx comparison;
   rtx_insn *last;
@@ -4372,6 +4373,7 @@ emit_conditional_move (rtx target, enum rtx_code code, rtx op0, rtx op1,
       std::swap (op2, op3);
       code = reversed;
       swapped = true;
+      cc_cmp = rev_cc_cmp;
     }
 
   if (mode == VOIDmode)
diff --git a/gcc/optabs.h b/gcc/optabs.h
index c4540f87144..17b4e71ffd4 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -258,7 +258,7 @@ extern void emit_indirect_jump (rtx);
 
 /* Emit a conditional move operation.  */
 rtx emit_conditional_move (rtx, enum rtx_code, rtx, rtx, machine_mode,
-			   rtx, rtx, machine_mode, int, rtx = NULL);
+			   rtx, rtx, machine_mode, int, rtx = NULL, rtx = NULL);
 
 /* Emit a conditional negate or bitwise complement operation.  */
 rtx emit_conditional_neg_or_complement (rtx, rtx_code, machine_mode, rtx,
-- 
2.17.0

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 1/9] ifcvt: Store the number of created cmovs.
  2019-08-02 15:10 [PATCH 0/9] Improve icvt "convert multiple" Robin Dapp
  2019-08-02 15:10 ` [PATCH 7/9] ifcvt: Emit two cmov variants and choose the less expensive one Robin Dapp
@ 2019-08-02 15:10 ` Robin Dapp
  2019-08-02 15:10 ` [PATCH 2/9] ifcvt: Use enum instead of transform_name string Robin Dapp
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 25+ messages in thread
From: Robin Dapp @ 2019-08-02 15:10 UTC (permalink / raw)
  To: gcc-patches; +Cc: james.greenhalgh, rdapp

This patch saves the number of created conditional moves by
noce_convert_multiple_sets in the IF_INFO struct.  This may be used by
the backend to easier decide whether to accept a generated sequence or
not.

---
 gcc/ifcvt.c | 10 ++++++++--
 gcc/ifcvt.h |  4 ++++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index e0c9522057a..b80dbc43d83 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -3247,9 +3247,14 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
   /* Actually emit the sequence if it isn't too expensive.  */
   rtx_insn *seq = get_insns ();
 
+  if_info->transform_name = "noce_convert_multiple_sets";
+  if_info->created_cmovs = count;
+
   if (!targetm.noce_conversion_profitable_p (seq, if_info))
     {
       end_sequence ();
+      if_info->transform_name = "";
+      if_info->created_cmovs = 0;
       return FALSE;
     }
 
@@ -3296,7 +3301,7 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
     }
 
   num_updated_if_blocks++;
-  if_info->transform_name = "noce_convert_multiple_sets";
+
   return TRUE;
 }
 
@@ -4060,7 +4065,8 @@ noce_find_if_block (basic_block test_bb, edge then_edge, edge else_edge,
      and jump_insns are always given a cost of 1 by seq_cost, so treat
      both instructions as having cost COSTS_N_INSNS (1).  */
   if_info.original_cost = COSTS_N_INSNS (2);
-
+  if_info.transform_name = "";
+  if_info.created_cmovs = 0;
 
   /* Do the real work.  */
 
diff --git a/gcc/ifcvt.h b/gcc/ifcvt.h
index 153ad961b2c..130559c0911 100644
--- a/gcc/ifcvt.h
+++ b/gcc/ifcvt.h
@@ -108,6 +108,10 @@ struct noce_if_info
   /* The name of the noce transform that succeeded in if-converting
      this structure.  Used for debugging.  */
   const char *transform_name;
+
+  /* The number of created conditional moves in case we convert multiple
+     sets.  */
+  unsigned int created_cmovs;
 };
 
 #endif /* GCC_IFCVT_H */
-- 
2.17.0

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 4/9] ifcvt: Estimate original costs before convert_multiple.
  2019-08-02 15:10 [PATCH 0/9] Improve icvt "convert multiple" Robin Dapp
                   ` (6 preceding siblings ...)
  2019-08-02 15:10 ` [PATCH 3/9] ifcvt: Only created temporaries as needed Robin Dapp
@ 2019-08-02 15:10 ` Robin Dapp
  2019-08-06 20:30   ` Richard Sandiford
  2019-08-02 15:10 ` [PATCH 5/9] ifcvt: Allow constants operands in noce_convert_multiple_sets Robin Dapp
  8 siblings, 1 reply; 25+ messages in thread
From: Robin Dapp @ 2019-08-02 15:10 UTC (permalink / raw)
  To: gcc-patches; +Cc: james.greenhalgh, rdapp

This patch extends bb_ok_for_noce_convert_multiple_sets by a temporary
cost estimation that can be used by noce_convert_multiple_sets.

---
 gcc/ifcvt.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 253b8a96c1a..55205cac153 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -3333,11 +3333,13 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
    fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  */
 
 static bool
-bb_ok_for_noce_convert_multiple_sets (basic_block test_bb)
+bb_ok_for_noce_convert_multiple_sets (basic_block test_bb, unsigned *cost)
 {
   rtx_insn *insn;
   unsigned count = 0;
   unsigned param = PARAM_VALUE (PARAM_MAX_RTL_IF_CONVERSION_INSNS);
+  bool speed_p = optimize_bb_for_speed_p (test_bb);
+  unsigned potential_cost = 0;
 
   FOR_BB_INSNS (test_bb, insn)
     {
@@ -3373,9 +3375,14 @@ bb_ok_for_noce_convert_multiple_sets (basic_block test_bb)
       if (!can_conditionally_move_p (GET_MODE (dest)))
 	return false;
 
+      rtx sset = single_set (insn);
+      potential_cost += pattern_cost (sset, speed_p);
+
       count++;
     }
 
+  *cost += potential_cost;
+
   /* If we would only put out one conditional move, the other strategies
      this pass tries are better optimized and will be more appropriate.
      Some targets want to strictly limit the number of conditional moves
@@ -3414,11 +3421,15 @@ noce_process_if_block (struct noce_if_info *if_info)
      ??? For future expansion, further expand the "multiple X" rules.  */
 
   /* First look for multiple SETS.  */
+  unsigned int mcost = if_info->original_cost;
+  unsigned tmp_cost = if_info->original_cost;
   if (!else_bb
       && HAVE_conditional_move
       && !HAVE_cc0
-      && bb_ok_for_noce_convert_multiple_sets (then_bb))
+      && bb_ok_for_noce_convert_multiple_sets (then_bb, &mcost))
     {
+      /* Temporarily set the original costs to what we estimated.  */
+      if_info->original_cost = mcost;
       if (noce_convert_multiple_sets (if_info))
 	{
 	  if (dump_file && if_info->transform)
@@ -3427,6 +3438,8 @@ noce_process_if_block (struct noce_if_info *if_info)
 	  return TRUE;
 	}
     }
+  /* Restore the original costs.  */
+  if_info->original_cost = tmp_cost;
 
   bool speed_p = optimize_bb_for_speed_p (test_bb);
   unsigned int then_cost = 0, else_cost = 0;
-- 
2.17.0

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 6/9] ifcvt: Extract cc comparison from jump.
  2019-08-02 15:10 [PATCH 0/9] Improve icvt "convert multiple" Robin Dapp
                   ` (2 preceding siblings ...)
  2019-08-02 15:10 ` [PATCH 2/9] ifcvt: Use enum instead of transform_name string Robin Dapp
@ 2019-08-02 15:10 ` Robin Dapp
  2019-10-27 14:31   ` Richard Sandiford
  2019-08-02 15:10 ` [PATCH 8/9] ifcvt: Handle swap-style idioms differently Robin Dapp
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 25+ messages in thread
From: Robin Dapp @ 2019-08-02 15:10 UTC (permalink / raw)
  To: gcc-patches; +Cc: james.greenhalgh, rdapp

This patch extracts a cc comparison from the initial compare/jump
insn and allows it to be passed to noce_emit_cmove and
emit_conditional_move.
---
 gcc/ifcvt.c  | 68 ++++++++++++++++++++++++++++++++++++++++++++++++----
 gcc/optabs.c |  7 ++++--
 gcc/optabs.h |  2 +-
 3 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 99716e5f63c..3db707e1fd1 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -86,6 +86,7 @@ static rtx_insn *find_active_insn_after (basic_block, rtx_insn *);
 static basic_block block_fallthru (basic_block);
 static rtx cond_exec_get_condition (rtx_insn *);
 static rtx noce_get_condition (rtx_insn *, rtx_insn **, bool);
+static rtx noce_get_compare (rtx_insn *, bool);
 static int noce_operand_ok (const_rtx);
 static void merge_if_block (ce_if_block *);
 static int find_cond_trap (basic_block, edge, edge);
@@ -775,7 +776,7 @@ static int noce_try_addcc (struct noce_if_info *);
 static int noce_try_store_flag_constants (struct noce_if_info *);
 static int noce_try_store_flag_mask (struct noce_if_info *);
 static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,
-			    rtx, rtx, rtx);
+			    rtx, rtx, rtx, rtx = NULL);
 static int noce_try_cmove (struct noce_if_info *);
 static int noce_try_cmove_arith (struct noce_if_info *);
 static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
@@ -1658,7 +1659,7 @@ noce_try_store_flag_mask (struct noce_if_info *if_info)
 
 static rtx
 noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
-		 rtx cmp_a, rtx cmp_b, rtx vfalse, rtx vtrue)
+		 rtx cmp_a, rtx cmp_b, rtx vfalse, rtx vtrue, rtx cc_cmp)
 {
   rtx target ATTRIBUTE_UNUSED;
   int unsignedp ATTRIBUTE_UNUSED;
@@ -1706,7 +1707,7 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
 
   target = emit_conditional_move (x, code, cmp_a, cmp_b, VOIDmode,
 				  vtrue, vfalse, GET_MODE (x),
-				  unsignedp);
+				  unsignedp, cc_cmp);
   if (target)
     return target;
 
@@ -2970,6 +2971,60 @@ noce_get_condition (rtx_insn *jump, rtx_insn **earliest, bool then_else_reversed
   return tmp;
 }
 
+/* Get the comparison from the insn.  */
+static rtx
+noce_get_compare (rtx_insn *jump, bool then_else_reversed)
+{
+  enum rtx_code code;
+  const_rtx set;
+  rtx tem;
+  rtx op0, op1;
+
+  if (!have_cbranchcc4)
+    return 0;
+
+  if (! any_condjump_p (jump))
+    return NULL_RTX;
+
+  set = pc_set (jump);
+
+  /* If this branches to JUMP_LABEL when the condition is false,
+     reverse the condition.  */
+  bool reverse = (GET_CODE (XEXP (SET_SRC (set), 2)) == LABEL_REF
+	     && label_ref_label (XEXP (SET_SRC (set), 2)) == JUMP_LABEL (jump));
+
+  /* We may have to reverse because the caller's if block is not canonical,
+     i.e. the THEN block isn't the fallthrough block for the TEST block
+     (see find_if_header).  */
+  if (then_else_reversed)
+    reverse = !reverse;
+
+  rtx cond = XEXP (SET_SRC (set), 0);
+
+  code = GET_CODE (cond);
+  op0 = XEXP (cond, 0);
+  op1 = XEXP (cond, 1);
+
+  if (reverse)
+    code = reversed_comparison_code (cond, jump);
+  if (code == UNKNOWN)
+    return 0;
+
+  /* If constant is first, put it last.  */
+  if (CONSTANT_P (op0))
+    code = swap_condition (code), tem = op0, op0 = op1, op1 = tem;
+
+  /* Never return CC0; return zero instead.  */
+  if (CC0_P (op0))
+    return 0;
+
+  /* We promised to return a comparison.  */
+  rtx ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
+  if (COMPARISON_P (ret))
+    return ret;
+  return 0;
+}
+
 /* Return true if OP is ok for if-then-else processing.  */
 
 static int
@@ -3140,6 +3195,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
   rtx y = XEXP (cond, 1);
   rtx_code cond_code = GET_CODE (cond);
 
+  rtx cc_cmp = noce_get_compare (jump, false);
+
   /* The true targets for a conditional move.  */
   auto_vec<rtx> targets;
   /* The temporaries introduced to allow us to not consider register
@@ -3151,7 +3208,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
 
   hash_map<rtx, bool> need_temps;
 
-  check_need_temps (then_bb, &need_temps, cond);
+  if (!cc_cmp)
+    check_need_temps (then_bb, &need_temps, cond);
 
   hash_map<rtx, bool> temps_created;
 
@@ -3242,7 +3300,7 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
 
       /* Actually emit the conditional move.  */
       rtx temp_dest = noce_emit_cmove (if_info, temp, cond_code,
-				       x, y, new_val, old_val);
+				       x, y, new_val, old_val, cc_cmp);
 
       /* If we failed to expand the conditional move, drop out and don't
 	 try to continue.  */
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 06bcaab1f55..3ce6f8cdd30 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -4325,7 +4325,7 @@ emit_indirect_jump (rtx loc)
 rtx
 emit_conditional_move (rtx target, enum rtx_code code, rtx op0, rtx op1,
 		       machine_mode cmode, rtx op2, rtx op3,
-		       machine_mode mode, int unsignedp)
+		       machine_mode mode, int unsignedp, rtx cc_cmp)
 {
   rtx comparison;
   rtx_insn *last;
@@ -4408,7 +4408,10 @@ emit_conditional_move (rtx target, enum rtx_code code, rtx op0, rtx op1,
 	      class expand_operand ops[4];
 
 	      create_output_operand (&ops[0], target, mode);
-	      create_fixed_operand (&ops[1], comparison);
+	      if (cc_cmp)
+		create_fixed_operand (&ops[1], cc_cmp);
+	      else
+		create_fixed_operand (&ops[1], comparison);
 	      create_input_operand (&ops[2], op2, mode);
 	      create_input_operand (&ops[3], op3, mode);
 	      if (maybe_expand_insn (icode, 4, ops))
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 0654107d6e3..c4540f87144 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -258,7 +258,7 @@ extern void emit_indirect_jump (rtx);
 
 /* Emit a conditional move operation.  */
 rtx emit_conditional_move (rtx, enum rtx_code, rtx, rtx, machine_mode,
-			   rtx, rtx, machine_mode, int);
+			   rtx, rtx, machine_mode, int, rtx = NULL);
 
 /* Emit a conditional negate or bitwise complement operation.  */
 rtx emit_conditional_neg_or_complement (rtx, rtx_code, machine_mode, rtx,
-- 
2.17.0

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 5/9] ifcvt: Allow constants operands in noce_convert_multiple_sets.
  2019-08-02 15:10 [PATCH 0/9] Improve icvt "convert multiple" Robin Dapp
                   ` (7 preceding siblings ...)
  2019-08-02 15:10 ` [PATCH 4/9] ifcvt: Estimate original costs before convert_multiple Robin Dapp
@ 2019-08-02 15:10 ` Robin Dapp
  2019-08-06 20:37   ` Richard Sandiford
  2019-10-27 14:12   ` Richard Sandiford
  8 siblings, 2 replies; 25+ messages in thread
From: Robin Dapp @ 2019-08-02 15:10 UTC (permalink / raw)
  To: gcc-patches; +Cc: james.greenhalgh, rdapp

This patch checks allows immediate then/else operands for cmovs.
We rely on,emit_conditional_move returning NULL if something unsupported
was generated.

Also, minor refactoring is performed.

--

gcc/ChangeLog:

2018-11-14  Robin Dapp  <rdapp@linux.ibm.com>

	* ifcvt.c (have_const_cmov): New function.
	(noce_convert_multiple_sets): Allow constants if supported.
	(bb_ok_for_noce_convert_multiple_sets): Likewise.
	(check_cond_move_block): Refactor.
---
 gcc/ifcvt.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 55205cac153..99716e5f63c 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -3214,7 +3214,9 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
 	 we'll end up trying to emit r4:HI = cond ? (r1:SI) : (r3:HI).
 	 Wrap the two cmove operands into subregs if appropriate to prevent
 	 that.  */
-      if (GET_MODE (new_val) != GET_MODE (temp))
+
+      if (!CONST_INT_P (new_val)
+         && GET_MODE (new_val) != GET_MODE (temp))
 	{
 	  machine_mode src_mode = GET_MODE (new_val);
 	  machine_mode dst_mode = GET_MODE (temp);
@@ -3225,7 +3227,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
 	    }
 	  new_val = lowpart_subreg (dst_mode, new_val, src_mode);
 	}
-      if (GET_MODE (old_val) != GET_MODE (temp))
+      if (!CONST_INT_P (old_val)
+         && GET_MODE (old_val) != GET_MODE (temp))
 	{
 	  machine_mode src_mode = GET_MODE (old_val);
 	  machine_mode dst_mode = GET_MODE (temp);
@@ -3362,9 +3365,9 @@ bb_ok_for_noce_convert_multiple_sets (basic_block test_bb, unsigned *cost)
       if (!REG_P (dest))
 	return false;
 
-      if (!(REG_P (src)
-	   || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
-	       && subreg_lowpart_p (src))))
+      if (!((REG_P (src) || (CONST_INT_P (src)))
+	    || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
+	      && subreg_lowpart_p (src))))
 	return false;
 
       /* Destination must be appropriate for a conditional write.  */
@@ -3724,7 +3727,7 @@ check_cond_move_block (basic_block bb,
     {
       rtx set, dest, src;
 
-      if (!NONDEBUG_INSN_P (insn) || JUMP_P (insn))
+      if (!active_insn_p (insn))
 	continue;
       set = single_set (insn);
       if (!set)
@@ -3740,10 +3743,8 @@ check_cond_move_block (basic_block bb,
       if (!CONSTANT_P (src) && !register_operand (src, VOIDmode))
 	return FALSE;
 
-      if (side_effects_p (src) || side_effects_p (dest))
-	return FALSE;
-
-      if (may_trap_p (src) || may_trap_p (dest))
+      /* Check for side effects and trapping.  */
+      if (!noce_operand_ok (src) || !noce_operand_ok (dest))
 	return FALSE;
 
       /* Don't try to handle this if the source register was
-- 
2.17.0

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 2/9] ifcvt: Use enum instead of transform_name string.
  2019-08-02 15:10 [PATCH 0/9] Improve icvt "convert multiple" Robin Dapp
  2019-08-02 15:10 ` [PATCH 7/9] ifcvt: Emit two cmov variants and choose the less expensive one Robin Dapp
  2019-08-02 15:10 ` [PATCH 1/9] ifcvt: Store the number of created cmovs Robin Dapp
@ 2019-08-02 15:10 ` Robin Dapp
  2019-08-06 21:03   ` Richard Sandiford
  2019-08-02 15:10 ` [PATCH 6/9] ifcvt: Extract cc comparison from jump Robin Dapp
                   ` (5 subsequent siblings)
  8 siblings, 1 reply; 25+ messages in thread
From: Robin Dapp @ 2019-08-02 15:10 UTC (permalink / raw)
  To: gcc-patches; +Cc: james.greenhalgh, rdapp

This patch introduces an enum for ifcvt's various noce transformations.
As the transformation might be queried by the backend, I find it nicer
to allow checking for a proper type instead of a string comparison.

---
 gcc/ifcvt.c | 46 ++++++++++++++++++------------------
 gcc/ifcvt.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 88 insertions(+), 25 deletions(-)

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index b80dbc43d83..e95ff9ee9b0 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -1105,7 +1105,7 @@ noce_try_move (struct noce_if_info *if_info)
 	  emit_insn_before_setloc (seq, if_info->jump,
 				   INSN_LOCATION (if_info->insn_a));
 	}
-      if_info->transform_name = "noce_try_move";
+      if_info->transform = ifcvt_transform_noce_try_move;
       return TRUE;
     }
   return FALSE;
@@ -1139,7 +1139,7 @@ noce_try_ifelse_collapse (struct noce_if_info * if_info)
   emit_insn_before_setloc (seq, if_info->jump,
 			  INSN_LOCATION (if_info->insn_a));
 
-  if_info->transform_name = "noce_try_ifelse_collapse";
+  if_info->transform = ifcvt_transform_noce_try_ifelse_collapse;
   return TRUE;
 }
 
@@ -1186,7 +1186,7 @@ noce_try_store_flag (struct noce_if_info *if_info)
 
       emit_insn_before_setloc (seq, if_info->jump,
 			       INSN_LOCATION (if_info->insn_a));
-      if_info->transform_name = "noce_try_store_flag";
+      if_info->transform = ifcvt_transform_noce_try_store_flag;
       return TRUE;
     }
   else
@@ -1265,7 +1265,7 @@ noce_try_inverse_constants (struct noce_if_info *if_info)
 
       emit_insn_before_setloc (seq, if_info->jump,
 			       INSN_LOCATION (if_info->insn_a));
-      if_info->transform_name = "noce_try_inverse_constants";
+      if_info->transform = ifcvt_transform_noce_try_inverse_constants;
       return true;
     }
 
@@ -1485,7 +1485,7 @@ noce_try_store_flag_constants (struct noce_if_info *if_info)
 
       emit_insn_before_setloc (seq, if_info->jump,
 			       INSN_LOCATION (if_info->insn_a));
-      if_info->transform_name = "noce_try_store_flag_constants";
+      if_info->transform = ifcvt_transform_noce_try_store_flag_constants;
 
       return TRUE;
     }
@@ -1546,7 +1546,7 @@ noce_try_addcc (struct noce_if_info *if_info)
 
 	      emit_insn_before_setloc (seq, if_info->jump,
 				       INSN_LOCATION (if_info->insn_a));
-	      if_info->transform_name = "noce_try_addcc";
+	      if_info->transform = ifcvt_transform_noce_try_addcc;
 
 	      return TRUE;
 	    }
@@ -1588,7 +1588,7 @@ noce_try_addcc (struct noce_if_info *if_info)
 
 	      emit_insn_before_setloc (seq, if_info->jump,
 				       INSN_LOCATION (if_info->insn_a));
-	      if_info->transform_name = "noce_try_addcc";
+	      if_info->transform = ifcvt_transform_noce_try_addcc;
 	      return TRUE;
 	    }
 	  end_sequence ();
@@ -1639,7 +1639,7 @@ noce_try_store_flag_mask (struct noce_if_info *if_info)
 
 	  emit_insn_before_setloc (seq, if_info->jump,
 				   INSN_LOCATION (if_info->insn_a));
-	  if_info->transform_name = "noce_try_store_flag_mask";
+	  if_info->transform = ifcvt_transform_noce_try_store_flag_mask;
 
 	  return TRUE;
 	}
@@ -1791,7 +1791,7 @@ noce_try_cmove (struct noce_if_info *if_info)
 
 	  emit_insn_before_setloc (seq, if_info->jump,
 				   INSN_LOCATION (if_info->insn_a));
-	  if_info->transform_name = "noce_try_cmove";
+	  if_info->transform = ifcvt_transform_noce_try_cmove;
 
 	  return TRUE;
 	}
@@ -1844,7 +1844,7 @@ noce_try_cmove (struct noce_if_info *if_info)
 
 	      emit_insn_before_setloc (seq, if_info->jump,
 				   INSN_LOCATION (if_info->insn_a));
-	      if_info->transform_name = "noce_try_cmove";
+	      if_info->transform = ifcvt_transform_noce_try_cmove;
 	      return TRUE;
 	    }
 	  else
@@ -2286,7 +2286,7 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
 
   emit_insn_before_setloc (ifcvt_seq, if_info->jump,
 			   INSN_LOCATION (if_info->insn_a));
-  if_info->transform_name = "noce_try_cmove_arith";
+  if_info->transform = ifcvt_transform_noce_try_cmove_arith;
   return TRUE;
 
  end_seq_and_fail:
@@ -2544,7 +2544,7 @@ noce_try_minmax (struct noce_if_info *if_info)
   if_info->cond = cond;
   if_info->cond_earliest = earliest;
   if_info->rev_cond = NULL_RTX;
-  if_info->transform_name = "noce_try_minmax";
+  if_info->transform = ifcvt_transform_noce_try_minmax;
 
   return TRUE;
 }
@@ -2712,7 +2712,7 @@ noce_try_abs (struct noce_if_info *if_info)
   if_info->cond = cond;
   if_info->cond_earliest = earliest;
   if_info->rev_cond = NULL_RTX;
-  if_info->transform_name = "noce_try_abs";
+  if_info->transform = ifcvt_transform_noce_try_abs;
 
   return TRUE;
 }
@@ -2794,7 +2794,7 @@ noce_try_sign_mask (struct noce_if_info *if_info)
     return FALSE;
 
   emit_insn_before_setloc (seq, if_info->jump, INSN_LOCATION (if_info->insn_a));
-  if_info->transform_name = "noce_try_sign_mask";
+  if_info->transform = ifcvt_transform_noce_try_sign_mask;
 
   return TRUE;
 }
@@ -2904,7 +2904,7 @@ noce_try_bitop (struct noce_if_info *if_info)
       emit_insn_before_setloc (seq, if_info->jump,
 			       INSN_LOCATION (if_info->insn_a));
     }
-  if_info->transform_name = "noce_try_bitop";
+  if_info->transform = ifcvt_transform_noce_try_bitop;
   return TRUE;
 }
 
@@ -3244,16 +3244,17 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
   for (int i = 0; i < count; i++)
     noce_emit_move_insn (targets[i], temporaries[i]);
 
+
   /* Actually emit the sequence if it isn't too expensive.  */
   rtx_insn *seq = get_insns ();
 
-  if_info->transform_name = "noce_convert_multiple_sets";
+  if_info->transform = ifcvt_transform_noce_convert_multiple_sets;
   if_info->created_cmovs = count;
 
   if (!targetm.noce_conversion_profitable_p (seq, if_info))
     {
       end_sequence ();
-      if_info->transform_name = "";
+      if_info->transform = ifcvt_transform_none;
       if_info->created_cmovs = 0;
       return FALSE;
     }
@@ -3400,15 +3401,16 @@ noce_process_if_block (struct noce_if_info *if_info)
     {
       if (noce_convert_multiple_sets (if_info))
 	{
-	  if (dump_file && if_info->transform_name)
+	  if (dump_file && if_info->transform)
 	    fprintf (dump_file, "if-conversion succeeded through %s\n",
-		     if_info->transform_name);
+		ifcvt_get_transform_name (if_info->transform));
 	  return TRUE;
 	}
     }
 
   bool speed_p = optimize_bb_for_speed_p (test_bb);
   unsigned int then_cost = 0, else_cost = 0;
+
   if (!bb_valid_for_noce_process_p (then_bb, cond, &then_cost,
 				    &if_info->then_simple))
     return false;
@@ -3619,9 +3621,9 @@ noce_process_if_block (struct noce_if_info *if_info)
   return FALSE;
 
  success:
-  if (dump_file && if_info->transform_name)
+  if (dump_file && if_info->transform)
     fprintf (dump_file, "if-conversion succeeded through %s\n",
-	     if_info->transform_name);
+	ifcvt_get_transform_name (if_info->transform));
 
   /* If we used a temporary, fix it up now.  */
   if (orig_x != x)
@@ -4065,7 +4067,7 @@ noce_find_if_block (basic_block test_bb, edge then_edge, edge else_edge,
      and jump_insns are always given a cost of 1 by seq_cost, so treat
      both instructions as having cost COSTS_N_INSNS (1).  */
   if_info.original_cost = COSTS_N_INSNS (2);
-  if_info.transform_name = "";
+  if_info.transform = ifcvt_transform_none;
   if_info.created_cmovs = 0;
 
   /* Do the real work.  */
diff --git a/gcc/ifcvt.h b/gcc/ifcvt.h
index 130559c0911..3776a25c3f6 100644
--- a/gcc/ifcvt.h
+++ b/gcc/ifcvt.h
@@ -40,6 +40,66 @@ struct ce_if_block
   int pass;				/* Pass number.  */
 };
 
+enum ifcvt_transform
+{
+  ifcvt_transform_none = 0,
+  ifcvt_transform_noce_try_move,
+  ifcvt_transform_noce_try_ifelse_collapse,
+  ifcvt_transform_noce_try_store_flag,
+  ifcvt_transform_noce_try_inverse_constants,
+  ifcvt_transform_noce_try_store_flag_constants,
+  ifcvt_transform_noce_try_addcc,
+  ifcvt_transform_noce_try_store_flag_mask,
+  ifcvt_transform_noce_try_cmove,
+  ifcvt_transform_noce_try_cmove_arith,
+  ifcvt_transform_noce_try_minmax,
+  ifcvt_transform_noce_try_abs,
+  ifcvt_transform_noce_try_sign_mask,
+  ifcvt_transform_noce_try_bitop,
+  ifcvt_transform_noce_convert_multiple_sets
+};
+
+inline const char *
+ifcvt_get_transform_name (enum ifcvt_transform transform)
+{
+  switch (transform)
+    {
+    case ifcvt_transform_none:
+      return "";
+    case ifcvt_transform_noce_try_move:
+      return "noce_try_move";
+    case ifcvt_transform_noce_try_ifelse_collapse:
+      return "noce_try_ifelse_collapse";
+    case ifcvt_transform_noce_try_store_flag:
+      return "noce_try_store_flag";
+    case ifcvt_transform_noce_try_inverse_constants:
+      return "noce_try_inverse_constants";
+    case ifcvt_transform_noce_try_store_flag_constants:
+      return "noce_try_store_flag_constants";
+    case ifcvt_transform_noce_try_addcc:
+      return "noce_try_addcc";
+    case ifcvt_transform_noce_try_store_flag_mask:
+      return "noce_try_store_flag_mask";
+    case ifcvt_transform_noce_try_cmove:
+      return "noce_try_cmove";
+    case ifcvt_transform_noce_try_cmove_arith:
+      return "noce_try_cmove_arith";
+    case ifcvt_transform_noce_try_minmax:
+      return "noce_try_minmax";
+    case ifcvt_transform_noce_try_abs:
+      return "noce_try_abs";
+    case ifcvt_transform_noce_try_sign_mask:
+      return "noce_try_sign_mask";
+    case ifcvt_transform_noce_try_bitop:
+      return "noce_try_bitop";
+    case ifcvt_transform_noce_convert_multiple_sets:
+      return "noce_convert_multiple_sets";
+    default:
+      return "unknown transform";
+    }
+}
+
+
 /* Used by noce_process_if_block to communicate with its subroutines.
 
    The subroutines know that A and B may be evaluated freely.  They
@@ -105,9 +165,10 @@ struct noce_if_info
      generate to replace this branch.  */
   unsigned int max_seq_cost;
 
-  /* The name of the noce transform that succeeded in if-converting
-     this structure.  Used for debugging.  */
-  const char *transform_name;
+  /* The noce transform that succeeded in if-converting this structure.
+     Used for letting the backend determine which transform succeeded
+     and for debugging output.  */
+  enum ifcvt_transform transform;
 
   /* The number of created conditional moves in case we convert multiple
      sets.  */
-- 
2.17.0

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 0/9] Improve icvt "convert multiple"
@ 2019-08-02 15:10 Robin Dapp
  2019-08-02 15:10 ` [PATCH 7/9] ifcvt: Emit two cmov variants and choose the less expensive one Robin Dapp
                   ` (8 more replies)
  0 siblings, 9 replies; 25+ messages in thread
From: Robin Dapp @ 2019-08-02 15:10 UTC (permalink / raw)
  To: gcc-patches; +Cc: james.greenhalgh, rdapp

Hi,

this patch series tries to address some problems of noce_convert_multiple_sets.
The main drawback is that it emits temporaries for each conversion which are
supposed to be eliminated in a later pass but are still included in cost
estimation.

In an attempt to alleviate this I have ifcvt store the number of transformed
moves (patches 1 and 2) to allow backends to simply consider this number when
implementing their own conversion_profitable hook.  This never went upstream
and I included it in this series even though it should not be strictly
necessary after the other changes.

Patch 3 limits the number of situations where temporaries are needed and patch
4 tries to estimate the original seq's costs which was only done after
noce_convert_multiple_sets (and no matter how well the converted seq's costs
were estimated, would always lose against the initial COSTS_N_INSNS(2)).

Patch 6 extracts a comparison vs the condition code.  At least the S/390 and
the i386 backend emit seqs like
  compare
  compare
  cmove
  compare
  cmove
because noce_emit_cmove gets passed a canonicalized comparison (not vs the cc)
causing the backend to create a cc comparison again.

Patch 7 duplicates the noce_emit_cmove mechanism, passing a canonicalized
comparison first, costing the sequence, then passing a cc comparison and
comparing costs of both variants.  The better one is then emitted.

Patch 8 and 9 fix some missing things in patch 7.  I split them for the sake of
clarity.

In total, the series still feels a bit clunky.  If somebody has a nice idea how
to simplify things, I'd gladly incorporate it.  I bootstrapped and regtested on
s390 and i386, ppc is still running.

There are also some changes needed in the S/390 backend that I'm going to send
separately at a later time.

Regards
 Robin

Robin Dapp (9):
  ifcvt: Store the number of created cmovs.
  ifcvt: Use enum instead of transform_name string.
  ifcvt: Only created temporaries as needed.
  ifcvt: Estimate original costs before convert_multiple.
  ifcvt: Allow constants operands in noce_convert_multiple_sets.
  ifcvt: Extract cc comparison from jump.
  ifcvt: Emit two cmov variants and choose the less expensive one.
  ifcvt: Handle swap-style idioms differently.
  ifcvt: Also pass reversed cc comparison.

 gcc/ifcvt.c  | 400 ++++++++++++++++++++++++++++++++++++++++++++-------
 gcc/ifcvt.h  |  71 ++++++++-
 gcc/optabs.c |   9 +-
 gcc/optabs.h |   2 +-
 4 files changed, 421 insertions(+), 61 deletions(-)

-- 
2.17.0

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 8/9] ifcvt: Handle swap-style idioms differently.
  2019-08-02 15:10 [PATCH 0/9] Improve icvt "convert multiple" Robin Dapp
                   ` (3 preceding siblings ...)
  2019-08-02 15:10 ` [PATCH 6/9] ifcvt: Extract cc comparison from jump Robin Dapp
@ 2019-08-02 15:10 ` Robin Dapp
  2019-08-06 20:47   ` Richard Sandiford
  2019-08-02 15:10 ` [PATCH 9/9] ifcvt: Also pass reversed cc comparison Robin Dapp
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 25+ messages in thread
From: Robin Dapp @ 2019-08-02 15:10 UTC (permalink / raw)
  To: gcc-patches; +Cc: james.greenhalgh, rdapp

A swap-style idiom like
 tmp = a
   a = b
   b = tmp
would be transformed like
 tmp_tmp = cond ? a : tmp
 tmp_a   = cond ? b : a
 tmp_b   = cond ? tmp_tmp : b
 [...]
including rewiring the first source operand to previous writes (e.g. tmp ->
tmp_tmp).

The code would recognize this, though, and change the last line to
 tmp_b   = cond ? a : b.

Without additional temporaries we can now emit the following sequence:
 tmp = a	     // (no condition here)
 a = cond ? b : a
 b = cond ? tmp : b
avoiding any rewiring which would break things now.

check_need_cmovs () finds swap-style idioms and marks the first of the
three instructions as not needing a cmove.
---
 gcc/ifcvt.c | 97 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 78 insertions(+), 19 deletions(-)

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 955f9541f60..09bf443656c 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -103,6 +103,7 @@ static rtx_insn *block_has_only_trap (basic_block);
 static void check_need_temps (basic_block bb,
                               hash_map<rtx, bool> *need_temp,
                               rtx cond);
+static void check_need_cmovs (basic_block bb, hash_map<rtx, bool> *need_cmov);
 
 \f
 /* Count the number of non-jump active insns in BB.  */
@@ -3207,6 +3208,7 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
   int count = 0;
 
   hash_map<rtx, bool> need_temps;
+  hash_map<rtx, bool> need_no_cmovs;
 
   /* If we newly set a CC before a cmov, we might need a temporary
      even though the compare will be removed by a later pass.  Costing
@@ -3214,6 +3216,9 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
 
   check_need_temps (then_bb, &need_temps, cond);
 
+  /* Identify swap-style idioms.  */
+  check_need_cmovs (then_bb, &need_no_cmovs);
+
   hash_map<rtx, bool> temps_created;
 
   FOR_BB_INSNS (then_bb, insn)
@@ -3229,10 +3234,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
       rtx new_val = SET_SRC (set);
       rtx old_val = target;
 
-      rtx dest = SET_DEST (set);
-
       rtx temp;
-      if (need_temps.get (dest))
+      if (need_temps.get (target))
        {
          temp = gen_reg_rtx (GET_MODE (target));
          temps_created.put (target, true);
@@ -3241,18 +3244,11 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
        temp = target;
 
       /* If we were supposed to read from an earlier write in this block,
-	 we've changed the register allocation.  Rewire the read.  While
-	 we are looking, also try to catch a swap idiom.  */
+	 we've changed the register allocation.  Rewire the read.   */
       for (int i = count - 1; i >= 0; --i)
 	if (reg_overlap_mentioned_p (new_val, targets[i]))
 	  {
-	    /* Catch a "swap" style idiom.  */
-	    if (find_reg_note (insn, REG_DEAD, new_val) != NULL_RTX)
-	      /* The write to targets[i] is only live until the read
-		 here.  As the condition codes match, we can propagate
-		 the set to here.  */
-	      new_val = SET_SRC (single_set (unmodified_insns[i]));
-	    else
+	    if (find_reg_note (insn, REG_DEAD, new_val) == NULL_RTX)
 	      new_val = temporaries[i];
 	    break;
 	  }
@@ -3324,8 +3320,11 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
 
       {
 	start_sequence ();
-	temp_dest1 = noce_emit_cmove (if_info, temp, cond_code,
-	    x, y, new_val, old_val, NULL_RTX);
+	if (!need_no_cmovs.get (insn))
+	  temp_dest1 = noce_emit_cmove (if_info, temp, cond_code,
+	      x, y, new_val, old_val, NULL_RTX);
+	else
+	  noce_emit_move_insn (target, new_val);
 
 	/* If we failed to expand the conditional move, drop out and don't
 	   try to continue.  */
@@ -3346,13 +3345,16 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
 	end_sequence ();
       }
 
-	/* Now try emitting one passing a non-canonicalized cc comparison.
-	   This allows the backend to emit a cmov directly without additional
+	/* Now try emitting a cmov passing a non-canonicalized cc comparison.
+	   This allows backends to emit a cmov directly without additional
 	   compare.  */
       {
 	start_sequence ();
-	temp_dest2 = noce_emit_cmove (if_info, target, cond_code,
-	    x, y, new_val, old_val, cc_cmp);
+	if (!need_no_cmovs.get (insn))
+	  temp_dest2 = noce_emit_cmove (if_info, target, cond_code,
+	      x, y, new_val, old_val, cc_cmp);
+	else
+	  noce_emit_move_insn (target, new_val);
 
 	/* If we failed to expand the conditional move, drop out and don't
 	   try to continue.  */
@@ -3931,6 +3933,7 @@ check_cond_move_block (basic_block bb,
 
 /* Check for which sets we need to emit temporaries to hold the destination of
    a conditional move.  */
+
 static void
 check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)
 {
@@ -3938,7 +3941,7 @@ check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)
 
   FOR_BB_INSNS (bb, insn)
     {
-      rtx set, dest;
+      rtx set, src, dest;
 
       if (!active_insn_p (insn))
 	continue;
@@ -3947,12 +3950,68 @@ check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)
       if (set == NULL_RTX)
 	continue;
 
+      src = SET_SRC (set);
       dest = SET_DEST (set);
 
       /* noce_emit_cmove will emit the condition check every time it is called
          so we need a temp register if the destination is modified.  */
       if (reg_overlap_mentioned_p (dest, cond))
 	need_temp->put (dest, true);
+
+    }
+}
+
+/* Find local swap-style idioms in BB and mark the first insn (1)
+   that is only a temporary as not needing a conditional move as
+   it is going to be dead afterwards anyway.
+
+     (1) int tmp = a;
+	 a = b;
+	 b = tmp;
+
+
+        ifcvt
+	 -->
+
+	 load tmp,a
+	 cmov a,b
+	 cmov b,tmp */
+
+static void
+check_need_cmovs (basic_block bb, hash_map<rtx, bool> *need_cmov)
+{
+  rtx_insn *insn;
+  int count = 0;
+  auto_vec<rtx> insns;
+  auto_vec<rtx> dests;
+
+  FOR_BB_INSNS (bb, insn)
+    {
+      rtx set, src, dest;
+
+      if (!active_insn_p (insn))
+	continue;
+
+      set = single_set (insn);
+      if (set == NULL_RTX)
+	continue;
+
+      src = SET_SRC (set);
+      dest = SET_DEST (set);
+
+      for (int i = count - 1; i >= 0; --i)
+	{
+	  if (reg_overlap_mentioned_p (src, dests[i])
+	      && find_reg_note (insn, REG_DEAD, src) != NULL_RTX)
+	    {
+	      need_cmov->put (insns[i], false);
+	    }
+	}
+
+      insns.safe_push (insn);
+      dests.safe_push (dest);
+
+      count++;
     }
 }
 
-- 
2.17.0

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 3/9] ifcvt: Only created temporaries as needed.
  2019-08-02 15:10 [PATCH 0/9] Improve icvt "convert multiple" Robin Dapp
                   ` (5 preceding siblings ...)
  2019-08-02 15:10 ` [PATCH 9/9] ifcvt: Also pass reversed cc comparison Robin Dapp
@ 2019-08-02 15:10 ` Robin Dapp
  2019-08-06 20:14   ` Richard Sandiford
  2019-08-02 15:10 ` [PATCH 4/9] ifcvt: Estimate original costs before convert_multiple Robin Dapp
  2019-08-02 15:10 ` [PATCH 5/9] ifcvt: Allow constants operands in noce_convert_multiple_sets Robin Dapp
  8 siblings, 1 reply; 25+ messages in thread
From: Robin Dapp @ 2019-08-02 15:10 UTC (permalink / raw)
  To: gcc-patches; +Cc: james.greenhalgh, rdapp

noce_convert_multiple_sets creates temporaries for the destination of
every emitted cmov and expects subsequent passes to get rid of them.  This
does not happen every time and even if the temporaries are removed, code
generation can be affected adversely.  In this patch, temporaries are
only created if the destination of a set is used in an emitted condition
check.

---
 gcc/ifcvt.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 3 deletions(-)

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index e95ff9ee9b0..253b8a96c1a 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -99,6 +99,10 @@ static int dead_or_predicable (basic_block, basic_block, basic_block,
 			       edge, int);
 static void noce_emit_move_insn (rtx, rtx);
 static rtx_insn *block_has_only_trap (basic_block);
+static void check_need_temps (basic_block bb,
+                              hash_map<rtx, bool> *need_temp,
+                              rtx cond);
+
 \f
 /* Count the number of non-jump active insns in BB.  */
 
@@ -3145,6 +3149,12 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
   auto_vec<rtx_insn *> unmodified_insns;
   int count = 0;
 
+  hash_map<rtx, bool> need_temps;
+
+  check_need_temps (then_bb, &need_temps, cond);
+
+  hash_map<rtx, bool> temps_created;
+
   FOR_BB_INSNS (then_bb, insn)
     {
       /* Skip over non-insns.  */
@@ -3155,10 +3165,20 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
       gcc_checking_assert (set);
 
       rtx target = SET_DEST (set);
-      rtx temp = gen_reg_rtx (GET_MODE (target));
       rtx new_val = SET_SRC (set);
       rtx old_val = target;
 
+      rtx dest = SET_DEST (set);
+
+      rtx temp;
+      if (need_temps.get (dest))
+       {
+         temp = gen_reg_rtx (GET_MODE (target));
+         temps_created.put (target, true);
+       }
+      else
+       temp = target;
+
       /* If we were supposed to read from an earlier write in this block,
 	 we've changed the register allocation.  Rewire the read.  While
 	 we are looking, also try to catch a swap idiom.  */
@@ -3242,8 +3262,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
 
   /* Now fixup the assignments.  */
   for (int i = 0; i < count; i++)
-    noce_emit_move_insn (targets[i], temporaries[i]);
-
+    if (temps_created.get(targets[i]) && targets[i] != temporaries[i])
+      noce_emit_move_insn (targets[i], temporaries[i]);
 
   /* Actually emit the sequence if it isn't too expensive.  */
   rtx_insn *seq = get_insns ();
@@ -3749,6 +3769,34 @@ check_cond_move_block (basic_block bb,
   return TRUE;
 }
 
+/* Check for which sets we need to emit temporaries to hold the destination of
+   a conditional move.  */
+static void
+check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)
+{
+  rtx_insn *insn;
+
+  FOR_BB_INSNS (bb, insn)
+    {
+      rtx set, dest;
+
+      if (!active_insn_p (insn))
+	continue;
+
+      set = single_set (insn);
+      if (set == NULL_RTX)
+	continue;
+
+      dest = SET_DEST (set);
+
+      /* noce_emit_cmove will emit the condition check every time it is called
+         so we need a temp register if the destination is modified.  */
+      if (reg_overlap_mentioned_p (dest, cond))
+	need_temp->put (dest, true);
+    }
+}
+
+
 /* Given a basic block BB suitable for conditional move conversion,
    a condition COND, and pointer maps THEN_VALS and ELSE_VALS containing
    the register values depending on COND, emit the insns in the block as
-- 
2.17.0

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 7/9] ifcvt: Emit two cmov variants and choose the less expensive one.
  2019-08-02 15:10 [PATCH 0/9] Improve icvt "convert multiple" Robin Dapp
@ 2019-08-02 15:10 ` Robin Dapp
  2019-10-27 14:38   ` Richard Sandiford
  2019-08-02 15:10 ` [PATCH 1/9] ifcvt: Store the number of created cmovs Robin Dapp
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 25+ messages in thread
From: Robin Dapp @ 2019-08-02 15:10 UTC (permalink / raw)
  To: gcc-patches; +Cc: james.greenhalgh, rdapp

This patch duplicates the previous noce_emit_cmove logic.  First it
passes the canonical comparison emits the sequence and costs it.
Then, a second, separate sequence is created by passing the cc compare
we extracted before.  The costs of both sequences are compared and the
cheaper one is emitted.
---
 gcc/ifcvt.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 97 insertions(+), 9 deletions(-)

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 3db707e1fd1..955f9541f60 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -3208,8 +3208,11 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
 
   hash_map<rtx, bool> need_temps;
 
-  if (!cc_cmp)
-    check_need_temps (then_bb, &need_temps, cond);
+  /* If we newly set a CC before a cmov, we might need a temporary
+     even though the compare will be removed by a later pass.  Costing
+     of a sequence with these will be inaccurate.  */
+
+  check_need_temps (then_bb, &need_temps, cond);
 
   hash_map<rtx, bool> temps_created;
 
@@ -3298,18 +3301,103 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
 	  old_val = lowpart_subreg (dst_mode, old_val, src_mode);
 	}
 
-      /* Actually emit the conditional move.  */
-      rtx temp_dest = noce_emit_cmove (if_info, temp, cond_code,
-				       x, y, new_val, old_val, cc_cmp);
+      /* Try emitting a conditional move passing the backend the
+	 canonicalized comparison.  This can e.g. be used to recognize
+	 expressions like
+
+	   if (a < b)
+	   {
+	     ...
+	     b = a;
+	   }
+
+	 A backend can recognize this and emit a min/max insn.
+	 We will still emit a superfluous CC comparison before the
+	 min/max, though, which complicates costing.
+      */
+
+      rtx_insn *seq, *seq1 = 0, *seq2 = 0;
+      rtx temp_dest, temp_dest1, temp_dest2;
+      unsigned int cost1 = 0, cost2 = 0;
+      bool speed_p = if_info->speed_p;
+      push_topmost_sequence ();
+
+      {
+	start_sequence ();
+	temp_dest1 = noce_emit_cmove (if_info, temp, cond_code,
+	    x, y, new_val, old_val, NULL_RTX);
+
+	/* If we failed to expand the conditional move, drop out and don't
+	   try to continue.  */
+	if (temp_dest1 == NULL_RTX)
+	  {
+	    seq1 = NULL;
+	    cost1 = COSTS_N_INSNS (100);
+	  }
+	else
+	  {
+	    seq1 = get_insns ();
+	    cost1 = seq_cost (seq1, speed_p);
+	    rtx_insn *ii;
+	    for (ii = seq1; ii; ii = NEXT_INSN (ii))
+	      if (recog_memozied (ii) == -1)
+		cost1 += 10000;
+	  }
+	end_sequence ();
+      }
+
+	/* Now try emitting one passing a non-canonicalized cc comparison.
+	   This allows the backend to emit a cmov directly without additional
+	   compare.  */
+      {
+	start_sequence ();
+	temp_dest2 = noce_emit_cmove (if_info, target, cond_code,
+	    x, y, new_val, old_val, cc_cmp);
+
+	/* If we failed to expand the conditional move, drop out and don't
+	   try to continue.  */
+	if (temp_dest2 == NULL_RTX)
+	  {
+	    seq2 = NULL;
+	    cost2 = COSTS_N_INSNS (100);
+	  }
+	else
+	  {
+	    seq2 = get_insns ();
+	    cost2 = seq_cost (seq2, speed_p);
+	    rtx_insn *ii;
+	    for (ii = seq2; ii; ii = NEXT_INSN (ii))
+	      if (recog_memozied (ii) == -1)
+		cost2 += 10000;
+	  }
+	end_sequence ();
+      }
 
-      /* If we failed to expand the conditional move, drop out and don't
-	 try to continue.  */
-      if (temp_dest == NULL_RTX)
+      /* Check which version is less expensive.  */
+      if (cost1 <= cost2 && seq1 != NULL_RTX)
+	{
+	  temp_dest = temp_dest1;
+	  seq = seq1;
+	}
+      else if (seq2 != NULL_RTX)
+	{
+	  /* We do not require a temp register, so remove it from the list.  */
+	  seq = seq2;
+	  temp_dest = temp_dest2;
+	  temps_created.remove (temp);
+	}
+      else
 	{
+	  /* Nothing worked, bail out.  */
+	  pop_topmost_sequence ();
 	  end_sequence ();
-	  return FALSE;
+	  return false;
 	}
 
+      /* End the sub sequence and emit to the main sequence.  */
+      pop_topmost_sequence ();
+      emit_insn (seq);
+
       /* Bookkeeping.  */
       count++;
       targets.safe_push (target);
-- 
2.17.0

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/9] ifcvt: Only created temporaries as needed.
  2019-08-02 15:10 ` [PATCH 3/9] ifcvt: Only created temporaries as needed Robin Dapp
@ 2019-08-06 20:14   ` Richard Sandiford
  2019-08-08  9:13     ` Robin Dapp
  0 siblings, 1 reply; 25+ messages in thread
From: Richard Sandiford @ 2019-08-06 20:14 UTC (permalink / raw)
  To: Robin Dapp; +Cc: gcc-patches, james.greenhalgh

Robin Dapp <rdapp@linux.ibm.com> writes:
> noce_convert_multiple_sets creates temporaries for the destination of
> every emitted cmov and expects subsequent passes to get rid of them.  This
> does not happen every time and even if the temporaries are removed, code
> generation can be affected adversely.  In this patch, temporaries are
> only created if the destination of a set is used in an emitted condition
> check.

Still digesting the series, but a couple of implementation questions:

>
> ---
>  gcc/ifcvt.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 51 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
> index e95ff9ee9b0..253b8a96c1a 100644
> --- a/gcc/ifcvt.c
> +++ b/gcc/ifcvt.c
> @@ -99,6 +99,10 @@ static int dead_or_predicable (basic_block, basic_block, basic_block,
>  			       edge, int);
>  static void noce_emit_move_insn (rtx, rtx);
>  static rtx_insn *block_has_only_trap (basic_block);
> +static void check_need_temps (basic_block bb,
> +                              hash_map<rtx, bool> *need_temp,
> +                              rtx cond);
> +
>  \f
>  /* Count the number of non-jump active insns in BB.  */
>  
> @@ -3145,6 +3149,12 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>    auto_vec<rtx_insn *> unmodified_insns;
>    int count = 0;
>  
> +  hash_map<rtx, bool> need_temps;
> +
> +  check_need_temps (then_bb, &need_temps, cond);

Is the separate need_temps scan required for correctness?  It looked
like we could test:

      if (reg_overlap_mentioned_p (dest, cond))
	...

on-the-fly during the main noce_convert_multiple_sets loop.

> +
> +  hash_map<rtx, bool> temps_created;
> +
>    FOR_BB_INSNS (then_bb, insn)
>      {
>        /* Skip over non-insns.  */
> @@ -3155,10 +3165,20 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>        gcc_checking_assert (set);
>  
>        rtx target = SET_DEST (set);
> -      rtx temp = gen_reg_rtx (GET_MODE (target));
>        rtx new_val = SET_SRC (set);
>        rtx old_val = target;
>  
> +      rtx dest = SET_DEST (set);
> +
> +      rtx temp;
> +      if (need_temps.get (dest))
> +       {
> +         temp = gen_reg_rtx (GET_MODE (target));
> +         temps_created.put (target, true);
> +       }
> +      else
> +       temp = target;
> +
>        /* If we were supposed to read from an earlier write in this block,
>  	 we've changed the register allocation.  Rewire the read.  While
>  	 we are looking, also try to catch a swap idiom.  */
> @@ -3242,8 +3262,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>  
>    /* Now fixup the assignments.  */
>    for (int i = 0; i < count; i++)
> -    noce_emit_move_insn (targets[i], temporaries[i]);
> -
> +    if (temps_created.get(targets[i]) && targets[i] != temporaries[i])
> +      noce_emit_move_insn (targets[i], temporaries[i]);

Would it work to set temporaries[i] to targets[i] whenever a temporary
isn't needed, and avoid temps_created?

Thanks,
Richard

>  
>    /* Actually emit the sequence if it isn't too expensive.  */
>    rtx_insn *seq = get_insns ();
> @@ -3749,6 +3769,34 @@ check_cond_move_block (basic_block bb,
>    return TRUE;
>  }
>  
> +/* Check for which sets we need to emit temporaries to hold the destination of
> +   a conditional move.  */
> +static void
> +check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)
> +{
> +  rtx_insn *insn;
> +
> +  FOR_BB_INSNS (bb, insn)
> +    {
> +      rtx set, dest;
> +
> +      if (!active_insn_p (insn))
> +	continue;
> +
> +      set = single_set (insn);
> +      if (set == NULL_RTX)
> +	continue;
> +
> +      dest = SET_DEST (set);
> +
> +      /* noce_emit_cmove will emit the condition check every time it is called
> +         so we need a temp register if the destination is modified.  */
> +      if (reg_overlap_mentioned_p (dest, cond))
> +	need_temp->put (dest, true);
> +    }
> +}
> +
> +
>  /* Given a basic block BB suitable for conditional move conversion,
>     a condition COND, and pointer maps THEN_VALS and ELSE_VALS containing
>     the register values depending on COND, emit the insns in the block as

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/9] ifcvt: Estimate original costs before convert_multiple.
  2019-08-02 15:10 ` [PATCH 4/9] ifcvt: Estimate original costs before convert_multiple Robin Dapp
@ 2019-08-06 20:30   ` Richard Sandiford
  0 siblings, 0 replies; 25+ messages in thread
From: Richard Sandiford @ 2019-08-06 20:30 UTC (permalink / raw)
  To: Robin Dapp; +Cc: gcc-patches, james.greenhalgh

Robin Dapp <rdapp@linux.ibm.com> writes:
> This patch extends bb_ok_for_noce_convert_multiple_sets by a temporary
> cost estimation that can be used by noce_convert_multiple_sets.

I agree it looks like an omission that we didn't do this.  The patch
looks OK to me (maybe independently of the rest?) bar minor things:

> diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
> index 253b8a96c1a..55205cac153 100644
> --- a/gcc/ifcvt.c
> +++ b/gcc/ifcvt.c
> @@ -3333,11 +3333,13 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>     fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets.  */
>  
>  static bool
> -bb_ok_for_noce_convert_multiple_sets (basic_block test_bb)
> +bb_ok_for_noce_convert_multiple_sets (basic_block test_bb, unsigned *cost)

The function comment should document COST.

>  {
>    rtx_insn *insn;
>    unsigned count = 0;
>    unsigned param = PARAM_VALUE (PARAM_MAX_RTL_IF_CONVERSION_INSNS);
> +  bool speed_p = optimize_bb_for_speed_p (test_bb);
> +  unsigned potential_cost = 0;
>  
>    FOR_BB_INSNS (test_bb, insn)
>      {
> @@ -3373,9 +3375,14 @@ bb_ok_for_noce_convert_multiple_sets (basic_block test_bb)
>        if (!can_conditionally_move_p (GET_MODE (dest)))
>  	return false;
>  
> +      rtx sset = single_set (insn);

This is already available as "set", unless I'm missing something.

> +      potential_cost += pattern_cost (sset, speed_p);
> +
>        count++;
>      }
>  
> +  *cost += potential_cost;
> +
>    /* If we would only put out one conditional move, the other strategies
>       this pass tries are better optimized and will be more appropriate.
>       Some targets want to strictly limit the number of conditional moves
> @@ -3414,11 +3421,15 @@ noce_process_if_block (struct noce_if_info *if_info)
>       ??? For future expansion, further expand the "multiple X" rules.  */
>  
>    /* First look for multiple SETS.  */
> +  unsigned int mcost = if_info->original_cost;
> +  unsigned tmp_cost = if_info->original_cost;

Very minor, but it'd be good to be consistent about the choice
between unsigned and unsigned int. Maybe "old_cost" would be a
better name than "tmp_cost".

>    if (!else_bb
>        && HAVE_conditional_move
>        && !HAVE_cc0
> -      && bb_ok_for_noce_convert_multiple_sets (then_bb))
> +      && bb_ok_for_noce_convert_multiple_sets (then_bb, &mcost))
>      {
> +      /* Temporarily set the original costs to what we estimated.  */
> +      if_info->original_cost = mcost;
>        if (noce_convert_multiple_sets (if_info))
>  	{
>  	  if (dump_file && if_info->transform)
> @@ -3427,6 +3438,8 @@ noce_process_if_block (struct noce_if_info *if_info)
>  	  return TRUE;
>  	}
>      }
> +  /* Restore the original costs.  */
> +  if_info->original_cost = tmp_cost;
>  
>    bool speed_p = optimize_bb_for_speed_p (test_bb);
>    unsigned int then_cost = 0, else_cost = 0;

I guess the save and restore only really need to be done inside the
outer "if".  Not that the performance difference is going to be
noticeable, but maybe it would be a bit clearer to read.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/9] ifcvt: Allow constants operands in noce_convert_multiple_sets.
  2019-08-02 15:10 ` [PATCH 5/9] ifcvt: Allow constants operands in noce_convert_multiple_sets Robin Dapp
@ 2019-08-06 20:37   ` Richard Sandiford
  2019-08-08 10:04     ` Robin Dapp
  2019-10-27 14:12   ` Richard Sandiford
  1 sibling, 1 reply; 25+ messages in thread
From: Richard Sandiford @ 2019-08-06 20:37 UTC (permalink / raw)
  To: Robin Dapp; +Cc: gcc-patches, james.greenhalgh

Robin Dapp <rdapp@linux.ibm.com> writes:
> This patch checks allows immediate then/else operands for cmovs.
> We rely on,emit_conditional_move returning NULL if something unsupported
> was generated.

It seems like this is making noce_convert_multiple_sets overlap
a lot with cond_move_process_if_block (although that uses CONSTANT_P
instead of CONST_INT_P).  How do they fit together after this patch,
i.e. which cases is each one meant to handle that the other doesn't?

Thanks,
Richard

> Also, minor refactoring is performed.
>
> --
>
> gcc/ChangeLog:
>
> 2018-11-14  Robin Dapp  <rdapp@linux.ibm.com>
>
> 	* ifcvt.c (have_const_cmov): New function.
> 	(noce_convert_multiple_sets): Allow constants if supported.
> 	(bb_ok_for_noce_convert_multiple_sets): Likewise.
> 	(check_cond_move_block): Refactor.
> ---
>  gcc/ifcvt.c | 21 +++++++++++----------
>  1 file changed, 11 insertions(+), 10 deletions(-)
>
> diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
> index 55205cac153..99716e5f63c 100644
> --- a/gcc/ifcvt.c
> +++ b/gcc/ifcvt.c
> @@ -3214,7 +3214,9 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>  	 we'll end up trying to emit r4:HI = cond ? (r1:SI) : (r3:HI).
>  	 Wrap the two cmove operands into subregs if appropriate to prevent
>  	 that.  */
> -      if (GET_MODE (new_val) != GET_MODE (temp))
> +
> +      if (!CONST_INT_P (new_val)
> +         && GET_MODE (new_val) != GET_MODE (temp))
>  	{
>  	  machine_mode src_mode = GET_MODE (new_val);
>  	  machine_mode dst_mode = GET_MODE (temp);
> @@ -3225,7 +3227,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>  	    }
>  	  new_val = lowpart_subreg (dst_mode, new_val, src_mode);
>  	}
> -      if (GET_MODE (old_val) != GET_MODE (temp))
> +      if (!CONST_INT_P (old_val)
> +         && GET_MODE (old_val) != GET_MODE (temp))
>  	{
>  	  machine_mode src_mode = GET_MODE (old_val);
>  	  machine_mode dst_mode = GET_MODE (temp);
> @@ -3362,9 +3365,9 @@ bb_ok_for_noce_convert_multiple_sets (basic_block test_bb, unsigned *cost)
>        if (!REG_P (dest))
>  	return false;
>  
> -      if (!(REG_P (src)
> -	   || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
> -	       && subreg_lowpart_p (src))))
> +      if (!((REG_P (src) || (CONST_INT_P (src)))
> +	    || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
> +	      && subreg_lowpart_p (src))))
>  	return false;
>  
>        /* Destination must be appropriate for a conditional write.  */
> @@ -3724,7 +3727,7 @@ check_cond_move_block (basic_block bb,
>      {
>        rtx set, dest, src;
>  
> -      if (!NONDEBUG_INSN_P (insn) || JUMP_P (insn))
> +      if (!active_insn_p (insn))
>  	continue;
>        set = single_set (insn);
>        if (!set)
> @@ -3740,10 +3743,8 @@ check_cond_move_block (basic_block bb,
>        if (!CONSTANT_P (src) && !register_operand (src, VOIDmode))
>  	return FALSE;
>  
> -      if (side_effects_p (src) || side_effects_p (dest))
> -	return FALSE;
> -
> -      if (may_trap_p (src) || may_trap_p (dest))
> +      /* Check for side effects and trapping.  */
> +      if (!noce_operand_ok (src) || !noce_operand_ok (dest))
>  	return FALSE;
>  
>        /* Don't try to handle this if the source register was

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 8/9] ifcvt: Handle swap-style idioms differently.
  2019-08-02 15:10 ` [PATCH 8/9] ifcvt: Handle swap-style idioms differently Robin Dapp
@ 2019-08-06 20:47   ` Richard Sandiford
  2019-08-16 13:22     ` Robin Dapp
  0 siblings, 1 reply; 25+ messages in thread
From: Richard Sandiford @ 2019-08-06 20:47 UTC (permalink / raw)
  To: Robin Dapp; +Cc: gcc-patches, james.greenhalgh

Robin Dapp <rdapp@linux.ibm.com> writes:
> A swap-style idiom like
>  tmp = a
>    a = b
>    b = tmp
> would be transformed like
>  tmp_tmp = cond ? a : tmp
>  tmp_a   = cond ? b : a
>  tmp_b   = cond ? tmp_tmp : b
>  [...]
> including rewiring the first source operand to previous writes (e.g. tmp ->
> tmp_tmp).
>
> The code would recognize this, though, and change the last line to
>  tmp_b   = cond ? a : b.
>
> Without additional temporaries we can now emit the following sequence:
>  tmp = a	     // (no condition here)
>  a = cond ? b : a
>  b = cond ? tmp : b
> avoiding any rewiring which would break things now.
>
> check_need_cmovs () finds swap-style idioms and marks the first of the
> three instructions as not needing a cmove.

Looks like a nice optimisation, but could we just test whether the
destination of a set isn't live on exit from the then block?  I think
we could do that on the fly during the main noce_convert_multiple_sets
loop.

Thanks,
Richard

> ---
>  gcc/ifcvt.c | 97 ++++++++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 78 insertions(+), 19 deletions(-)
>
> diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
> index 955f9541f60..09bf443656c 100644
> --- a/gcc/ifcvt.c
> +++ b/gcc/ifcvt.c
> @@ -103,6 +103,7 @@ static rtx_insn *block_has_only_trap (basic_block);
>  static void check_need_temps (basic_block bb,
>                                hash_map<rtx, bool> *need_temp,
>                                rtx cond);
> +static void check_need_cmovs (basic_block bb, hash_map<rtx, bool> *need_cmov);
>  
>  \f
>  /* Count the number of non-jump active insns in BB.  */
> @@ -3207,6 +3208,7 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>    int count = 0;
>  
>    hash_map<rtx, bool> need_temps;
> +  hash_map<rtx, bool> need_no_cmovs;
>  
>    /* If we newly set a CC before a cmov, we might need a temporary
>       even though the compare will be removed by a later pass.  Costing
> @@ -3214,6 +3216,9 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>  
>    check_need_temps (then_bb, &need_temps, cond);
>  
> +  /* Identify swap-style idioms.  */
> +  check_need_cmovs (then_bb, &need_no_cmovs);
> +
>    hash_map<rtx, bool> temps_created;
>  
>    FOR_BB_INSNS (then_bb, insn)
> @@ -3229,10 +3234,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>        rtx new_val = SET_SRC (set);
>        rtx old_val = target;
>  
> -      rtx dest = SET_DEST (set);
> -
>        rtx temp;
> -      if (need_temps.get (dest))
> +      if (need_temps.get (target))
>         {
>           temp = gen_reg_rtx (GET_MODE (target));
>           temps_created.put (target, true);
> @@ -3241,18 +3244,11 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>         temp = target;
>  
>        /* If we were supposed to read from an earlier write in this block,
> -	 we've changed the register allocation.  Rewire the read.  While
> -	 we are looking, also try to catch a swap idiom.  */
> +	 we've changed the register allocation.  Rewire the read.   */
>        for (int i = count - 1; i >= 0; --i)
>  	if (reg_overlap_mentioned_p (new_val, targets[i]))
>  	  {
> -	    /* Catch a "swap" style idiom.  */
> -	    if (find_reg_note (insn, REG_DEAD, new_val) != NULL_RTX)
> -	      /* The write to targets[i] is only live until the read
> -		 here.  As the condition codes match, we can propagate
> -		 the set to here.  */
> -	      new_val = SET_SRC (single_set (unmodified_insns[i]));
> -	    else
> +	    if (find_reg_note (insn, REG_DEAD, new_val) == NULL_RTX)
>  	      new_val = temporaries[i];
>  	    break;
>  	  }
> @@ -3324,8 +3320,11 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>  
>        {
>  	start_sequence ();
> -	temp_dest1 = noce_emit_cmove (if_info, temp, cond_code,
> -	    x, y, new_val, old_val, NULL_RTX);
> +	if (!need_no_cmovs.get (insn))
> +	  temp_dest1 = noce_emit_cmove (if_info, temp, cond_code,
> +	      x, y, new_val, old_val, NULL_RTX);
> +	else
> +	  noce_emit_move_insn (target, new_val);
>  
>  	/* If we failed to expand the conditional move, drop out and don't
>  	   try to continue.  */
> @@ -3346,13 +3345,16 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>  	end_sequence ();
>        }
>  
> -	/* Now try emitting one passing a non-canonicalized cc comparison.
> -	   This allows the backend to emit a cmov directly without additional
> +	/* Now try emitting a cmov passing a non-canonicalized cc comparison.
> +	   This allows backends to emit a cmov directly without additional
>  	   compare.  */
>        {
>  	start_sequence ();
> -	temp_dest2 = noce_emit_cmove (if_info, target, cond_code,
> -	    x, y, new_val, old_val, cc_cmp);
> +	if (!need_no_cmovs.get (insn))
> +	  temp_dest2 = noce_emit_cmove (if_info, target, cond_code,
> +	      x, y, new_val, old_val, cc_cmp);
> +	else
> +	  noce_emit_move_insn (target, new_val);
>  
>  	/* If we failed to expand the conditional move, drop out and don't
>  	   try to continue.  */
> @@ -3931,6 +3933,7 @@ check_cond_move_block (basic_block bb,
>  
>  /* Check for which sets we need to emit temporaries to hold the destination of
>     a conditional move.  */
> +
>  static void
>  check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)
>  {
> @@ -3938,7 +3941,7 @@ check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)
>  
>    FOR_BB_INSNS (bb, insn)
>      {
> -      rtx set, dest;
> +      rtx set, src, dest;
>  
>        if (!active_insn_p (insn))
>  	continue;
> @@ -3947,12 +3950,68 @@ check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)
>        if (set == NULL_RTX)
>  	continue;
>  
> +      src = SET_SRC (set);
>        dest = SET_DEST (set);
>  
>        /* noce_emit_cmove will emit the condition check every time it is called
>           so we need a temp register if the destination is modified.  */
>        if (reg_overlap_mentioned_p (dest, cond))
>  	need_temp->put (dest, true);
> +
> +    }
> +}
> +
> +/* Find local swap-style idioms in BB and mark the first insn (1)
> +   that is only a temporary as not needing a conditional move as
> +   it is going to be dead afterwards anyway.
> +
> +     (1) int tmp = a;
> +	 a = b;
> +	 b = tmp;
> +
> +
> +        ifcvt
> +	 -->
> +
> +	 load tmp,a
> +	 cmov a,b
> +	 cmov b,tmp */
> +
> +static void
> +check_need_cmovs (basic_block bb, hash_map<rtx, bool> *need_cmov)
> +{
> +  rtx_insn *insn;
> +  int count = 0;
> +  auto_vec<rtx> insns;
> +  auto_vec<rtx> dests;
> +
> +  FOR_BB_INSNS (bb, insn)
> +    {
> +      rtx set, src, dest;
> +
> +      if (!active_insn_p (insn))
> +	continue;
> +
> +      set = single_set (insn);
> +      if (set == NULL_RTX)
> +	continue;
> +
> +      src = SET_SRC (set);
> +      dest = SET_DEST (set);
> +
> +      for (int i = count - 1; i >= 0; --i)
> +	{
> +	  if (reg_overlap_mentioned_p (src, dests[i])
> +	      && find_reg_note (insn, REG_DEAD, src) != NULL_RTX)
> +	    {
> +	      need_cmov->put (insns[i], false);
> +	    }
> +	}
> +
> +      insns.safe_push (insn);
> +      dests.safe_push (dest);
> +
> +      count++;
>      }
>  }

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/9] ifcvt: Use enum instead of transform_name string.
  2019-08-02 15:10 ` [PATCH 2/9] ifcvt: Use enum instead of transform_name string Robin Dapp
@ 2019-08-06 21:03   ` Richard Sandiford
  2019-08-08 22:18     ` Jeff Law
  0 siblings, 1 reply; 25+ messages in thread
From: Richard Sandiford @ 2019-08-06 21:03 UTC (permalink / raw)
  To: Robin Dapp; +Cc: gcc-patches, james.greenhalgh

Robin Dapp <rdapp@linux.ibm.com> writes:
> This patch introduces an enum for ifcvt's various noce transformations.
> As the transformation might be queried by the backend, I find it nicer
> to allow checking for a proper type instead of a string comparison.

TBH I think the names of internal ifcvt routines are too level to expose as
an "official" part of the interface.  (OK, like you say, the information's
already there and rogue backends could already check it if they wanted.)

If we were going to have an enum, I think it should be a higher-level
description of what the converted code is going to do, rather than the
name of the routine that's going to do it.

It sounded from the covering note that you might not need this
any more though.

Thanks,
Richard

>
> ---
>  gcc/ifcvt.c | 46 ++++++++++++++++++------------------
>  gcc/ifcvt.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 88 insertions(+), 25 deletions(-)
>
> diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
> index b80dbc43d83..e95ff9ee9b0 100644
> --- a/gcc/ifcvt.c
> +++ b/gcc/ifcvt.c
> @@ -1105,7 +1105,7 @@ noce_try_move (struct noce_if_info *if_info)
>  	  emit_insn_before_setloc (seq, if_info->jump,
>  				   INSN_LOCATION (if_info->insn_a));
>  	}
> -      if_info->transform_name = "noce_try_move";
> +      if_info->transform = ifcvt_transform_noce_try_move;
>        return TRUE;
>      }
>    return FALSE;
> @@ -1139,7 +1139,7 @@ noce_try_ifelse_collapse (struct noce_if_info * if_info)
>    emit_insn_before_setloc (seq, if_info->jump,
>  			  INSN_LOCATION (if_info->insn_a));
>  
> -  if_info->transform_name = "noce_try_ifelse_collapse";
> +  if_info->transform = ifcvt_transform_noce_try_ifelse_collapse;
>    return TRUE;
>  }
>  
> @@ -1186,7 +1186,7 @@ noce_try_store_flag (struct noce_if_info *if_info)
>  
>        emit_insn_before_setloc (seq, if_info->jump,
>  			       INSN_LOCATION (if_info->insn_a));
> -      if_info->transform_name = "noce_try_store_flag";
> +      if_info->transform = ifcvt_transform_noce_try_store_flag;
>        return TRUE;
>      }
>    else
> @@ -1265,7 +1265,7 @@ noce_try_inverse_constants (struct noce_if_info *if_info)
>  
>        emit_insn_before_setloc (seq, if_info->jump,
>  			       INSN_LOCATION (if_info->insn_a));
> -      if_info->transform_name = "noce_try_inverse_constants";
> +      if_info->transform = ifcvt_transform_noce_try_inverse_constants;
>        return true;
>      }
>  
> @@ -1485,7 +1485,7 @@ noce_try_store_flag_constants (struct noce_if_info *if_info)
>  
>        emit_insn_before_setloc (seq, if_info->jump,
>  			       INSN_LOCATION (if_info->insn_a));
> -      if_info->transform_name = "noce_try_store_flag_constants";
> +      if_info->transform = ifcvt_transform_noce_try_store_flag_constants;
>  
>        return TRUE;
>      }
> @@ -1546,7 +1546,7 @@ noce_try_addcc (struct noce_if_info *if_info)
>  
>  	      emit_insn_before_setloc (seq, if_info->jump,
>  				       INSN_LOCATION (if_info->insn_a));
> -	      if_info->transform_name = "noce_try_addcc";
> +	      if_info->transform = ifcvt_transform_noce_try_addcc;
>  
>  	      return TRUE;
>  	    }
> @@ -1588,7 +1588,7 @@ noce_try_addcc (struct noce_if_info *if_info)
>  
>  	      emit_insn_before_setloc (seq, if_info->jump,
>  				       INSN_LOCATION (if_info->insn_a));
> -	      if_info->transform_name = "noce_try_addcc";
> +	      if_info->transform = ifcvt_transform_noce_try_addcc;
>  	      return TRUE;
>  	    }
>  	  end_sequence ();
> @@ -1639,7 +1639,7 @@ noce_try_store_flag_mask (struct noce_if_info *if_info)
>  
>  	  emit_insn_before_setloc (seq, if_info->jump,
>  				   INSN_LOCATION (if_info->insn_a));
> -	  if_info->transform_name = "noce_try_store_flag_mask";
> +	  if_info->transform = ifcvt_transform_noce_try_store_flag_mask;
>  
>  	  return TRUE;
>  	}
> @@ -1791,7 +1791,7 @@ noce_try_cmove (struct noce_if_info *if_info)
>  
>  	  emit_insn_before_setloc (seq, if_info->jump,
>  				   INSN_LOCATION (if_info->insn_a));
> -	  if_info->transform_name = "noce_try_cmove";
> +	  if_info->transform = ifcvt_transform_noce_try_cmove;
>  
>  	  return TRUE;
>  	}
> @@ -1844,7 +1844,7 @@ noce_try_cmove (struct noce_if_info *if_info)
>  
>  	      emit_insn_before_setloc (seq, if_info->jump,
>  				   INSN_LOCATION (if_info->insn_a));
> -	      if_info->transform_name = "noce_try_cmove";
> +	      if_info->transform = ifcvt_transform_noce_try_cmove;
>  	      return TRUE;
>  	    }
>  	  else
> @@ -2286,7 +2286,7 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
>  
>    emit_insn_before_setloc (ifcvt_seq, if_info->jump,
>  			   INSN_LOCATION (if_info->insn_a));
> -  if_info->transform_name = "noce_try_cmove_arith";
> +  if_info->transform = ifcvt_transform_noce_try_cmove_arith;
>    return TRUE;
>  
>   end_seq_and_fail:
> @@ -2544,7 +2544,7 @@ noce_try_minmax (struct noce_if_info *if_info)
>    if_info->cond = cond;
>    if_info->cond_earliest = earliest;
>    if_info->rev_cond = NULL_RTX;
> -  if_info->transform_name = "noce_try_minmax";
> +  if_info->transform = ifcvt_transform_noce_try_minmax;
>  
>    return TRUE;
>  }
> @@ -2712,7 +2712,7 @@ noce_try_abs (struct noce_if_info *if_info)
>    if_info->cond = cond;
>    if_info->cond_earliest = earliest;
>    if_info->rev_cond = NULL_RTX;
> -  if_info->transform_name = "noce_try_abs";
> +  if_info->transform = ifcvt_transform_noce_try_abs;
>  
>    return TRUE;
>  }
> @@ -2794,7 +2794,7 @@ noce_try_sign_mask (struct noce_if_info *if_info)
>      return FALSE;
>  
>    emit_insn_before_setloc (seq, if_info->jump, INSN_LOCATION (if_info->insn_a));
> -  if_info->transform_name = "noce_try_sign_mask";
> +  if_info->transform = ifcvt_transform_noce_try_sign_mask;
>  
>    return TRUE;
>  }
> @@ -2904,7 +2904,7 @@ noce_try_bitop (struct noce_if_info *if_info)
>        emit_insn_before_setloc (seq, if_info->jump,
>  			       INSN_LOCATION (if_info->insn_a));
>      }
> -  if_info->transform_name = "noce_try_bitop";
> +  if_info->transform = ifcvt_transform_noce_try_bitop;
>    return TRUE;
>  }
>  
> @@ -3244,16 +3244,17 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>    for (int i = 0; i < count; i++)
>      noce_emit_move_insn (targets[i], temporaries[i]);
>  
> +
>    /* Actually emit the sequence if it isn't too expensive.  */
>    rtx_insn *seq = get_insns ();
>  
> -  if_info->transform_name = "noce_convert_multiple_sets";
> +  if_info->transform = ifcvt_transform_noce_convert_multiple_sets;
>    if_info->created_cmovs = count;
>  
>    if (!targetm.noce_conversion_profitable_p (seq, if_info))
>      {
>        end_sequence ();
> -      if_info->transform_name = "";
> +      if_info->transform = ifcvt_transform_none;
>        if_info->created_cmovs = 0;
>        return FALSE;
>      }
> @@ -3400,15 +3401,16 @@ noce_process_if_block (struct noce_if_info *if_info)
>      {
>        if (noce_convert_multiple_sets (if_info))
>  	{
> -	  if (dump_file && if_info->transform_name)
> +	  if (dump_file && if_info->transform)
>  	    fprintf (dump_file, "if-conversion succeeded through %s\n",
> -		     if_info->transform_name);
> +		ifcvt_get_transform_name (if_info->transform));
>  	  return TRUE;
>  	}
>      }
>  
>    bool speed_p = optimize_bb_for_speed_p (test_bb);
>    unsigned int then_cost = 0, else_cost = 0;
> +
>    if (!bb_valid_for_noce_process_p (then_bb, cond, &then_cost,
>  				    &if_info->then_simple))
>      return false;
> @@ -3619,9 +3621,9 @@ noce_process_if_block (struct noce_if_info *if_info)
>    return FALSE;
>  
>   success:
> -  if (dump_file && if_info->transform_name)
> +  if (dump_file && if_info->transform)
>      fprintf (dump_file, "if-conversion succeeded through %s\n",
> -	     if_info->transform_name);
> +	ifcvt_get_transform_name (if_info->transform));
>  
>    /* If we used a temporary, fix it up now.  */
>    if (orig_x != x)
> @@ -4065,7 +4067,7 @@ noce_find_if_block (basic_block test_bb, edge then_edge, edge else_edge,
>       and jump_insns are always given a cost of 1 by seq_cost, so treat
>       both instructions as having cost COSTS_N_INSNS (1).  */
>    if_info.original_cost = COSTS_N_INSNS (2);
> -  if_info.transform_name = "";
> +  if_info.transform = ifcvt_transform_none;
>    if_info.created_cmovs = 0;
>  
>    /* Do the real work.  */
> diff --git a/gcc/ifcvt.h b/gcc/ifcvt.h
> index 130559c0911..3776a25c3f6 100644
> --- a/gcc/ifcvt.h
> +++ b/gcc/ifcvt.h
> @@ -40,6 +40,66 @@ struct ce_if_block
>    int pass;				/* Pass number.  */
>  };
>  
> +enum ifcvt_transform
> +{
> +  ifcvt_transform_none = 0,
> +  ifcvt_transform_noce_try_move,
> +  ifcvt_transform_noce_try_ifelse_collapse,
> +  ifcvt_transform_noce_try_store_flag,
> +  ifcvt_transform_noce_try_inverse_constants,
> +  ifcvt_transform_noce_try_store_flag_constants,
> +  ifcvt_transform_noce_try_addcc,
> +  ifcvt_transform_noce_try_store_flag_mask,
> +  ifcvt_transform_noce_try_cmove,
> +  ifcvt_transform_noce_try_cmove_arith,
> +  ifcvt_transform_noce_try_minmax,
> +  ifcvt_transform_noce_try_abs,
> +  ifcvt_transform_noce_try_sign_mask,
> +  ifcvt_transform_noce_try_bitop,
> +  ifcvt_transform_noce_convert_multiple_sets
> +};
> +
> +inline const char *
> +ifcvt_get_transform_name (enum ifcvt_transform transform)
> +{
> +  switch (transform)
> +    {
> +    case ifcvt_transform_none:
> +      return "";
> +    case ifcvt_transform_noce_try_move:
> +      return "noce_try_move";
> +    case ifcvt_transform_noce_try_ifelse_collapse:
> +      return "noce_try_ifelse_collapse";
> +    case ifcvt_transform_noce_try_store_flag:
> +      return "noce_try_store_flag";
> +    case ifcvt_transform_noce_try_inverse_constants:
> +      return "noce_try_inverse_constants";
> +    case ifcvt_transform_noce_try_store_flag_constants:
> +      return "noce_try_store_flag_constants";
> +    case ifcvt_transform_noce_try_addcc:
> +      return "noce_try_addcc";
> +    case ifcvt_transform_noce_try_store_flag_mask:
> +      return "noce_try_store_flag_mask";
> +    case ifcvt_transform_noce_try_cmove:
> +      return "noce_try_cmove";
> +    case ifcvt_transform_noce_try_cmove_arith:
> +      return "noce_try_cmove_arith";
> +    case ifcvt_transform_noce_try_minmax:
> +      return "noce_try_minmax";
> +    case ifcvt_transform_noce_try_abs:
> +      return "noce_try_abs";
> +    case ifcvt_transform_noce_try_sign_mask:
> +      return "noce_try_sign_mask";
> +    case ifcvt_transform_noce_try_bitop:
> +      return "noce_try_bitop";
> +    case ifcvt_transform_noce_convert_multiple_sets:
> +      return "noce_convert_multiple_sets";
> +    default:
> +      return "unknown transform";
> +    }
> +}
> +
> +
>  /* Used by noce_process_if_block to communicate with its subroutines.
>  
>     The subroutines know that A and B may be evaluated freely.  They
> @@ -105,9 +165,10 @@ struct noce_if_info
>       generate to replace this branch.  */
>    unsigned int max_seq_cost;
>  
> -  /* The name of the noce transform that succeeded in if-converting
> -     this structure.  Used for debugging.  */
> -  const char *transform_name;
> +  /* The noce transform that succeeded in if-converting this structure.
> +     Used for letting the backend determine which transform succeeded
> +     and for debugging output.  */
> +  enum ifcvt_transform transform;
>  
>    /* The number of created conditional moves in case we convert multiple
>       sets.  */

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/9] ifcvt: Only created temporaries as needed.
  2019-08-06 20:14   ` Richard Sandiford
@ 2019-08-08  9:13     ` Robin Dapp
  0 siblings, 0 replies; 25+ messages in thread
From: Robin Dapp @ 2019-08-08  9:13 UTC (permalink / raw)
  To: gcc-patches, james.greenhalgh, richard.sandiford

Hi Richard,

> Is the separate need_temps scan required for correctness?  It looked
> like we could test:
> 
>       if (reg_overlap_mentioned_p (dest, cond))
> 	...
> 
> on-the-fly during the main noce_convert_multiple_sets loop.

right, I didn't re-check it but after changes during interal patch
rounds the check is now degenerated enought to allow this. Going to
refactor that.

> Would it work to set temporaries[i] to targets[i] whenever a temporary
> isn't needed, and avoid temps_created?

Yes, that should work. Already made that change locally.

Regards
 Robin

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/9] ifcvt: Allow constants operands in noce_convert_multiple_sets.
  2019-08-06 20:37   ` Richard Sandiford
@ 2019-08-08 10:04     ` Robin Dapp
  0 siblings, 0 replies; 25+ messages in thread
From: Robin Dapp @ 2019-08-08 10:04 UTC (permalink / raw)
  To: gcc-patches, james.greenhalgh, richard.sandiford

> It seems like this is making noce_convert_multiple_sets overlap
> a lot with cond_move_process_if_block (although that uses CONSTANT_P
> instead of CONST_INT_P).  How do they fit together after this patch,
> i.e. which cases is each one meant to handle that the other doesn't?

IMHO all of icvt is currently one unholy overlap with itself and the
backend where at least I could not decipher within reasonable time which
part is supposed to cover what :) Ok, it might not be that bad...

Currently convert_multiple only handles situations without an else_bb
but that seems a rather artificial limit and I see no reason why it
could not be unified with cond_move_process_if_block at a later time.

Regards
 Robin

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/9] ifcvt: Use enum instead of transform_name string.
  2019-08-06 21:03   ` Richard Sandiford
@ 2019-08-08 22:18     ` Jeff Law
  0 siblings, 0 replies; 25+ messages in thread
From: Jeff Law @ 2019-08-08 22:18 UTC (permalink / raw)
  To: Robin Dapp, gcc-patches, james.greenhalgh, richard.sandiford

On 8/6/19 2:42 PM, Richard Sandiford wrote:
> Robin Dapp <rdapp@linux.ibm.com> writes:
>> This patch introduces an enum for ifcvt's various noce transformations.
>> As the transformation might be queried by the backend, I find it nicer
>> to allow checking for a proper type instead of a string comparison.
> 
> TBH I think the names of internal ifcvt routines are too level to expose as
> an "official" part of the interface.  (OK, like you say, the information's
> already there and rogue backends could already check it if they wanted.)
> 
> If we were going to have an enum, I think it should be a higher-level
> description of what the converted code is going to do, rather than the
> name of the routine that's going to do it.
> 
> It sounded from the covering note that you might not need this
> any more though.
You're probably right on the naming.   I think the main question with
patch #1 and #2 is whether or not targets would likely use that information.

My sense is that arm & aarch64 want to be doing if-conversion very
aggressively and likely wouldn't care about them.  On x86 if-conversion
costing seems more complex -- in fact, I don't think we really have a
clear picture of what the costing factors really are for x86.  It sounds
like at least for the issues Robin is addressing that s390 won't care.

So my inclination would be to defer #1 and #2 until a particular target
indicates that it needs this information.  If that happens, we dust off
patches #1 and #2.

Jeff

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 8/9] ifcvt: Handle swap-style idioms differently.
  2019-08-06 20:47   ` Richard Sandiford
@ 2019-08-16 13:22     ` Robin Dapp
  2019-08-16 15:38       ` Richard Sandiford
  0 siblings, 1 reply; 25+ messages in thread
From: Robin Dapp @ 2019-08-16 13:22 UTC (permalink / raw)
  To: gcc-patches, james.greenhalgh, richard.sandiford

> Looks like a nice optimisation, but could we just test whether the
> destination of a set isn't live on exit from the then block?  I think
> we could do that on the fly during the main noce_convert_multiple_sets
> loop.

I included this locally along with the rest of the remarks. Any comments
on the rest/bulk of the patch (the part trying to compare costs of both
cmovs "types")?

Regards
 Robin

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 8/9] ifcvt: Handle swap-style idioms differently.
  2019-08-16 13:22     ` Robin Dapp
@ 2019-08-16 15:38       ` Richard Sandiford
  2019-08-17 23:48         ` Robin Dapp
  0 siblings, 1 reply; 25+ messages in thread
From: Richard Sandiford @ 2019-08-16 15:38 UTC (permalink / raw)
  To: Robin Dapp; +Cc: gcc-patches, james.greenhalgh

Robin Dapp <rdapp@linux.ibm.com> writes:
>> Looks like a nice optimisation, but could we just test whether the
>> destination of a set isn't live on exit from the then block?  I think
>> we could do that on the fly during the main noce_convert_multiple_sets
>> loop.
>
> I included this locally along with the rest of the remarks. Any comments
> on the rest/bulk of the patch (the part trying to compare costs of both
> cmovs "types")?

I'm still a bit worried about the overlap between the expanded
noce_convert_multiple_sets and cond_move_process_if_block (5/9).
It seems like we're making noce_convert_multiple_set handle most of
the conditional move cases that cond_move_process_if_block can handle.
But like you say, noce_convert_multiple_set is still restricted to
half-diamonds, whereas cond_move_process_if_block can handle full
diamonds.

3/9, 4/9 and 8/9 seems like good independent improvements.
But for 5/9, 6/9 and 7/9, it looks to me like it would be better
to improve cond_move_process_if_block so that it handles all the
conditional move cases you're targetting.

Definitely welcome other opinions though. :-)

Thanks,
Richard

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 8/9] ifcvt: Handle swap-style idioms differently.
  2019-08-16 15:38       ` Richard Sandiford
@ 2019-08-17 23:48         ` Robin Dapp
  0 siblings, 0 replies; 25+ messages in thread
From: Robin Dapp @ 2019-08-17 23:48 UTC (permalink / raw)
  To: gcc-patches, james.greenhalgh, richard.sandiford

> I'm still a bit worried about the overlap between the expanded
> noce_convert_multiple_sets and cond_move_process_if_block (5/9).
> It seems like we're making noce_convert_multiple_set handle most of
> the conditional move cases that cond_move_process_if_block can handle.
> But like you say, noce_convert_multiple_set is still restricted to
> half-diamonds, whereas cond_move_process_if_block can handle full
> diamonds.

I see your concern and would also agree on that there should be a
clearer demarcation of which method can handle what.  I'm not really
sure I fully understand the distinction between "noce" and "cond_move"
anyway, when we emit conditional moves in the "noce" parts.  As said
before, in my opinion, both noce_convert_multiple and cond_move_process
should be unified rather sooner than later.

Yet, before and after my suggested patch, noce_convert_multiple_set can
handle blocks that cond_move_process cannot, e.g. cmovs that write to
one of the registers the condition checked (like min/max idioms) or even
cmovs that touch registers that are modified earlier in the block (like
in swap idioms).  As far as I understand, this is why the additional
temporaries have been introduced in the first place - my patch now tries
to limit these temporaries to situations where we really need them.

I would argue that there is still sufficient difference between them,
even though both can now handle constants.

Regards
 Robin

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/9] ifcvt: Allow constants operands in noce_convert_multiple_sets.
  2019-08-02 15:10 ` [PATCH 5/9] ifcvt: Allow constants operands in noce_convert_multiple_sets Robin Dapp
  2019-08-06 20:37   ` Richard Sandiford
@ 2019-10-27 14:12   ` Richard Sandiford
  1 sibling, 0 replies; 25+ messages in thread
From: Richard Sandiford @ 2019-10-27 14:12 UTC (permalink / raw)
  To: Robin Dapp; +Cc: gcc-patches, james.greenhalgh

Coming back to this just in time for it not to be three months later,
sorry...

I still think it would be better to consolidate ifcvt a bit more,
rather than effectively duplicate bits of cond_move_process_if_block
in noce_convert_multiple_sets.  But perhaps it was a historical
mistake to have two separate routines in the first place, and I guess
making noce_convert_multiple_sets handle more cases might allow us
to get rid of cond_move_process_if_block at some point.  So let's
go ahead with this anyway.

Robin Dapp <rdapp@linux.ibm.com> writes:
> This patch checks allows immediate then/else operands for cmovs.
> We rely on,emit_conditional_move returning NULL if something unsupported
> was generated.
>
> Also, minor refactoring is performed.
>
> --
>
> gcc/ChangeLog:
>
> 2018-11-14  Robin Dapp  <rdapp@linux.ibm.com>
>
> 	* ifcvt.c (have_const_cmov): New function.
> 	(noce_convert_multiple_sets): Allow constants if supported.
> 	(bb_ok_for_noce_convert_multiple_sets): Likewise.
> 	(check_cond_move_block): Refactor.
> ---
>  gcc/ifcvt.c | 21 +++++++++++----------
>  1 file changed, 11 insertions(+), 10 deletions(-)
>
> diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
> index 55205cac153..99716e5f63c 100644
> --- a/gcc/ifcvt.c
> +++ b/gcc/ifcvt.c
> @@ -3214,7 +3214,9 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>  	 we'll end up trying to emit r4:HI = cond ? (r1:SI) : (r3:HI).
>  	 Wrap the two cmove operands into subregs if appropriate to prevent
>  	 that.  */
> -      if (GET_MODE (new_val) != GET_MODE (temp))
> +
> +      if (!CONST_INT_P (new_val)
> +         && GET_MODE (new_val) != GET_MODE (temp))
>  	{
>  	  machine_mode src_mode = GET_MODE (new_val);
>  	  machine_mode dst_mode = GET_MODE (temp);
> @@ -3225,7 +3227,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>  	    }
>  	  new_val = lowpart_subreg (dst_mode, new_val, src_mode);
>  	}
> -      if (GET_MODE (old_val) != GET_MODE (temp))
> +      if (!CONST_INT_P (old_val)
> +         && GET_MODE (old_val) != GET_MODE (temp))
>  	{
>  	  machine_mode src_mode = GET_MODE (old_val);
>  	  machine_mode dst_mode = GET_MODE (temp);
> @@ -3362,9 +3365,9 @@ bb_ok_for_noce_convert_multiple_sets (basic_block test_bb, unsigned *cost)
>        if (!REG_P (dest))
>  	return false;
>  
> -      if (!(REG_P (src)
> -	   || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
> -	       && subreg_lowpart_p (src))))
> +      if (!((REG_P (src) || (CONST_INT_P (src)))
> +	    || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
> +	      && subreg_lowpart_p (src))))
>  	return false;
>  
>        /* Destination must be appropriate for a conditional write.  */

CONSTANT_P (as for check_cond_move_block) would cover more cases than
CONST_INT_P.  No need for brackets around CONST_INT_P (...).

I was tempted to say we should use register_operand too, but I guess
that allows unwanted (subreg (mem...))s (as if there's any other kind).

> @@ -3724,7 +3727,7 @@ check_cond_move_block (basic_block bb,
>      {
>        rtx set, dest, src;
>  
> -      if (!NONDEBUG_INSN_P (insn) || JUMP_P (insn))
> +      if (!active_insn_p (insn))
>  	continue;
>        set = single_set (insn);
>        if (!set)

I assume this is to skip notes, but shouldn't this still include JUMP_P?

It also looks like it should be an independent patch.

> @@ -3740,10 +3743,8 @@ check_cond_move_block (basic_block bb,
>        if (!CONSTANT_P (src) && !register_operand (src, VOIDmode))
>  	return FALSE;
>  
> -      if (side_effects_p (src) || side_effects_p (dest))
> -	return FALSE;
> -
> -      if (may_trap_p (src) || may_trap_p (dest))
> +      /* Check for side effects and trapping.  */
> +      if (!noce_operand_ok (src) || !noce_operand_ok (dest))
>  	return FALSE;
>  
>        /* Don't try to handle this if the source register was

Seems wrong to use something called noce_operand_ok in a routine
that is explicitly handling cases in which conditional execution
is possible.

More importantly, I can't see anywhere that handles the MEM_P case
specially for cond_move_*, so are you sure this is really safe?
Might be better to leave it as-is if this part is just a clean-up.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 6/9] ifcvt: Extract cc comparison from jump.
  2019-08-02 15:10 ` [PATCH 6/9] ifcvt: Extract cc comparison from jump Robin Dapp
@ 2019-10-27 14:31   ` Richard Sandiford
  0 siblings, 0 replies; 25+ messages in thread
From: Richard Sandiford @ 2019-10-27 14:31 UTC (permalink / raw)
  To: Robin Dapp; +Cc: gcc-patches, james.greenhalgh

Robin Dapp <rdapp@linux.ibm.com> writes:
> This patch extracts a cc comparison from the initial compare/jump
> insn and allows it to be passed to noce_emit_cmove and
> emit_conditional_move.
> ---
>  gcc/ifcvt.c  | 68 ++++++++++++++++++++++++++++++++++++++++++++++++----
>  gcc/optabs.c |  7 ++++--
>  gcc/optabs.h |  2 +-
>  3 files changed, 69 insertions(+), 8 deletions(-)
>
> diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
> index 99716e5f63c..3db707e1fd1 100644
> --- a/gcc/ifcvt.c
> +++ b/gcc/ifcvt.c
> @@ -86,6 +86,7 @@ static rtx_insn *find_active_insn_after (basic_block, rtx_insn *);
>  static basic_block block_fallthru (basic_block);
>  static rtx cond_exec_get_condition (rtx_insn *);
>  static rtx noce_get_condition (rtx_insn *, rtx_insn **, bool);
> +static rtx noce_get_compare (rtx_insn *, bool);
>  static int noce_operand_ok (const_rtx);
>  static void merge_if_block (ce_if_block *);
>  static int find_cond_trap (basic_block, edge, edge);
> @@ -775,7 +776,7 @@ static int noce_try_addcc (struct noce_if_info *);
>  static int noce_try_store_flag_constants (struct noce_if_info *);
>  static int noce_try_store_flag_mask (struct noce_if_info *);
>  static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,
> -			    rtx, rtx, rtx);
> +			    rtx, rtx, rtx, rtx = NULL);
>  static int noce_try_cmove (struct noce_if_info *);
>  static int noce_try_cmove_arith (struct noce_if_info *);
>  static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
> @@ -1658,7 +1659,7 @@ noce_try_store_flag_mask (struct noce_if_info *if_info)
>  
>  static rtx
>  noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
> -		 rtx cmp_a, rtx cmp_b, rtx vfalse, rtx vtrue)
> +		 rtx cmp_a, rtx cmp_b, rtx vfalse, rtx vtrue, rtx cc_cmp)
>  {
>    rtx target ATTRIBUTE_UNUSED;
>    int unsignedp ATTRIBUTE_UNUSED;
> @@ -1706,7 +1707,7 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
>  
>    target = emit_conditional_move (x, code, cmp_a, cmp_b, VOIDmode,
>  				  vtrue, vfalse, GET_MODE (x),
> -				  unsignedp);
> +				  unsignedp, cc_cmp);
>    if (target)
>      return target;
>  
> @@ -2970,6 +2971,60 @@ noce_get_condition (rtx_insn *jump, rtx_insn **earliest, bool then_else_reversed
>    return tmp;
>  }
>  
> +/* Get the comparison from the insn.  */
> +static rtx
> +noce_get_compare (rtx_insn *jump, bool then_else_reversed)
> +{
> +  enum rtx_code code;
> +  const_rtx set;
> +  rtx tem;
> +  rtx op0, op1;
> +
> +  if (!have_cbranchcc4)
> +    return 0;

Why do we need to check this?

> +
> +  if (! any_condjump_p (jump))
> +    return NULL_RTX;

There's a mixture of "return 0" and "return NULL_RTX"; think the latter
is preferred.

> +
> +  set = pc_set (jump);
> +
> +  /* If this branches to JUMP_LABEL when the condition is false,
> +     reverse the condition.  */
> +  bool reverse = (GET_CODE (XEXP (SET_SRC (set), 2)) == LABEL_REF
> +	     && label_ref_label (XEXP (SET_SRC (set), 2)) == JUMP_LABEL (jump));
> +
> +  /* We may have to reverse because the caller's if block is not canonical,
> +     i.e. the THEN block isn't the fallthrough block for the TEST block
> +     (see find_if_header).  */
> +  if (then_else_reversed)
> +    reverse = !reverse;
> +
> +  rtx cond = XEXP (SET_SRC (set), 0);
> +
> +  code = GET_CODE (cond);
> +  op0 = XEXP (cond, 0);
> +  op1 = XEXP (cond, 1);
> +
> +  if (reverse)
> +    code = reversed_comparison_code (cond, jump);
> +  if (code == UNKNOWN)
> +    return 0;
> +
> +  /* If constant is first, put it last.  */
> +  if (CONSTANT_P (op0))
> +    code = swap_condition (code), tem = op0, op0 = op1, op1 = tem;

Can use std::swap here.  But this should never happen: we're looking
at the operands to a condition in an existing instruction, which should
always be canonical.

> +
> +  /* Never return CC0; return zero instead.  */
> +  if (CC0_P (op0))
> +    return 0;
> +
> +  /* We promised to return a comparison.  */
> +  rtx ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
> +  if (COMPARISON_P (ret))
> +    return ret;
> +  return 0;
> +}

Without the unnecessary swapping, it looks like the only difference
between this routine and cond_exec_get_condition is the initial
have_cbranchcc4 test and the final COMPARISON_P (ret) test.

The COMPARISON_P test shouldn't really be needed, since I think
all jump (if_then_else ...)s are supposed to have comparisons,
and targets can have no complaints if an if_then_else condition
from a jump gets carried over to a conditional move.  So I think
we can probably drop that too.

I didn't understand why have_cbranchcc4 was needed so can't
comment on that.  But assuming that that can be dropped, it looks
like we could rename cond_exec_get_condition to something more generic
and use it here too.

> +
>  /* Return true if OP is ok for if-then-else processing.  */
>  
>  static int
> @@ -3140,6 +3195,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>    rtx y = XEXP (cond, 1);
>    rtx_code cond_code = GET_CODE (cond);
>  
> +  rtx cc_cmp = noce_get_compare (jump, false);
> +
>    /* The true targets for a conditional move.  */
>    auto_vec<rtx> targets;
>    /* The temporaries introduced to allow us to not consider register
> @@ -3151,7 +3208,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>  
>    hash_map<rtx, bool> need_temps;
>  
> -  check_need_temps (then_bb, &need_temps, cond);
> +  if (!cc_cmp)
> +    check_need_temps (then_bb, &need_temps, cond);
>  
>    hash_map<rtx, bool> temps_created;
>  
> @@ -3242,7 +3300,7 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>  
>        /* Actually emit the conditional move.  */
>        rtx temp_dest = noce_emit_cmove (if_info, temp, cond_code,
> -				       x, y, new_val, old_val);
> +				       x, y, new_val, old_val, cc_cmp);
>  
>        /* If we failed to expand the conditional move, drop out and don't
>  	 try to continue.  */
> diff --git a/gcc/optabs.c b/gcc/optabs.c
> index 06bcaab1f55..3ce6f8cdd30 100644
> --- a/gcc/optabs.c
> +++ b/gcc/optabs.c
> @@ -4325,7 +4325,7 @@ emit_indirect_jump (rtx loc)
>  rtx
>  emit_conditional_move (rtx target, enum rtx_code code, rtx op0, rtx op1,
>  		       machine_mode cmode, rtx op2, rtx op3,
> -		       machine_mode mode, int unsignedp)
> +		       machine_mode mode, int unsignedp, rtx cc_cmp)
>  {
>    rtx comparison;
>    rtx_insn *last;
> @@ -4408,7 +4408,10 @@ emit_conditional_move (rtx target, enum rtx_code code, rtx op0, rtx op1,
>  	      class expand_operand ops[4];
>  
>  	      create_output_operand (&ops[0], target, mode);
> -	      create_fixed_operand (&ops[1], comparison);
> +	      if (cc_cmp)
> +		create_fixed_operand (&ops[1], cc_cmp);
> +	      else
> +		create_fixed_operand (&ops[1], comparison);
>  	      create_input_operand (&ops[2], op2, mode);
>  	      create_input_operand (&ops[3], op3, mode);
>  	      if (maybe_expand_insn (icode, 4, ops))

Most of what emit_conditional_move does is redundant if we discard
"comparison" here.  So I think instead we should split this expand_operand
code out into a new overload of emit_conditional_move that replaces
code, op0 and op1, cmode and unsignedp with "comparison".  We can then
call it here and in ifcvt.c.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 7/9] ifcvt: Emit two cmov variants and choose the less expensive one.
  2019-08-02 15:10 ` [PATCH 7/9] ifcvt: Emit two cmov variants and choose the less expensive one Robin Dapp
@ 2019-10-27 14:38   ` Richard Sandiford
  0 siblings, 0 replies; 25+ messages in thread
From: Richard Sandiford @ 2019-10-27 14:38 UTC (permalink / raw)
  To: Robin Dapp; +Cc: gcc-patches, james.greenhalgh

Robin Dapp <rdapp@linux.ibm.com> writes:
> This patch duplicates the previous noce_emit_cmove logic.  First it
> passes the canonical comparison emits the sequence and costs it.
> Then, a second, separate sequence is created by passing the cc compare
> we extracted before.  The costs of both sequences are compared and the
> cheaper one is emitted.
> ---
>  gcc/ifcvt.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 97 insertions(+), 9 deletions(-)
>
> diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
> index 3db707e1fd1..955f9541f60 100644
> --- a/gcc/ifcvt.c
> +++ b/gcc/ifcvt.c
> @@ -3208,8 +3208,11 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>  
>    hash_map<rtx, bool> need_temps;
>  
> -  if (!cc_cmp)
> -    check_need_temps (then_bb, &need_temps, cond);
> +  /* If we newly set a CC before a cmov, we might need a temporary
> +     even though the compare will be removed by a later pass.  Costing
> +     of a sequence with these will be inaccurate.  */
> +
> +  check_need_temps (then_bb, &need_temps, cond);
>  
>    hash_map<rtx, bool> temps_created;
>  
> @@ -3298,18 +3301,103 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>  	  old_val = lowpart_subreg (dst_mode, old_val, src_mode);
>  	}
>  
> -      /* Actually emit the conditional move.  */
> -      rtx temp_dest = noce_emit_cmove (if_info, temp, cond_code,
> -				       x, y, new_val, old_val, cc_cmp);
> +      /* Try emitting a conditional move passing the backend the
> +	 canonicalized comparison.  This can e.g. be used to recognize
> +	 expressions like
> +
> +	   if (a < b)
> +	   {
> +	     ...
> +	     b = a;
> +	   }
> +
> +	 A backend can recognize this and emit a min/max insn.
> +	 We will still emit a superfluous CC comparison before the
> +	 min/max, though, which complicates costing.
> +      */
> +
> +      rtx_insn *seq, *seq1 = 0, *seq2 = 0;
> +      rtx temp_dest, temp_dest1, temp_dest2;
> +      unsigned int cost1 = 0, cost2 = 0;
> +      bool speed_p = if_info->speed_p;
> +      push_topmost_sequence ();

Why do we need the push_topmost_sequence/pop_topmost_sequence?
start_sequence/end_sequence ought to be enough.

> +
> +      {
> +	start_sequence ();
> +	temp_dest1 = noce_emit_cmove (if_info, temp, cond_code,
> +	    x, y, new_val, old_val, NULL_RTX);

Nit: arguments should be indented below "if_info, ".

> +
> +	/* If we failed to expand the conditional move, drop out and don't
> +	   try to continue.  */
> +	if (temp_dest1 == NULL_RTX)
> +	  {
> +	    seq1 = NULL;
> +	    cost1 = COSTS_N_INSNS (100);
> +	  }
> +	else
> +	  {
> +	    seq1 = get_insns ();
> +	    cost1 = seq_cost (seq1, speed_p);
> +	    rtx_insn *ii;
> +	    for (ii = seq1; ii; ii = NEXT_INSN (ii))
> +	      if (recog_memozied (ii) == -1)
> +		cost1 += 10000;
> +	  }
> +	end_sequence ();

Don't think we should use fake costs for failure.  Just setting seq1 to
null should be enough, and should be OK for the recog_memoized case too.
Can break the loop as soon as we find a failure.

> +      }
> +
> +	/* Now try emitting one passing a non-canonicalized cc comparison.
> +	   This allows the backend to emit a cmov directly without additional
> +	   compare.  */
> +      {
> +	start_sequence ();
> +	temp_dest2 = noce_emit_cmove (if_info, target, cond_code,
> +	    x, y, new_val, old_val, cc_cmp);
> +
> +	/* If we failed to expand the conditional move, drop out and don't
> +	   try to continue.  */
> +	if (temp_dest2 == NULL_RTX)
> +	  {
> +	    seq2 = NULL;
> +	    cost2 = COSTS_N_INSNS (100);
> +	  }
> +	else
> +	  {
> +	    seq2 = get_insns ();
> +	    cost2 = seq_cost (seq2, speed_p);
> +	    rtx_insn *ii;
> +	    for (ii = seq2; ii; ii = NEXT_INSN (ii))
> +	      if (recog_memozied (ii) == -1)
> +		cost2 += 10000;
> +	  }
> +	end_sequence ();
> +      }

Same comments here.  Would be nice to split this out into a subroutine
rather than duplicate it.

Thanks,
Richard

>  
> -      /* If we failed to expand the conditional move, drop out and don't
> -	 try to continue.  */
> -      if (temp_dest == NULL_RTX)
> +      /* Check which version is less expensive.  */
> +      if (cost1 <= cost2 && seq1 != NULL_RTX)
> +	{
> +	  temp_dest = temp_dest1;
> +	  seq = seq1;
> +	}
> +      else if (seq2 != NULL_RTX)
> +	{
> +	  /* We do not require a temp register, so remove it from the list.  */
> +	  seq = seq2;
> +	  temp_dest = temp_dest2;
> +	  temps_created.remove (temp);
> +	}
> +      else
>  	{
> +	  /* Nothing worked, bail out.  */
> +	  pop_topmost_sequence ();
>  	  end_sequence ();
> -	  return FALSE;
> +	  return false;
>  	}
>  
> +      /* End the sub sequence and emit to the main sequence.  */
> +      pop_topmost_sequence ();
> +      emit_insn (seq);
> +
>        /* Bookkeeping.  */
>        count++;
>        targets.safe_push (target);

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 9/9] ifcvt: Also pass reversed cc comparison.
  2019-08-02 15:10 ` [PATCH 9/9] ifcvt: Also pass reversed cc comparison Robin Dapp
@ 2019-10-27 15:27   ` Richard Sandiford
  0 siblings, 0 replies; 25+ messages in thread
From: Richard Sandiford @ 2019-10-27 15:27 UTC (permalink / raw)
  To: Robin Dapp; +Cc: gcc-patches, james.greenhalgh

Robin Dapp <rdapp@linux.ibm.com> writes:
> When then and else are reversed, we would swap new_val and old_val.
> The same has to be done for our new code paths.
> Also, emit_conditional_move may perform swapping.  In case we need to
> swap, the cc comparison also needs to be swapped and for this we pass
> the reversed cc comparison directly.  An alternative would be to pass
> the constituents of the compare directly, but the functions have more
> than enough parameters already.

The changes to emit_conditional_move shouldn't be needed if we pass
the condition directly to a new overload, as per the 6/9 comments.
Let me know if that's not true.

> ---
>  gcc/ifcvt.c  | 37 ++++++++++++++++++++++++++-----------
>  gcc/optabs.c |  4 +++-
>  gcc/optabs.h |  2 +-
>  3 files changed, 30 insertions(+), 13 deletions(-)
>
> diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
> index 09bf443656c..d3959b870f6 100644
> --- a/gcc/ifcvt.c
> +++ b/gcc/ifcvt.c
> @@ -3313,7 +3315,7 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>        */
>  
>        rtx_insn *seq, *seq1 = 0, *seq2 = 0;
> -      rtx temp_dest, temp_dest1, temp_dest2;
> +      rtx temp_dest, temp_dest1 = NULL_RTX, temp_dest2 = NULL_RTX;
>        unsigned int cost1 = 0, cost2 = 0;
>        bool speed_p = if_info->speed_p;
>        push_topmost_sequence ();
> @@ -3324,7 +3326,13 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>  	  temp_dest1 = noce_emit_cmove (if_info, temp, cond_code,
>  	      x, y, new_val, old_val, NULL_RTX);
>  	else
> -	  noce_emit_move_insn (target, new_val);
> +	  {
> +	    temp_dest1 = target;
> +	    if (if_info->then_else_reversed)
> +	      noce_emit_move_insn (target, old_val);
> +	    else
> +	      noce_emit_move_insn (target, new_val);
> +	  }
>  
>  	/* If we failed to expand the conditional move, drop out and don't
>  	   try to continue.  */
> @@ -3352,9 +3360,15 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>  	start_sequence ();
>  	if (!need_no_cmovs.get (insn))
>  	  temp_dest2 = noce_emit_cmove (if_info, target, cond_code,
> -	      x, y, new_val, old_val, cc_cmp);
> +	      x, y, new_val, old_val, cc_cmp, rev_cc_cmp);
>  	else
> -	  noce_emit_move_insn (target, new_val);
> +	  {
> +	    temp_dest2 = target;
> +	    if (if_info->then_else_reversed)
> +	      noce_emit_move_insn (target, old_val);
> +	    else
> +	      noce_emit_move_insn (target, new_val);
> +	  }
>  
>  	/* If we failed to expand the conditional move, drop out and don't
>  	   try to continue.  */
> @@ -3386,7 +3400,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>  	  /* We do not require a temp register, so remove it from the list.  */
>  	  seq = seq2;
>  	  temp_dest = temp_dest2;
> -	  temps_created.remove (temp);
> +	  if (temps_created.get (temp))
> +	    temps_created.remove (temp);
>  	}
>        else
>  	{
> @@ -3436,7 +3451,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info)
>    /* Mark all our temporaries and targets as used.  */
>    for (int i = 0; i < count; i++)
>      {
> -      set_used_flags (temporaries[i]);
> +      if (temps_created.get(targets[i]))
> +	set_used_flags (temporaries[i]);
>        set_used_flags (targets[i]);
>      }
>  
> @@ -3941,7 +3957,7 @@ check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)
>  
>    FOR_BB_INSNS (bb, insn)
>      {
> -      rtx set, src, dest;
> +      rtx set, dest;
>  
>        if (!active_insn_p (insn))
>  	continue;
> @@ -3950,7 +3966,6 @@ check_need_temps (basic_block bb, hash_map<rtx, bool> *need_temp, rtx cond)
>        if (set == NULL_RTX)
>  	continue;
>  
> -      src = SET_SRC (set);
>        dest = SET_DEST (set);
>  
>        /* noce_emit_cmove will emit the condition check every time it is called

It looks like these changes are really fixes for previous patches in the
series.  It'd be clearer to fold them into the relevant previous patch
if possible.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2019-10-27 14:38 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-02 15:10 [PATCH 0/9] Improve icvt "convert multiple" Robin Dapp
2019-08-02 15:10 ` [PATCH 7/9] ifcvt: Emit two cmov variants and choose the less expensive one Robin Dapp
2019-10-27 14:38   ` Richard Sandiford
2019-08-02 15:10 ` [PATCH 1/9] ifcvt: Store the number of created cmovs Robin Dapp
2019-08-02 15:10 ` [PATCH 2/9] ifcvt: Use enum instead of transform_name string Robin Dapp
2019-08-06 21:03   ` Richard Sandiford
2019-08-08 22:18     ` Jeff Law
2019-08-02 15:10 ` [PATCH 6/9] ifcvt: Extract cc comparison from jump Robin Dapp
2019-10-27 14:31   ` Richard Sandiford
2019-08-02 15:10 ` [PATCH 8/9] ifcvt: Handle swap-style idioms differently Robin Dapp
2019-08-06 20:47   ` Richard Sandiford
2019-08-16 13:22     ` Robin Dapp
2019-08-16 15:38       ` Richard Sandiford
2019-08-17 23:48         ` Robin Dapp
2019-08-02 15:10 ` [PATCH 9/9] ifcvt: Also pass reversed cc comparison Robin Dapp
2019-10-27 15:27   ` Richard Sandiford
2019-08-02 15:10 ` [PATCH 3/9] ifcvt: Only created temporaries as needed Robin Dapp
2019-08-06 20:14   ` Richard Sandiford
2019-08-08  9:13     ` Robin Dapp
2019-08-02 15:10 ` [PATCH 4/9] ifcvt: Estimate original costs before convert_multiple Robin Dapp
2019-08-06 20:30   ` Richard Sandiford
2019-08-02 15:10 ` [PATCH 5/9] ifcvt: Allow constants operands in noce_convert_multiple_sets Robin Dapp
2019-08-06 20:37   ` Richard Sandiford
2019-08-08 10:04     ` Robin Dapp
2019-10-27 14:12   ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).