Subject: [PATCH] widening_mul: Pattern recognize unsigned multiplication with overflow check [PR95852]
From: Jakub Jelinek @ 2021-01-09  9:15 UTC
  To: Richard Biener; +Cc: gcc-patches

Hi!

The following patch pattern recognizes some forms of multiplication
followed by an overflow check performed by dividing the result by one
of the operands and comparing against the other one, with optional
removal of the guarding non-zero check for that operand where possible.
The patterns are replaced with what is effectively
__builtin_mul_overflow or __builtin_mul_overflow_p.  The testcases
cover 64 different forms of that.
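
For example, a check written like testcase f1 in pr95852-1.c below:

  int
  f1 (unsigned x, unsigned y, unsigned *res)
  {
    *res = x * y;
    return x && (*res / x) != y;
  }

is compiled as if the builtin had been used directly, i.e. (an
illustrative equivalent-source sketch, not literally what the pass
emits):

  int
  f1 (unsigned x, unsigned y, unsigned *res)
  {
    return __builtin_mul_overflow (x, y, res);
  }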

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-01-08  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/95852
	* tree-ssa-math-opts.c (maybe_optimize_guarding_check): New function.
	(uaddsub_overflow_check_p): Renamed to ...
	(arith_overflow_check_p): ... this.  Handle also multiplication
	with overflow check.
	(match_uaddsub_overflow): Renamed to ...
	(match_arith_overflow): ... this.  Add cfg_changed argument.  Handle
	also multiplication with overflow check.  Adjust function comment.
	(math_opts_dom_walker::after_dom_children): Adjust callers.  Call
	match_arith_overflow also for MULT_EXPR.

	* gcc.target/i386/pr95852-1.c: New test.
	* gcc.target/i386/pr95852-2.c: New test.
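
The two tests differ in whether the multiplication result is consumed:
in pr95852-1.c it is, so the replacement corresponds to
__builtin_mul_overflow, while in pr95852-2.c only the overflow test is
used, which corresponds to __builtin_mul_overflow_p.  E.g. f1 from
pr95852-2.c:

  int
  f1 (unsigned x, unsigned y)
  {
    unsigned int r = x * y;
    return x && (r / x) != y;
  }

behaves as if written (again just an equivalent-source sketch):

  int
  f1 (unsigned x, unsigned y)
  {
    return __builtin_mul_overflow_p (x, y, 0U);
  }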

--- gcc/tree-ssa-math-opts.c.jj	2021-01-08 12:27:46.066177063 +0100
+++ gcc/tree-ssa-math-opts.c	2021-01-08 14:39:11.872358339 +0100
@@ -3451,17 +3451,228 @@ convert_mult_to_fma (gimple *mul_stmt, t
 }
 
 
-/* Helper function of match_uaddsub_overflow.  Return 1
+/* Helper function of match_arith_overflow.  For MUL_OVERFLOW, if we have
+   a check for non-zero like:
+   _1 = x_4(D) * y_5(D);
+   *res_7(D) = _1;
+   if (x_4(D) != 0)
+     goto <bb 3>; [50.00%]
+   else
+     goto <bb 4>; [50.00%]
+
+   <bb 3> [local count: 536870913]:
+   _2 = _1 / x_4(D);
+   _9 = _2 != y_5(D);
+   _10 = (int) _9;
+
+   <bb 4> [local count: 1073741824]:
+   # iftmp.0_3 = PHI <_10(3), 0(2)>
+   then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
+   optimize the x_4(D) != 0 condition to 1.  */
+
+static void
+maybe_optimize_guarding_check (gimple **mul_stmts, gimple *cond_stmt,
+			       gimple *div_stmt, bool *cfg_changed)
+{
+  basic_block bb = gimple_bb (cond_stmt);
+  if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
+    return;
+  edge pred_edge = single_pred_edge (bb);
+  basic_block pred_bb = pred_edge->src;
+  if (EDGE_COUNT (pred_bb->succs) != 2)
+    return;
+  edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
+  edge other_succ_edge = NULL;
+  if (gimple_code (cond_stmt) == GIMPLE_COND)
+    {
+      if (EDGE_COUNT (bb->succs) != 2)
+	return;
+      other_succ_edge = EDGE_SUCC (bb, 0);
+      if (gimple_cond_code (cond_stmt) == NE_EXPR)
+	{
+	  if (other_succ_edge->flags & EDGE_TRUE_VALUE)
+	    other_succ_edge = EDGE_SUCC (bb, 1);
+	}
+      else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
+	other_succ_edge = EDGE_SUCC (bb, 0);
+      if (other_edge->dest != other_succ_edge->dest)
+	return;
+    }
+  else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
+    return;
+  gimple *zero_cond = last_stmt (pred_bb);
+  if (zero_cond == NULL
+      || gimple_code (zero_cond) != GIMPLE_COND
+      || (gimple_cond_code (zero_cond)
+	  != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
+      || !integer_zerop (gimple_cond_rhs (zero_cond)))
+    return;
+  tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
+  if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
+    return;
+  if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
+    {
+      /* Allow the divisor to be the result of a same-precision cast
+	 from zero_cond_lhs.  */
+      tree rhs2 = gimple_assign_rhs2 (div_stmt);
+      if (TREE_CODE (rhs2) != SSA_NAME)
+	return;
+      gimple *g = SSA_NAME_DEF_STMT (rhs2);
+      if (!gimple_assign_cast_p (g)
+	  || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
+	  || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
+	  || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
+	      != TYPE_PRECISION (TREE_TYPE (rhs2))))
+	return;
+    }
+  gimple_stmt_iterator gsi = gsi_for_stmt (div_stmt);
+  gsi_prev_nondebug (&gsi);
+  if (!gsi_end_p (gsi))
+    {
+      /* If the original mul_stmt has a single use, allow it in the same bb;
+	 we are then looking just at __builtin_mul_overflow_p.
+	 Though, in that case the original mul_stmt will be replaced
+	 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts.  */
+      for (int i = 2; i >= 0; --i)
+	{
+	  if (gsi_stmt (gsi) != mul_stmts[i])
+	    return;
+	  gsi_prev_nondebug (&gsi);
+	}
+      /* Allow up to 2 extra casts.  Given the way we check PHIs,
+	 nothing from this bb should be consumed by any other bb
+	 anyway.  */
+      for (int i = 0; i < 2 && !gsi_end_p (gsi); i++)
+	{
+	  gimple *g = gsi_stmt (gsi);
+	  if (!gimple_assign_cast_p (g))
+	    return;
+	  gsi_prev_nondebug (&gsi);
+	}
+      if (!gsi_end_p (gsi))
+	return;
+    }
+  gsi = gsi_for_stmt (div_stmt);
+  gsi_next_nondebug (&gsi);
+  if (gsi_stmt (gsi) != cond_stmt)
+    return;
+  if (gimple_code (cond_stmt) == GIMPLE_COND)
+    {
+      basic_block succ_bb = other_edge->dest;
+      for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
+	   gsi_next (&gpi))
+	{
+	  gphi *phi = gpi.phi ();
+	  tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
+	  tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
+	  if (!operand_equal_p (v1, v2, 0))
+	    return;
+	}
+    }
+  else
+    {
+      tree lhs = gimple_assign_lhs (cond_stmt);
+      if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
+	return;
+      gsi_next_nondebug (&gsi);
+      if (!gsi_end_p (gsi))
+	{
+	  if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
+	    return;
+	  gimple *cast_stmt = gsi_stmt (gsi);
+	  if (!gimple_assign_cast_p (cast_stmt))
+	    return;
+	  tree new_lhs = gimple_assign_lhs (cast_stmt);
+	  gsi_next_nondebug (&gsi);
+	  if (!gsi_end_p (gsi)
+	      || !new_lhs
+	      || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
+	      || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
+	    return;
+	  lhs = new_lhs;
+	}
+      edge succ_edge = single_succ_edge (bb);
+      basic_block succ_bb = succ_edge->dest;
+      gsi = gsi_start_phis (succ_bb);
+      if (gsi_end_p (gsi))
+	return;
+      gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
+      gsi_next (&gsi);
+      if (!gsi_end_p (gsi))
+	return;
+      if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
+	return;
+      tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
+      if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
+	{
+	  tree cond = gimple_assign_rhs1 (cond_stmt);
+	  if (TREE_CODE (cond) == NE_EXPR)
+	    {
+	      if (!operand_equal_p (other_val,
+				    gimple_assign_rhs3 (cond_stmt), 0))
+		return;
+	    }
+	  else if (!operand_equal_p (other_val,
+				     gimple_assign_rhs2 (cond_stmt), 0))
+	    return;
+	}
+      else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
+	{
+	  if (!integer_zerop (other_val))
+	    return;
+	}
+      else if (!integer_onep (other_val))
+	return;
+    }
+  gcond *zero_gcond = as_a <gcond *> (zero_cond);
+  if (pred_edge->flags & EDGE_TRUE_VALUE)
+    gimple_cond_make_true (zero_gcond);
+  else
+    gimple_cond_make_false (zero_gcond);
+  update_stmt (zero_cond);
+  *cfg_changed = true;
+}
+
+/* Helper function of match_arith_overflow.  Return 1
    if USE_STMT is unsigned overflow check ovf != 0 for
    STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
    and 0 otherwise.  */
 
 static int
-uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt, tree maxval,
-			  tree *other)
+arith_overflow_check_p (gimple *stmt, gimple *&use_stmt, tree maxval,
+			tree *other)
 {
   enum tree_code ccode = ERROR_MARK;
   tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
+  enum tree_code code = gimple_assign_rhs_code (stmt);
+  tree lhs = gimple_assign_lhs (stmt);
+  tree rhs1 = gimple_assign_rhs1 (stmt);
+  tree rhs2 = gimple_assign_rhs2 (stmt);
+  tree multop = NULL_TREE, divlhs = NULL_TREE;
+
+  if (code == MULT_EXPR)
+    {
+      if (!is_gimple_assign (use_stmt))
+	return 0;
+      if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
+	return 0;
+      if (gimple_assign_rhs1 (use_stmt) != lhs)
+	return 0;
+      if (gimple_assign_rhs2 (use_stmt) == rhs1)
+	multop = rhs2;
+      else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
+	multop = rhs1;
+      else
+	return 0;
+      if (stmt_ends_bb_p (use_stmt))
+	return 0;
+      divlhs = gimple_assign_lhs (use_stmt);
+      if (!divlhs)
+	return 0;
+      use_operand_p use;
+      if (!single_imm_use (divlhs, &use, &use_stmt))
+	return 0;
+    }
   if (gimple_code (use_stmt) == GIMPLE_COND)
     {
       ccode = gimple_cond_code (use_stmt);
@@ -3497,11 +3708,6 @@ uaddsub_overflow_check_p (gimple *stmt,
   if (TREE_CODE_CLASS (ccode) != tcc_comparison)
     return 0;
 
-  enum tree_code code = gimple_assign_rhs_code (stmt);
-  tree lhs = gimple_assign_lhs (stmt);
-  tree rhs1 = gimple_assign_rhs1 (stmt);
-  tree rhs2 = gimple_assign_rhs2 (stmt);
-
   switch (ccode)
     {
     case GT_EXPR:
@@ -3547,6 +3753,17 @@ uaddsub_overflow_check_p (gimple *stmt,
 	  return ccode == LT_EXPR ? 1 : -1;
 	}
       break;
+    case EQ_EXPR:
+    case NE_EXPR:
+      /* r = a * b; _1 = r / a; _1 == b
+	 r = a * b; _1 = r / b; _1 == a
+	 r = a * b; _1 = r / a; _1 != b
+	 r = a * b; _1 = r / b; _1 != a.  */
+      if (code == MULT_EXPR
+	  && ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
+	      || (crhs2 == divlhs && crhs1 == multop)))
+	return ccode == NE_EXPR ? 1 : -1;
+      break;
     default:
       break;
     }
@@ -3557,7 +3774,7 @@ uaddsub_overflow_check_p (gimple *stmt,
    x = y - z;
    if (x > y)
    where there are other uses of x and replace it with
-   _7 = SUB_OVERFLOW (y, z);
+   _7 = .SUB_OVERFLOW (y, z);
    x = REALPART_EXPR <_7>;
    _8 = IMAGPART_EXPR <_7>;
    if (_8)
@@ -3571,9 +3788,9 @@ uaddsub_overflow_check_p (gimple *stmt,
    where y and z have unsigned types with maximum max
    and there are other uses of x and all of those cast x
    back to that unsigned type and again replace it with
-   _7 = ADD_OVERFLOW (y, z);
+   _7 = .ADD_OVERFLOW (y, z);
    _9 = REALPART_EXPR <_7>;
-   _8 = IMAGPART_EXPR <_8>;
+   _8 = IMAGPART_EXPR <_7>;
    if (_8)
    and replace (utype) x with _9.
 
@@ -3581,13 +3798,34 @@ uaddsub_overflow_check_p (gimple *stmt,
    x = ~z;
    if (y > x)
    and replace it with
-   _7 = ADD_OVERFLOW (y, z);
-   _8 = IMAGPART_EXPR <_8>;
-   if (_8)  */
+   _7 = .ADD_OVERFLOW (y, z);
+   _8 = IMAGPART_EXPR <_7>;
+   if (_8)
+
+   And also recognize:
+   z = x * y;
+   if (x != 0)
+     goto <bb 3>; [50.00%]
+   else
+     goto <bb 4>; [50.00%]
+
+   <bb 3> [local count: 536870913]:
+   _2 = z / x;
+   _9 = _2 != y;
+   _10 = (int) _9;
+
+   <bb 4> [local count: 1073741824]:
+   # iftmp.0_3 = PHI <_10(3), 0(2)>
+   and replace it with
+   _7 = .MUL_OVERFLOW (x, y);
+   z = REALPART_EXPR <_7>;
+   _8 = IMAGPART_EXPR <_7>;
+   _9 = _8 != 0;
+   iftmp.0_3 = (int) _9;  */
 
 static bool
-match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
-			enum tree_code code)
+match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
+		      enum tree_code code, bool *cfg_changed)
 {
   tree lhs = gimple_assign_lhs (stmt);
   tree type = TREE_TYPE (lhs);
@@ -3602,11 +3840,13 @@ match_uaddsub_overflow (gimple_stmt_iter
 
   gcc_checking_assert (code == PLUS_EXPR
 		       || code == MINUS_EXPR
+		       || code == MULT_EXPR
 		       || code == BIT_NOT_EXPR);
   if (!INTEGRAL_TYPE_P (type)
       || !TYPE_UNSIGNED (type)
       || has_zero_uses (lhs)
       || (code != PLUS_EXPR
+	  && code != MULT_EXPR
 	  && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
 			    TYPE_MODE (type)) == CODE_FOR_nothing))
     return false;
@@ -3620,7 +3860,7 @@ match_uaddsub_overflow (gimple_stmt_iter
 	continue;
 
       tree other = NULL_TREE;
-      if (uaddsub_overflow_check_p (stmt, use_stmt, NULL_TREE, &other))
+      if (arith_overflow_check_p (stmt, use_stmt, NULL_TREE, &other))
 	{
 	  if (code == BIT_NOT_EXPR)
 	    {
@@ -3643,9 +3883,12 @@ match_uaddsub_overflow (gimple_stmt_iter
 
   tree maxval = NULL_TREE;
   if (!ovf_use_seen
-      || (code == BIT_NOT_EXPR ? use_seen : !use_seen)
+      || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
       || (code == PLUS_EXPR
 	  && optab_handler (uaddv4_optab,
+			    TYPE_MODE (type)) == CODE_FOR_nothing)
+      || (code == MULT_EXPR
+	  && optab_handler (umulv4_optab,
 			    TYPE_MODE (type)) == CODE_FOR_nothing))
     {
       if (code != PLUS_EXPR)
@@ -3704,7 +3947,7 @@ match_uaddsub_overflow (gimple_stmt_iter
 	  if (is_gimple_debug (use_stmt))
 	    continue;
 
-	  if (uaddsub_overflow_check_p (stmt, use_stmt, maxval, NULL))
+	  if (arith_overflow_check_p (stmt, use_stmt, maxval, NULL))
 	    {
 	      ovf_use_seen = true;
 	      use_bb = gimple_bb (use_stmt);
@@ -3824,13 +4067,16 @@ match_uaddsub_overflow (gimple_stmt_iter
     *gsi = gsi_for_stmt (cond_stmt);
 
   tree ctype = build_complex_type (type);
-  gcall *g = gimple_build_call_internal (code != MINUS_EXPR
+  gcall *g = gimple_build_call_internal (code == MULT_EXPR
+					 ? IFN_MUL_OVERFLOW
+					 : code != MINUS_EXPR
 					 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
 					 2, rhs1, rhs2);
   tree ctmp = make_ssa_name (ctype);
   gimple_call_set_lhs (g, ctmp);
   gsi_insert_before (gsi, g, GSI_SAME_STMT);
   tree new_lhs = maxval ? make_ssa_name (type) : lhs;
+  gimple *mul_stmts[3] = { NULL, NULL, NULL };
   gassign *g2;
   if (code != BIT_NOT_EXPR)
     {
@@ -3844,6 +4090,11 @@ match_uaddsub_overflow (gimple_stmt_iter
 	}
       else
 	gsi_replace (gsi, g2, true);
+      if (code == MULT_EXPR)
+	{
+	  mul_stmts[0] = g;
+	  mul_stmts[1] = g2;
+	}
     }
   tree ovf = make_ssa_name (type);
   g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
@@ -3852,13 +4103,16 @@ match_uaddsub_overflow (gimple_stmt_iter
     gsi_insert_after (gsi, g2, GSI_NEW_STMT);
   else
     gsi_insert_before (gsi, g2, GSI_SAME_STMT);
+  if (code == MULT_EXPR)
+    mul_stmts[2] = g2;
 
   FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
     {
       if (is_gimple_debug (use_stmt))
 	continue;
 
-      int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt, maxval, NULL);
+      gimple *orig_use_stmt = use_stmt;
+      int ovf_use = arith_overflow_check_p (stmt, use_stmt, maxval, NULL);
       if (ovf_use == 0)
 	{
 	  gcc_assert (code != BIT_NOT_EXPR);
@@ -3901,6 +4155,14 @@ match_uaddsub_overflow (gimple_stmt_iter
 	    }
 	}
       update_stmt (use_stmt);
+      if (code == MULT_EXPR && use_stmt != orig_use_stmt)
+	{
+	  gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
+	  maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
+					 cfg_changed);
+	  gsi_remove (&gsi2, true);
+	  release_ssa_name (gimple_assign_lhs (orig_use_stmt));
+	}
     }
   if (maxval)
     {
@@ -4238,16 +4500,17 @@ math_opts_dom_walker::after_dom_children
 		  release_defs (stmt);
 		  continue;
 		}
+	      match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
 	      break;
 
 	    case PLUS_EXPR:
 	    case MINUS_EXPR:
 	      if (!convert_plusminus_to_widen (&gsi, stmt, code))
-		match_uaddsub_overflow (&gsi, stmt, code);
+		match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
 	      break;
 
 	    case BIT_NOT_EXPR:
-	      if (match_uaddsub_overflow (&gsi, stmt, code))
+	      if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
 		continue;
 	      break;
 
--- gcc/testsuite/gcc.target/i386/pr95852-1.c.jj	2021-01-08 13:52:54.222694377 +0100
+++ gcc/testsuite/gcc.target/i386/pr95852-1.c	2021-01-08 13:52:30.924957853 +0100
@@ -0,0 +1,266 @@
+/* PR tree-optimization/95852 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized -masm=att" } */
+/* { dg-final { scan-tree-dump-times " = \.MUL_OVERFLOW " 32 "optimized" } } */
+/* { dg-final { scan-assembler-times "\tmull\t" 32 } } */
+/* { dg-final { scan-assembler-times "\tseto\t" 8 } } */
+/* { dg-final { scan-assembler-times "\tsetno\t" 8 } } */
+/* { dg-final { scan-assembler-times "\tjn\?o\t" 16 } } */
+
+unsigned fn (void);
+
+int
+f1 (unsigned x, unsigned y, unsigned *res)
+{
+  *res = x * y;
+  return x && (*res / x) != y;
+}
+
+unsigned
+f2 (unsigned x, unsigned y)
+{
+  unsigned int r = x * y;
+  if (x && (r / x) != y)
+    return fn ();
+  return r;
+}
+
+int
+f3 (unsigned x, unsigned y, unsigned *res)
+{
+  *res = x * y;
+  return !x || (*res / x) == y;
+}
+
+unsigned
+f4 (unsigned x, unsigned y)
+{
+  unsigned int r = x * y;
+  if (!x || (r / x) == y)
+    return fn ();
+  return r;
+}
+
+int
+f5 (int x, int y, int *res)
+{
+  *res = (unsigned) x * y;
+  return x && ((unsigned) *res / x) != (unsigned) y;
+}
+
+int
+f6 (int x, int y)
+{
+  int r = (unsigned) x * y;
+  if (x && ((unsigned) r / x) != (unsigned) y)
+    return fn ();
+  return r;
+}
+
+int
+f7 (int x, int y, int *res)
+{
+  *res = (unsigned) x * y;
+  return !x || ((unsigned) *res / x) == (unsigned) y;
+}
+
+int
+f8 (int x, int y)
+{
+  int r = (unsigned) x * y;
+  if (!x || ((unsigned) r / x) == (unsigned) y)
+    return fn ();
+  return r;
+}
+
+int
+f9 (unsigned x, unsigned y, unsigned *res)
+{
+  *res = x * y;
+  return y && (*res / y) != x;
+}
+
+unsigned
+f10 (unsigned x, unsigned y)
+{
+  unsigned int r = x * y;
+  if (y && (r / y) != x)
+    return fn ();
+  return r;
+}
+
+int
+f11 (unsigned x, unsigned y, unsigned *res)
+{
+  *res = x * y;
+  return !y || (*res / y) == x;
+}
+
+unsigned
+f12 (unsigned x, unsigned y)
+{
+  unsigned int r = x * y;
+  if (!y || (r / y) == x)
+    return fn ();
+  return r;
+}
+
+int
+f13 (int x, int y, int *res)
+{
+  *res = (unsigned) x * y;
+  return y && ((unsigned) *res / y) != (unsigned) x;
+}
+
+int
+f14 (int x, int y)
+{
+  int r = (unsigned) x * y;
+  if (y && ((unsigned) r / y) != (unsigned) x)
+    return fn ();
+  return r;
+}
+
+int
+f15 (int x, int y, int *res)
+{
+  *res = (unsigned) x * y;
+  return !y || ((unsigned) *res / y) == (unsigned) x;
+}
+
+int
+f16 (int x, int y)
+{
+  int r = (unsigned) x * y;
+  if (!y || ((unsigned) r / y) == (unsigned) x)
+    return fn ();
+  return r;
+}
+
+int
+f17 (unsigned x, unsigned *res)
+{
+  *res = x * 35U;
+  return x && (*res / x) != 35U;
+}
+
+unsigned
+f18 (unsigned x)
+{
+  unsigned int r = x * 35U;
+  if (x && (r / x) != 35U)
+    return fn ();
+  return r;
+}
+
+int
+f19 (unsigned x, unsigned *res)
+{
+  *res = x * 35U;
+  return !x || (*res / x) == 35U;
+}
+
+unsigned
+f20 (unsigned x)
+{
+  unsigned int r = x * 35U;
+  if (!x || (r / x) == 35U)
+    return fn ();
+  return r;
+}
+
+int
+f21 (int x, int *res)
+{
+  *res = (unsigned) x * 35;
+  return x && ((unsigned) *res / x) != 35U;
+}
+
+int
+f22 (int x)
+{
+  int r = (unsigned) x * 35;
+  if (x && ((unsigned) r / x) != 35U)
+    return fn ();
+  return r;
+}
+
+int
+f23 (int x, int *res)
+{
+  *res = (unsigned) x * 35;
+  return !x || ((unsigned) *res / x) == 35U;
+}
+
+int
+f24 (int x)
+{
+  int r = (unsigned) x * 35;
+  if (!x || ((unsigned) r / x) == 35U)
+    return fn ();
+  return r;
+}
+
+int
+f25 (unsigned x, unsigned *res)
+{
+  *res = x * 35U;
+  return (*res / 35U) != x;
+}
+
+unsigned
+f26 (unsigned x)
+{
+  unsigned int r = x * 35U;
+  if ((r / 35U) != x)
+    return fn ();
+  return r;
+}
+
+int
+f27 (unsigned x, unsigned *res)
+{
+  *res = x * 35U;
+  return !35U || (*res / 35U) == x;
+}
+
+unsigned
+f28 (unsigned x)
+{
+  unsigned int r = x * 35U;
+  if ((r / 35U) == x)
+    return fn ();
+  return r;
+}
+
+int
+f29 (int x, int *res)
+{
+  *res = (unsigned) x * 35;
+  return 35 && ((unsigned) *res / 35) != (unsigned) x;
+}
+
+int
+f30 (int x)
+{
+  int r = (unsigned) x * 35;
+  if (((unsigned) r / 35) != (unsigned) x)
+    return fn ();
+  return r;
+}
+
+int
+f31 (int x, int *res)
+{
+  *res = (unsigned) x * 35;
+  return ((unsigned) *res / 35) == (unsigned) x;
+}
+
+int
+f32 (int x)
+{
+  int r = (unsigned) x * 35;
+  if (((unsigned) r / 35) == (unsigned) x)
+    return fn ();
+  return r;
+}
--- gcc/testsuite/gcc.target/i386/pr95852-2.c.jj	2021-01-08 14:40:06.438745208 +0100
+++ gcc/testsuite/gcc.target/i386/pr95852-2.c	2021-01-08 14:12:59.416071946 +0100
@@ -0,0 +1,266 @@
+/* PR tree-optimization/95852 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized -masm=att" } */
+/* { dg-final { scan-tree-dump-times " = \.MUL_OVERFLOW " 32 "optimized" } } */
+/* { dg-final { scan-assembler-times "\tmull\t" 32 } } */
+/* { dg-final { scan-assembler-times "\tseto\t" 8 } } */
+/* { dg-final { scan-assembler-times "\tsetno\t" 8 } } */
+/* { dg-final { scan-assembler-times "\tjn\?o\t" 16 } } */
+
+unsigned fn (void);
+
+int
+f1 (unsigned x, unsigned y)
+{
+  unsigned int r = x * y;
+  return x && (r / x) != y;
+}
+
+unsigned
+f2 (unsigned x, unsigned y)
+{
+  unsigned int r = x * y;
+  if (x && (r / x) != y)
+    return fn ();
+  return 0;
+}
+
+int
+f3 (unsigned x, unsigned y)
+{
+  unsigned int r = x * y;
+  return !x || (r / x) == y;
+}
+
+unsigned
+f4 (unsigned x, unsigned y)
+{
+  unsigned int r = x * y;
+  if (!x || (r / x) == y)
+    return fn ();
+  return 0;
+}
+
+int
+f5 (int x, int y)
+{
+  int r = (unsigned) x * y;
+  return x && ((unsigned) r / x) != (unsigned) y;
+}
+
+int
+f6 (int x, int y)
+{
+  int r = (unsigned) x * y;
+  if (x && ((unsigned) r / x) != (unsigned) y)
+    return fn ();
+  return 0;
+}
+
+int
+f7 (int x, int y)
+{
+  int r = (unsigned) x * y;
+  return !x || ((unsigned) r / x) == (unsigned) y;
+}
+
+int
+f8 (int x, int y)
+{
+  int r = (unsigned) x * y;
+  if (!x || ((unsigned) r / x) == (unsigned) y)
+    return fn ();
+  return 0;
+}
+
+int
+f9 (unsigned x, unsigned y)
+{
+  unsigned r = x * y;
+  return y && (r / y) != x;
+}
+
+unsigned
+f10 (unsigned x, unsigned y)
+{
+  unsigned int r = x * y;
+  if (y && (r / y) != x)
+    return fn ();
+  return 0;
+}
+
+int
+f11 (unsigned x, unsigned y)
+{
+  unsigned r = x * y;
+  return !y || (r / y) == x;
+}
+
+unsigned
+f12 (unsigned x, unsigned y)
+{
+  unsigned int r = x * y;
+  if (!y || (r / y) == x)
+    return fn ();
+  return 0;
+}
+
+int
+f13 (int x, int y)
+{
+  int r = (unsigned) x * y;
+  return y && ((unsigned) r / y) != (unsigned) x;
+}
+
+int
+f14 (int x, int y)
+{
+  int r = (unsigned) x * y;
+  if (y && ((unsigned) r / y) != (unsigned) x)
+    return fn ();
+  return 0;
+}
+
+int
+f15 (int x, int y)
+{
+  int r = (unsigned) x * y;
+  return !y || ((unsigned) r / y) == (unsigned) x;
+}
+
+int
+f16 (int x, int y)
+{
+  int r = (unsigned) x * y;
+  if (!y || ((unsigned) r / y) == (unsigned) x)
+    return fn ();
+  return 0;
+}
+
+int
+f17 (unsigned x)
+{
+  unsigned r = x * 35U;
+  return x && (r / x) != 35U;
+}
+
+unsigned
+f18 (unsigned x)
+{
+  unsigned int r = x * 35U;
+  if (x && (r / x) != 35U)
+    return fn ();
+  return 0;
+}
+
+int
+f19 (unsigned x)
+{
+  unsigned r = x * 35U;
+  return !x || (r / x) == 35U;
+}
+
+unsigned
+f20 (unsigned x)
+{
+  unsigned int r = x * 35U;
+  if (!x || (r / x) == 35U)
+    return fn ();
+  return 0;
+}
+
+int
+f21 (int x)
+{
+  int r = (unsigned) x * 35;
+  return x && ((unsigned) r / x) != 35U;
+}
+
+int
+f22 (int x)
+{
+  int r = (unsigned) x * 35;
+  if (x && ((unsigned) r / x) != 35U)
+    return fn ();
+  return 0;
+}
+
+int
+f23 (int x)
+{
+  int r = (unsigned) x * 35;
+  return !x || ((unsigned) r / x) == 35U;
+}
+
+int
+f24 (int x)
+{
+  int r = (unsigned) x * 35;
+  if (!x || ((unsigned) r / x) == 35U)
+    return fn ();
+  return 0;
+}
+
+int
+f25 (unsigned x)
+{
+  unsigned r = x * 35U;
+  return (r / 35U) != x;
+}
+
+unsigned
+f26 (unsigned x)
+{
+  unsigned int r = x * 35U;
+  if ((r / 35U) != x)
+    return fn ();
+  return 0;
+}
+
+int
+f27 (unsigned x)
+{
+  unsigned r = x * 35U;
+  return !35U || (r / 35U) == x;
+}
+
+unsigned
+f28 (unsigned x)
+{
+  unsigned int r = x * 35U;
+  if ((r / 35U) == x)
+    return fn ();
+  return 0;
+}
+
+int
+f29 (int x)
+{
+  int r = (unsigned) x * 35;
+  return 35 && ((unsigned) r / 35) != (unsigned) x;
+}
+
+int
+f30 (int x)
+{
+  int r = (unsigned) x * 35;
+  if (((unsigned) r / 35) != (unsigned) x)
+    return fn ();
+  return 0;
+}
+
+int
+f31 (int x)
+{
+  int r = (unsigned) x * 35;
+  return ((unsigned) r / 35) == (unsigned) x;
+}
+
+int
+f32 (int x)
+{
+  int r = (unsigned) x * 35;
+  if (((unsigned) r / 35) == (unsigned) x)
+    return fn ();
+  return 0;
+}

	Jakub



Subject: Re: [PATCH] widening_mul: Pattern recognize unsigned multiplication with overflow check [PR95852]
From: Richard Biener @ 2021-01-11  8:23 UTC
  To: Jakub Jelinek; +Cc: gcc-patches

On Sat, 9 Jan 2021, Jakub Jelinek wrote:

> Hi!
>
> The following patch pattern recognizes some forms of multiplication
> followed by an overflow check performed by dividing the result by one
> of the operands and comparing against the other one, with optional
> removal of the guarding non-zero check for that operand where possible.
> The patterns are replaced with what is effectively
> __builtin_mul_overflow or __builtin_mul_overflow_p.  The testcases
> cover 64 different forms of that.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

