public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Convert manual unsigned +/- overflow checking into {ADD,SUB}_OVERFLOW (PR target/67089)
@ 2015-11-24 20:55 Jakub Jelinek
  2015-11-25  8:11 ` Marc Glisse
  2015-11-25  8:45 ` Richard Biener
  0 siblings, 2 replies; 13+ messages in thread
From: Jakub Jelinek @ 2015-11-24 20:55 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches, Richard Henderson

Hi!

This is the GIMPLE side of Richard's i?86 uadd/usub overflow
testing improvements.  If the result of an unsigned addition or subtraction
is used both normally and in a GIMPLE_COND/COND_EXPR/tcc_comparison
that tests whether unsigned overflow happened, the patch replaces it shortly
before expansion with {ADD,SUB}_OVERFLOW, so that RTL expansion can generate
better code for it.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2015-11-24  Jakub Jelinek  <jakub@redhat.com>

	PR target/67089
	* tree-ssa-math-opts.c (uaddsub_overflow_check_p,
	match_uaddsub_overflow): New functions.
	(pass_optimize_widening_mul::execute): Call match_uaddsub_overflow.

	* gcc.dg/pr67089-1.c: New test.
	* gcc.dg/pr67089-2.c: New test.
	* gcc.dg/pr67089-3.c: New test.
	* gcc.dg/pr67089-4.c: New test.
	* gcc.dg/pr67089-5.c: New test.
	* gcc.dg/pr67089-6.c: New test.
	* gcc.dg/pr67089-7.c: New test.

--- gcc/tree-ssa-math-opts.c.jj	2015-11-18 11:19:23.000000000 +0100
+++ gcc/tree-ssa-math-opts.c	2015-11-24 17:00:10.825900958 +0100
@@ -3491,6 +3491,189 @@ convert_mult_to_fma (gimple *mul_stmt, t
   return true;
 }
 
+
+/* Helper function of match_uaddsub_overflow.  Return 1 if USE_STMT
+   is a comparison that is true when the unsigned PLUS_EXPR or
+   MINUS_EXPR in STMT wrapped around (ovf != 0), -1 if it is true
+   when it did not wrap (ovf == 0), and 0 otherwise.  */
+
+static int
+uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
+{
+  enum tree_code ccode = ERROR_MARK;
+  tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
+  if (gimple_code (use_stmt) == GIMPLE_COND)
+    {
+      ccode = gimple_cond_code (use_stmt);
+      crhs1 = gimple_cond_lhs (use_stmt);
+      crhs2 = gimple_cond_rhs (use_stmt);
+    }
+  else if (is_gimple_assign (use_stmt))
+    {
+      if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
+	{
+	  ccode = gimple_assign_rhs_code (use_stmt);
+	  crhs1 = gimple_assign_rhs1 (use_stmt);
+	  crhs2 = gimple_assign_rhs2 (use_stmt);
+	}
+      else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
+	{
+	  tree cond = gimple_assign_rhs1 (use_stmt);
+	  if (COMPARISON_CLASS_P (cond))
+	    {
+	      ccode = TREE_CODE (cond);
+	      crhs1 = TREE_OPERAND (cond, 0);
+	      crhs2 = TREE_OPERAND (cond, 1);
+	    }
+	  else
+	    return 0;
+	}
+      else
+	return 0;
+    }
+  else
+    return 0;
+
+  if (TREE_CODE_CLASS (ccode) != tcc_comparison)
+    return 0;
+
+  enum tree_code code = gimple_assign_rhs_code (stmt);
+  tree lhs = gimple_assign_lhs (stmt);
+  tree rhs1 = gimple_assign_rhs1 (stmt);
+  tree rhs2 = gimple_assign_rhs2 (stmt);
+
+  switch (ccode)
+    {
+    case GT_EXPR:
+    case LE_EXPR:
+      /* r = a - b; r > a or r <= a
+	 r = a + b; a > r or a <= r or b > r or b <= r.  */
+      if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
+	  || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
+	      && crhs2 == lhs))
+	return ccode == GT_EXPR ? 1 : -1;
+      break;
+    case LT_EXPR:
+    case GE_EXPR:
+      /* r = a - b; a < r or a >= r
+	 r = a + b; r < a or r >= a or r < b or r >= b.  */
+      if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
+	  || (code == PLUS_EXPR && crhs1 == lhs
+	      && (crhs2 == rhs1 || crhs2 == rhs2)))
+	return ccode == LT_EXPR ? 1 : -1;
+      break;
+    default:
+      break;
+    }
+  return 0;
+}
+
+/* Recognize, for unsigned x with a target uaddv4/usubv4 pattern,
+   x = y - z;
+   if (x > y)
+   where there are other uses of x, and replace it with
+   _7 = SUB_OVERFLOW (y, z);
+   x = REALPART_EXPR <_7>;
+   _8 = IMAGPART_EXPR <_7>;
+   if (_8)
+   and similarly for addition.  Return true if STMT was replaced.  */
+
+static bool
+match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
+			enum tree_code code)
+{
+  tree lhs = gimple_assign_lhs (stmt);
+  tree type = TREE_TYPE (lhs);
+  use_operand_p use_p;
+  imm_use_iterator iter;
+  bool use_seen = false;
+  bool ovf_use_seen = false;
+  gimple *use_stmt;
+
+  gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
+  if (!INTEGRAL_TYPE_P (type)
+      || !TYPE_UNSIGNED (type)
+      || has_zero_uses (lhs)
+      || has_single_use (lhs)
+      || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
+			TYPE_MODE (type)) == CODE_FOR_nothing)
+    return false;
+
+  FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
+    {
+      use_stmt = USE_STMT (use_p);
+      if (is_gimple_debug (use_stmt))
+	continue;
+
+      if (uaddsub_overflow_check_p (stmt, use_stmt))
+	ovf_use_seen = true;
+      else
+	use_seen = true;
+      if (ovf_use_seen && use_seen)
+	break;
+    }
+
+  if (!ovf_use_seen || !use_seen)
+    return false;
+
+  tree ctype = build_complex_type (type);
+  tree rhs1 = gimple_assign_rhs1 (stmt);
+  tree rhs2 = gimple_assign_rhs2 (stmt);
+  gcall *g = gimple_build_call_internal (code == PLUS_EXPR
+					 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
+					 2, rhs1, rhs2);
+  tree ctmp = make_ssa_name (ctype);
+  gimple_call_set_lhs (g, ctmp);
+  gsi_insert_before (gsi, g, GSI_SAME_STMT);
+  gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
+				     build1 (REALPART_EXPR, type, ctmp));
+  gsi_replace (gsi, g2, true);
+  tree ovf = make_ssa_name (type);
+  g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
+			    build1 (IMAGPART_EXPR, type, ctmp));
+  gsi_insert_after (gsi, g2, GSI_NEW_STMT);
+
+  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
+    {
+      if (is_gimple_debug (use_stmt))
+	continue;
+
+      int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
+      if (ovf_use == 0)
+	continue;
+      if (gimple_code (use_stmt) == GIMPLE_COND)
+	{
+	  gcond *cond_stmt = as_a <gcond *> (use_stmt);
+	  gimple_cond_set_lhs (cond_stmt, ovf);
+	  gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
+	  gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
+	}
+      else
+	{
+	  gcc_checking_assert (is_gimple_assign (use_stmt));
+	  if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
+	    {
+	      gimple_assign_set_rhs1 (use_stmt, ovf);
+	      gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
+	      gimple_assign_set_rhs_code (use_stmt,
+					  ovf_use == 1 ? NE_EXPR : EQ_EXPR);
+	    }
+	  else
+	    {
+	      gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
+				   == COND_EXPR);
+	      tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
+				  boolean_type_node, ovf,
+				  build_int_cst (type, 0));
+	      gimple_assign_set_rhs1 (use_stmt, cond);
+	    }
+	}
+      update_stmt (use_stmt);
+    }
+  return true;
+}
+
+
 /* Find integer multiplications where the operands are extended from
    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
    where appropriate.  */
@@ -3563,7 +3746,8 @@ pass_optimize_widening_mul::execute (fun
 
 		case PLUS_EXPR:
 		case MINUS_EXPR:
-		  convert_plusminus_to_widen (&gsi, stmt, code);
+		  if (!convert_plusminus_to_widen (&gsi, stmt, code))
+		    match_uaddsub_overflow (&gsi, stmt, code);
 		  break;
 
 		default:;
--- gcc/testsuite/gcc.dg/pr67089-1.c.jj	2015-11-24 18:16:30.817446026 +0100
+++ gcc/testsuite/gcc.dg/pr67089-1.c	2015-11-24 19:03:41.302284096 +0100
@@ -0,0 +1,112 @@
+/* PR target/67089 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+extern void abort (void);
+
+int cnt, d;
+
+__attribute__((noinline, noclone))
+void foo (int x)
+{
+  asm volatile ("" : "+m" (d) : "g" (x) : "memory");
+  cnt++;
+}
+
+#define T(n, type, op, cond) \
+__attribute__((noinline, noclone))	\
+type					\
+f##n (type x, type y)			\
+{					\
+  type r = op;				\
+  cond;					\
+  return r;				\
+}
+
+T (1, unsigned int, x - y, if (r > x) foo (0))
+T (2, unsigned long, x - y, if (r <= x) foo (0))
+T (3, unsigned short, x - y, if (x < r) foo (r))
+T (4, unsigned long long, x - y, if (x >= r) foo (0))
+T (5, unsigned int, x - y, if (r >= x) foo (0))
+T (6, unsigned long, x - y, if (r < x) foo (0))
+T (7, unsigned short, x - y, if (x <= r) foo (r))
+T (8, unsigned long long, x - y, if (d || x > r) foo (0))
+T (9, unsigned int, x - y, if (d || r > x) foo (0))
+T (10, unsigned long, x - y, if (d || r <= x) foo (0))
+T (11, unsigned char, x - y, if (d || x < r) foo (0))
+T (12, unsigned long long, x - y, if (d || x >= r) foo (0))
+T (13, unsigned int, x - y, if (d || r >= x) foo (0))
+T (14, unsigned long, x - y, if (d || r < x) foo (0))
+T (15, unsigned short, x - y, if (d || x <= r) foo (0))
+T (16, unsigned long long, x - y, if (d || x > r) foo (0))
+
+int
+main ()
+{
+  if (f1 (5, 3) != 2U || cnt != 0) abort ();
+  if (f1 (5, 7) != -2U || cnt != 1) abort ();
+  if (f1 (5, 5) != 0U || cnt != 1) abort ();
+  if (f1 (5, 0) != 5U || cnt != 1) abort ();
+  if (f2 (7, 1) != 6UL || cnt != 2) abort ();
+  if (f2 (7, 8) != -1UL || cnt != 2) abort ();
+  if (f2 (9, 9) != 0UL || cnt != 3) abort ();
+  if (f2 (9, 0) != 9UL || cnt != 4) abort ();
+  if (f3 (15, 14) != 1 || cnt != 4) abort ();
+  if (f3 (15, 25) != (unsigned short) -10 || cnt != 5) abort ();
+  if (f3 (15, 15) != 0 || cnt != 5) abort ();
+  if (f3 (15, 0) != 15 || cnt != 5) abort ();
+  if (f4 (9132, 9127) != 5ULL || cnt != 6) abort ();
+  if (f4 (9132, 9137) != -5ULL || cnt != 6) abort ();
+  if (f4 (9132, 9132) != 0 || cnt != 7) abort ();
+  if (f4 (9132, 0) != 9132ULL || cnt != 8) abort ();
+  if (f5 (5, 3) != 2U || cnt != 8) abort ();
+  if (f5 (5, 7) != -2U || cnt != 9) abort ();
+  if (f5 (5, 5) != 0U || cnt != 9) abort ();
+  if (f5 (5, 0) != 5U || cnt != 10) abort ();
+  if (f6 (7, 1) != 6UL || cnt != 11) abort ();
+  if (f6 (7, 8) != -1UL || cnt != 11) abort ();
+  if (f6 (9, 9) != 0UL || cnt != 12) abort ();
+  if (f6 (9, 0) != 9UL || cnt != 12) abort ();
+  if (f7 (15, 14) != 1 || cnt != 12) abort ();
+  if (f7 (15, 25) != (unsigned short) -10 || cnt != 13) abort ();
+  if (f7 (15, 15) != 0 || cnt != 13) abort ();
+  if (f7 (15, 0) != 15 || cnt != 14) abort ();
+  if (f8 (9132, 9127) != 5ULL || cnt != 15) abort ();
+  if (f8 (9132, 9137) != -5ULL || cnt != 15) abort ();
+  if (f8 (9132, 9132) != 0 || cnt != 16) abort ();
+  if (f8 (9132, 0) != 9132ULL || cnt != 16) abort ();
+  cnt = 0;
+  if (f9 (5, 3) != 2U || cnt != 0) abort ();
+  if (f9 (5, 7) != -2U || cnt != 1) abort ();
+  if (f9 (5, 5) != 0U || cnt != 1) abort ();
+  if (f9 (5, 0) != 5U || cnt != 1) abort ();
+  if (f10 (7, 1) != 6UL || cnt != 2) abort ();
+  if (f10 (7, 8) != -1UL || cnt != 2) abort ();
+  if (f10 (9, 9) != 0UL || cnt != 3) abort ();
+  if (f10 (9, 0) != 9UL || cnt != 4) abort ();
+  if (f11 (15, 14) != 1 || cnt != 4) abort ();
+  if (f11 (15, 25) != (unsigned char) -10 || cnt != 5) abort ();
+  if (f11 (15, 15) != 0 || cnt != 5) abort ();
+  if (f11 (15, 0) != 15 || cnt != 5) abort ();
+  if (f12 (9132, 9127) != 5ULL || cnt != 6) abort ();
+  if (f12 (9132, 9137) != -5ULL || cnt != 6) abort ();
+  if (f12 (9132, 9132) != 0 || cnt != 7) abort ();
+  if (f12 (9132, 0) != 9132ULL || cnt != 8) abort ();
+  if (f13 (5, 3) != 2U || cnt != 8) abort ();
+  if (f13 (5, 7) != -2U || cnt != 9) abort ();
+  if (f13 (5, 5) != 0U || cnt != 9) abort ();
+  if (f13 (5, 0) != 5U || cnt != 10) abort ();
+  if (f14 (7, 1) != 6UL || cnt != 11) abort ();
+  if (f14 (7, 8) != -1UL || cnt != 11) abort ();
+  if (f14 (9, 9) != 0UL || cnt != 12) abort ();
+  if (f14 (9, 0) != 9UL || cnt != 12) abort ();
+  if (f15 (15, 14) != 1 || cnt != 12) abort ();
+  if (f15 (15, 25) != (unsigned short) -10 || cnt != 13) abort ();
+  if (f15 (15, 15) != 0 || cnt != 13) abort ();
+  if (f15 (15, 0) != 15 || cnt != 14) abort ();
+  if (f16 (9132, 9127) != 5ULL || cnt != 15) abort ();
+  if (f16 (9132, 9137) != -5ULL || cnt != 15) abort ();
+  if (f16 (9132, 9132) != 0 || cnt != 16) abort ();
+  if (f16 (9132, 0) != 9132ULL || cnt != 16) abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.dg/pr67089-2.c.jj	2015-11-24 18:18:51.804434548 +0100
+++ gcc/testsuite/gcc.dg/pr67089-2.c	2015-11-24 19:03:44.769234628 +0100
@@ -0,0 +1,112 @@
+/* PR target/67089 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+extern void abort (void);
+
+int cnt, d;
+
+__attribute__((noinline, noclone))
+void foo (int x)
+{
+  asm volatile ("" : "+m" (d) : "g" (x) : "memory");
+  cnt++;
+}
+
+#define T(n, type, op, cond) \
+__attribute__((noinline, noclone))	\
+type					\
+f##n (type x, type y)			\
+{					\
+  type r = op;				\
+  cond;					\
+  return r;				\
+}
+
+T (1, unsigned int, x - y, if (r > y) foo (0))
+T (2, unsigned long, x - y, if (r <= y) foo (0))
+T (3, unsigned short, x - y, if (y < r) foo (r))
+T (4, unsigned long long, x - y, if (y >= r) foo (0))
+T (5, unsigned int, x - y, if (r >= y) foo (0))
+T (6, unsigned long, x - y, if (r < y) foo (0))
+T (7, unsigned short, x - y, if (y <= r) foo (r))
+T (8, unsigned long long, x - y, if (d || y > r) foo (0))
+T (9, unsigned int, x - y, if (d || r > y) foo (0))
+T (10, unsigned long, x - y, if (d || r <= y) foo (0))
+T (11, unsigned char, x - y, if (d || y < r) foo (0))
+T (12, unsigned long long, x - y, if (d || y >= r) foo (0))
+T (13, unsigned int, x - y, if (d || r >= y) foo (0))
+T (14, unsigned long, x - y, if (d || r < y) foo (0))
+T (15, unsigned short, x - y, if (d || y <= r) foo (0))
+T (16, unsigned long long, x - y, if (d || y > r) foo (0))
+
+int
+main ()
+{
+  if (f1 (5, 3) != 2U || cnt != 0) abort ();
+  if (f1 (5, 7) != -2U || cnt != 1) abort ();
+  if (f1 (5, 5) != 0U || cnt != 1) abort ();
+  if (f1 (5, 0) != 5U || cnt != 2) abort ();
+  if (f2 (7, 1) != 6UL || cnt != 2) abort ();
+  if (f2 (7, 8) != -1UL || cnt != 2) abort ();
+  if (f2 (9, 9) != 0UL || cnt != 3) abort ();
+  if (f2 (9, 0) != 9UL || cnt != 3) abort ();
+  if (f3 (15, 14) != 1 || cnt != 3) abort ();
+  if (f3 (15, 25) != (unsigned short) -10 || cnt != 4) abort ();
+  if (f3 (15, 15) != 0 || cnt != 4) abort ();
+  if (f3 (15, 0) != 15 || cnt != 5) abort ();
+  if (f4 (9132, 9127) != 5ULL || cnt != 6) abort ();
+  if (f4 (9132, 9137) != -5ULL || cnt != 6) abort ();
+  if (f4 (9132, 9132) != 0 || cnt != 7) abort ();
+  if (f4 (9132, 0) != 9132ULL || cnt != 7) abort ();
+  if (f5 (5, 3) != 2U || cnt != 7) abort ();
+  if (f5 (5, 7) != -2U || cnt != 8) abort ();
+  if (f5 (5, 5) != 0U || cnt != 8) abort ();
+  if (f5 (5, 0) != 5U || cnt != 9) abort ();
+  if (f6 (7, 1) != 6UL || cnt != 9) abort ();
+  if (f6 (7, 8) != -1UL || cnt != 9) abort ();
+  if (f6 (9, 9) != 0UL || cnt != 10) abort ();
+  if (f6 (9, 0) != 9UL || cnt != 10) abort ();
+  if (f7 (15, 14) != 1 || cnt != 10) abort ();
+  if (f7 (15, 25) != (unsigned short) -10 || cnt != 11) abort ();
+  if (f7 (15, 15) != 0 || cnt != 11) abort ();
+  if (f7 (15, 0) != 15 || cnt != 12) abort ();
+  if (f8 (9132, 9127) != 5ULL || cnt != 13) abort ();
+  if (f8 (9132, 9137) != -5ULL || cnt != 13) abort ();
+  if (f8 (9132, 9132) != 0 || cnt != 14) abort ();
+  if (f8 (9132, 0) != 9132ULL || cnt != 14) abort ();
+  cnt = 0;
+  if (f9 (5, 3) != 2U || cnt != 0) abort ();
+  if (f9 (5, 7) != -2U || cnt != 1) abort ();
+  if (f9 (5, 5) != 0U || cnt != 1) abort ();
+  if (f9 (5, 0) != 5U || cnt != 2) abort ();
+  if (f10 (7, 1) != 6UL || cnt != 2) abort ();
+  if (f10 (7, 8) != -1UL || cnt != 2) abort ();
+  if (f10 (9, 9) != 0UL || cnt != 3) abort ();
+  if (f10 (9, 0) != 9UL || cnt != 3) abort ();
+  if (f11 (15, 14) != 1 || cnt != 3) abort ();
+  if (f11 (15, 25) != (unsigned char) -10 || cnt != 4) abort ();
+  if (f11 (15, 15) != 0 || cnt != 4) abort ();
+  if (f11 (15, 0) != 15 || cnt != 5) abort ();
+  if (f12 (9132, 9127) != 5ULL || cnt != 6) abort ();
+  if (f12 (9132, 9137) != -5ULL || cnt != 6) abort ();
+  if (f12 (9132, 9132) != 0 || cnt != 7) abort ();
+  if (f12 (9132, 0) != 9132ULL || cnt != 7) abort ();
+  if (f13 (5, 3) != 2U || cnt != 7) abort ();
+  if (f13 (5, 7) != -2U || cnt != 8) abort ();
+  if (f13 (5, 5) != 0U || cnt != 8) abort ();
+  if (f13 (5, 0) != 5U || cnt != 9) abort ();
+  if (f14 (7, 1) != 6UL || cnt != 9) abort ();
+  if (f14 (7, 8) != -1UL || cnt != 9) abort ();
+  if (f14 (9, 9) != 0UL || cnt != 10) abort ();
+  if (f14 (9, 0) != 9UL || cnt != 10) abort ();
+  if (f15 (15, 14) != 1 || cnt != 10) abort ();
+  if (f15 (15, 25) != (unsigned short) -10 || cnt != 11) abort ();
+  if (f15 (15, 15) != 0 || cnt != 11) abort ();
+  if (f15 (15, 0) != 15 || cnt != 12) abort ();
+  if (f16 (9132, 9127) != 5ULL || cnt != 13) abort ();
+  if (f16 (9132, 9137) != -5ULL || cnt != 13) abort ();
+  if (f16 (9132, 9132) != 0 || cnt != 14) abort ();
+  if (f16 (9132, 0) != 9132ULL || cnt != 14) abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.dg/pr67089-3.c.jj	2015-11-24 18:28:05.788530792 +0100
+++ gcc/testsuite/gcc.dg/pr67089-3.c	2015-11-24 19:03:48.375183177 +0100
@@ -0,0 +1,112 @@
+/* PR target/67089 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+extern void abort (void);
+
+int cnt, d;
+
+__attribute__((noinline, noclone))
+void foo (int x)
+{
+  asm volatile ("" : "+m" (d) : "g" (x) : "memory");
+  cnt++;
+}
+
+#define T(n, type, op, cond) \
+__attribute__((noinline, noclone))	\
+type					\
+f##n (type x, type y)			\
+{					\
+  type r = op;				\
+  cond;					\
+  return r;				\
+}
+
+T (1, unsigned int, x + y, if (r > x) foo (0))
+T (2, unsigned long, x + y, if (r <= x) foo (0))
+T (3, unsigned short, x + y, if (x < r) foo (r))
+T (4, unsigned long long, x + y, if (x >= r) foo (0))
+T (5, unsigned int, x + y, if (r >= x) foo (0))
+T (6, unsigned long, x + y, if (r < x) foo (0))
+T (7, unsigned short, x + y, if (x <= r) foo (r))
+T (8, unsigned long long, x + y, if (d || x > r) foo (0))
+T (9, unsigned int, x + y, if (d || r > x) foo (0))
+T (10, unsigned long, x + y, if (d || r <= x) foo (0))
+T (11, unsigned char, x + y, if (d || x < r) foo (0))
+T (12, unsigned long long, x + y, if (d || x >= r) foo (0))
+T (13, unsigned int, x + y, if (d || r >= x) foo (0))
+T (14, unsigned long, x + y, if (d || r < x) foo (0))
+T (15, unsigned short, x + y, if (d || x <= r) foo (0))
+T (16, unsigned long long, x + y, if (d || x > r) foo (0))
+
+int
+main ()
+{
+  if (f1 (-7U, 0) != -7U || cnt != 0) abort ();
+  if (f1 (-7U, 6) != -1U || cnt != 1) abort ();
+  if (f1 (-7U, 7) != 0U || cnt != 1) abort ();
+  if (f1 (-7U, 8) != 1U || cnt != 1) abort ();
+  if (f2 (-9UL, 0) != -9UL || cnt != 2) abort ();
+  if (f2 (-9UL, 8) != -1UL || cnt != 2) abort ();
+  if (f2 (-9UL, 9) != 0UL || cnt != 3) abort ();
+  if (f2 (-9UL, 10) != 1UL || cnt != 4) abort ();
+  if (f3 (-15, 0) != (unsigned short) -15 || cnt != 4) abort ();
+  if (f3 (-15, 14) != (unsigned short) -1 || cnt != 5) abort ();
+  if (f3 (-15, 15) != 0 || cnt != 5) abort ();
+  if (f3 (-15, 16) != 1 || cnt != 5) abort ();
+  if (f4 (-9132ULL, 0) != -9132ULL || cnt != 6) abort ();
+  if (f4 (-9132ULL, 9131) != -1ULL || cnt != 6) abort ();
+  if (f4 (-9132ULL, 9132) != 0 || cnt != 7) abort ();
+  if (f4 (-9132ULL, 9133) != 1ULL || cnt != 8) abort ();
+  if (f5 (-7U, 0) != -7U || cnt != 9) abort ();
+  if (f5 (-7U, 6) != -1U || cnt != 10) abort ();
+  if (f5 (-7U, 7) != 0U || cnt != 10) abort ();
+  if (f5 (-7U, 8) != 1U || cnt != 10) abort ();
+  if (f6 (-9UL, 0) != -9UL || cnt != 10) abort ();
+  if (f6 (-9UL, 8) != -1UL || cnt != 10) abort ();
+  if (f6 (-9UL, 9) != 0UL || cnt != 11) abort ();
+  if (f6 (-9UL, 10) != 1UL || cnt != 12) abort ();
+  if (f7 (-15, 0) != (unsigned short) -15 || cnt != 13) abort ();
+  if (f7 (-15, 14) != (unsigned short) -1 || cnt != 14) abort ();
+  if (f7 (-15, 15) != 0 || cnt != 14) abort ();
+  if (f7 (-15, 16) != 1 || cnt != 14) abort ();
+  if (f8 (-9132ULL, 0) != -9132ULL || cnt != 14) abort ();
+  if (f8 (-9132ULL, 9131) != -1ULL || cnt != 14) abort ();
+  if (f8 (-9132ULL, 9132) != 0 || cnt != 15) abort ();
+  if (f8 (-9132ULL, 9133) != 1ULL || cnt != 16) abort ();
+  cnt = 0;
+  if (f9 (-7U, 0) != -7U || cnt != 0) abort ();
+  if (f9 (-7U, 6) != -1U || cnt != 1) abort ();
+  if (f9 (-7U, 7) != 0U || cnt != 1) abort ();
+  if (f9 (-7U, 8) != 1U || cnt != 1) abort ();
+  if (f10 (-9UL, 0) != -9UL || cnt != 2) abort ();
+  if (f10 (-9UL, 8) != -1UL || cnt != 2) abort ();
+  if (f10 (-9UL, 9) != 0UL || cnt != 3) abort ();
+  if (f10 (-9UL, 10) != 1UL || cnt != 4) abort ();
+  if (f11 (-15, 0) != (unsigned char) -15 || cnt != 4) abort ();
+  if (f11 (-15, 14) != (unsigned char) -1 || cnt != 5) abort ();
+  if (f11 (-15, 15) != 0 || cnt != 5) abort ();
+  if (f11 (-15, 16) != 1 || cnt != 5) abort ();
+  if (f12 (-9132ULL, 0) != -9132ULL || cnt != 6) abort ();
+  if (f12 (-9132ULL, 9131) != -1ULL || cnt != 6) abort ();
+  if (f12 (-9132ULL, 9132) != 0 || cnt != 7) abort ();
+  if (f12 (-9132ULL, 9133) != 1ULL || cnt != 8) abort ();
+  if (f13 (-7U, 0) != -7U || cnt != 9) abort ();
+  if (f13 (-7U, 6) != -1U || cnt != 10) abort ();
+  if (f13 (-7U, 7) != 0U || cnt != 10) abort ();
+  if (f13 (-7U, 8) != 1U || cnt != 10) abort ();
+  if (f14 (-9UL, 0) != -9UL || cnt != 10) abort ();
+  if (f14 (-9UL, 8) != -1UL || cnt != 10) abort ();
+  if (f14 (-9UL, 9) != 0UL || cnt != 11) abort ();
+  if (f14 (-9UL, 10) != 1UL || cnt != 12) abort ();
+  if (f15 (-15, 0) != (unsigned short) -15 || cnt != 13) abort ();
+  if (f15 (-15, 14) != (unsigned short) -1 || cnt != 14) abort ();
+  if (f15 (-15, 15) != 0 || cnt != 14) abort ();
+  if (f15 (-15, 16) != 1 || cnt != 14) abort ();
+  if (f16 (-9132ULL, 0) != -9132ULL || cnt != 14) abort ();
+  if (f16 (-9132ULL, 9131) != -1ULL || cnt != 14) abort ();
+  if (f16 (-9132ULL, 9132) != 0 || cnt != 15) abort ();
+  if (f16 (-9132ULL, 9133) != 1ULL || cnt != 16) abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.dg/pr67089-4.c.jj	2015-11-24 18:42:04.482600934 +0100
+++ gcc/testsuite/gcc.dg/pr67089-4.c	2015-11-24 19:15:19.155412082 +0100
@@ -0,0 +1,112 @@
+/* PR target/67089 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+extern void abort (void);
+
+int cnt, d;
+
+__attribute__((noinline, noclone))
+void foo (int x)
+{
+  asm volatile ("" : "+m" (d) : "g" (x) : "memory");
+  cnt++;
+}
+
+#define T(n, type, op, cond) \
+__attribute__((noinline, noclone))	\
+type					\
+f##n (type x, type y)			\
+{					\
+  type r = op;				\
+  cond;					\
+  return r;				\
+}
+
+T (1, unsigned int, x + y, if (r > y) foo (0))
+T (2, unsigned long, x + y, if (r <= y) foo (0))
+T (3, unsigned short, x + y, if (y < r) foo (r))
+T (4, unsigned long long, x + y, if (y >= r) foo (0))
+T (5, unsigned int, x + y, if (r >= y) foo (0))
+T (6, unsigned long, x + y, if (r < y) foo (0))
+T (7, unsigned short, x + y, if (y <= r) foo (r))
+T (8, unsigned long long, x + y, if (d || y > r) foo (0))
+T (9, unsigned int, x + y, if (d || r > y) foo (0))
+T (10, unsigned long, x + y, if (d || r <= y) foo (0))
+T (11, unsigned char, x + y, if (d || y < r) foo (0))
+T (12, unsigned long long, x + y, if (d || y >= r) foo (0))
+T (13, unsigned int, x + y, if (d || r >= y) foo (0))
+T (14, unsigned long, x + y, if (d || r < y) foo (0))
+T (15, unsigned short, x + y, if (d || y <= r) foo (0))
+T (16, unsigned long long, x + y, if (d || y > r) foo (0))
+
+int
+main ()
+{
+  if (f1 (-7U, 0) != -7U || cnt != 1) abort ();
+  if (f1 (-7U, 6) != -1U || cnt != 2) abort ();
+  if (f1 (-7U, 7) != 0U || cnt != 2) abort ();
+  if (f1 (-7U, 8) != 1U || cnt != 2) abort ();
+  if (f2 (-9UL, 0) != -9UL || cnt != 2) abort ();
+  if (f2 (-9UL, 8) != -1UL || cnt != 2) abort ();
+  if (f2 (-9UL, 9) != 0UL || cnt != 3) abort ();
+  if (f2 (-9UL, 10) != 1UL || cnt != 4) abort ();
+  if (f3 (-15, 0) != (unsigned short) -15 || cnt != 5) abort ();
+  if (f3 (-15, 14) != (unsigned short) -1 || cnt != 6) abort ();
+  if (f3 (-15, 15) != 0 || cnt != 6) abort ();
+  if (f3 (-15, 16) != 1 || cnt != 6) abort ();
+  if (f4 (-9132ULL, 0) != -9132ULL || cnt != 6) abort ();
+  if (f4 (-9132ULL, 9131) != -1ULL || cnt != 6) abort ();
+  if (f4 (-9132ULL, 9132) != 0 || cnt != 7) abort ();
+  if (f4 (-9132ULL, 9133) != 1ULL || cnt != 8) abort ();
+  if (f5 (-7U, 0) != -7U || cnt != 9) abort ();
+  if (f5 (-7U, 6) != -1U || cnt != 10) abort ();
+  if (f5 (-7U, 7) != 0U || cnt != 10) abort ();
+  if (f5 (-7U, 8) != 1U || cnt != 10) abort ();
+  if (f6 (-9UL, 0) != -9UL || cnt != 10) abort ();
+  if (f6 (-9UL, 8) != -1UL || cnt != 10) abort ();
+  if (f6 (-9UL, 9) != 0UL || cnt != 11) abort ();
+  if (f6 (-9UL, 10) != 1UL || cnt != 12) abort ();
+  if (f7 (-15, 0) != (unsigned short) -15 || cnt != 13) abort ();
+  if (f7 (-15, 14) != (unsigned short) -1 || cnt != 14) abort ();
+  if (f7 (-15, 15) != 0 || cnt != 14) abort ();
+  if (f7 (-15, 16) != 1 || cnt != 14) abort ();
+  if (f8 (-9132ULL, 0) != -9132ULL || cnt != 14) abort ();
+  if (f8 (-9132ULL, 9131) != -1ULL || cnt != 14) abort ();
+  if (f8 (-9132ULL, 9132) != 0 || cnt != 15) abort ();
+  if (f8 (-9132ULL, 9133) != 1ULL || cnt != 16) abort ();
+  cnt = 0;
+  if (f9 (-7U, 0) != -7U || cnt != 1) abort ();
+  if (f9 (-7U, 6) != -1U || cnt != 2) abort ();
+  if (f9 (-7U, 7) != 0U || cnt != 2) abort ();
+  if (f9 (-7U, 8) != 1U || cnt != 2) abort ();
+  if (f10 (-9UL, 0) != -9UL || cnt != 2) abort ();
+  if (f10 (-9UL, 8) != -1UL || cnt != 2) abort ();
+  if (f10 (-9UL, 9) != 0UL || cnt != 3) abort ();
+  if (f10 (-9UL, 10) != 1UL || cnt != 4) abort ();
+  if (f11 (-15, 0) != (unsigned char) -15 || cnt != 5) abort ();
+  if (f11 (-15, 14) != (unsigned char) -1 || cnt != 6) abort ();
+  if (f11 (-15, 15) != 0 || cnt != 6) abort ();
+  if (f11 (-15, 16) != 1 || cnt != 6) abort ();
+  if (f12 (-9132ULL, 0) != -9132ULL || cnt != 6) abort ();
+  if (f12 (-9132ULL, 9131) != -1ULL || cnt != 6) abort ();
+  if (f12 (-9132ULL, 9132) != 0 || cnt != 7) abort ();
+  if (f12 (-9132ULL, 9133) != 1ULL || cnt != 8) abort ();
+  if (f13 (-7U, 0) != -7U || cnt != 9) abort ();
+  if (f13 (-7U, 6) != -1U || cnt != 10) abort ();
+  if (f13 (-7U, 7) != 0U || cnt != 10) abort ();
+  if (f13 (-7U, 8) != 1U || cnt != 10) abort ();
+  if (f14 (-9UL, 0) != -9UL || cnt != 10) abort ();
+  if (f14 (-9UL, 8) != -1UL || cnt != 10) abort ();
+  if (f14 (-9UL, 9) != 0UL || cnt != 11) abort ();
+  if (f14 (-9UL, 10) != 1UL || cnt != 12) abort ();
+  if (f15 (-15, 0) != (unsigned short) -15 || cnt != 13) abort ();
+  if (f15 (-15, 14) != (unsigned short) -1 || cnt != 14) abort ();
+  if (f15 (-15, 15) != 0 || cnt != 14) abort ();
+  if (f15 (-15, 16) != 1 || cnt != 14) abort ();
+  if (f16 (-9132ULL, 0) != -9132ULL || cnt != 14) abort ();
+  if (f16 (-9132ULL, 9131) != -1ULL || cnt != 14) abort ();
+  if (f16 (-9132ULL, 9132) != 0 || cnt != 15) abort ();
+  if (f16 (-9132ULL, 9133) != 1ULL || cnt != 16) abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.dg/pr67089-5.c.jj	2015-11-24 19:03:19.571594157 +0100
+++ gcc/testsuite/gcc.dg/pr67089-5.c	2015-11-24 19:31:29.707645365 +0100
@@ -0,0 +1,82 @@
+/* PR target/67089 */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-loop-if-convert" } */
+
+extern void abort (void);
+
+int cnt;
+unsigned int a[16], b[16], c[16];
+
+__attribute__((noinline, noclone))
+void foo (int x)
+{
+  asm volatile ("" : : "g" (x) : "memory");
+  cnt++;
+}
+
+__attribute__((noinline, noclone)) void
+f0 (unsigned int x)
+{
+  for (int i = 0; i < 16; i++)
+    {
+      unsigned int r = x - a[i];
+      b[i] = r;
+      c[i] = r > x ? 7 : x;
+    }
+}
+
+#define T(n, type, op, cond) \
+__attribute__((noinline, noclone))	\
+type					\
+f##n (type x)				\
+{					\
+  type r = op;				\
+  cond;					\
+  return r;				\
+}
+
+T (1, unsigned int, x - 2U, if (r > x) foo (0))
+T (2, unsigned long, x - 2U, if (r <= x) foo (0))
+T (3, unsigned short, 2U - x, if (r > 2U) foo (0))
+T (4, unsigned char, 2U - x, if (r <= 2U) foo (0))
+T (5, unsigned int, x + -2U, if (r > x) foo (0))
+T (6, unsigned long, x + -2UL, if (r <= x) foo (0))
+T (7, unsigned short, (unsigned short) -2 + x, if (r > (unsigned short) -2) foo (0))
+T (8, unsigned char, (unsigned char) -2 + x, if (r <= (unsigned char) -2) foo (0))
+
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 16; i++)
+    a[i] = i - 7;
+  f0 (5);
+  for (i = 0; i < 16; i++)
+    if (b[i] != 12U - i || c[i] != 7 - 2 * (i >= 7 && i < 13))
+      abort ();
+  if (f1 (3) != 1 || cnt != 0) abort ();
+  if (f1 (2) != 0 || cnt != 0) abort ();
+  if (f1 (1) != -1U || cnt != 1) abort ();
+  if (f2 (3) != 1 || cnt != 2) abort ();
+  if (f2 (2) != 0 || cnt != 3) abort ();
+  if (f2 (1) != -1UL || cnt != 3) abort ();
+  if (f3 (3) != (unsigned short) -1 || cnt != 4) abort ();
+  if (f3 (2) != 0 || cnt != 4) abort ();
+  if (f3 (1) != 1 || cnt != 4) abort ();
+  if (f4 (3) != (unsigned char) -1 || cnt != 4) abort ();
+  if (f4 (2) != 0 || cnt != 5) abort ();
+  if (f4 (1) != 1 || cnt != 6) abort ();
+  if (f5 (3) != 1 || cnt != 6) abort ();
+  if (f5 (2) != 0 || cnt != 6) abort ();
+  if (f5 (1) != -1U || cnt != 7) abort ();
+  if (f6 (3) != 1 || cnt != 8) abort ();
+  if (f6 (2) != 0 || cnt != 9) abort ();
+  if (f6 (1) != -1UL || cnt != 9) abort ();
+  if (f7 (3) != 1 || cnt != 9) abort ();
+  if (f7 (2) != 0 || cnt != 9) abort ();
+  if (f7 (1) != (unsigned short) -1 || cnt != 10) abort ();
+  if (f8 (3) != 1 || cnt != 11) abort ();
+  if (f8 (2) != 0 || cnt != 12) abort ();
+  if (f8 (1) != (unsigned char) -1 || cnt != 12) abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.dg/pr67089-6.c.jj	2015-11-24 19:16:02.898794422 +0100
+++ gcc/testsuite/gcc.dg/pr67089-6.c	2015-11-24 19:32:09.928077054 +0100
@@ -0,0 +1,62 @@
+/* PR target/67089 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-if-convert -fdump-tree-widening_mul" } */
+
+extern void abort (void);
+
+int cnt;
+unsigned int a[16], b[16], c[16], d;
+void foo (int x);
+
+__attribute__((noinline, noclone)) void
+f0 (unsigned int x)
+{
+  for (int i = 0; i < 16; i++)
+    {
+      unsigned int r = x - a[i];
+      b[i] = r;
+      c[i] = r > x ? 7 : x;
+    }
+}
+
+#define T(n, type, op, cond) \
+__attribute__((noinline, noclone))	\
+type					\
+f##n (type x, type y)			\
+{					\
+  type r = op;				\
+  cond;					\
+  return r;				\
+}
+
+T (1, unsigned int, x - y, if (r > x) foo (0))
+T (2, unsigned long, x - y, if (r <= x) foo (0))
+T (3, unsigned short, x - y, if (x < r) foo (r))
+T (4, unsigned long long, x - y, if (x >= r) foo (0))
+T (5, unsigned int, x - y, if (d || r > x) foo (0))
+T (6, unsigned long, x - y, if (d || r <= x) foo (0))
+T (7, unsigned char, x - y, if (d || x < r) foo (0))
+T (8, unsigned long long, x - y, if (d || x >= r) foo (0))
+T (9, unsigned int, x + y, if (r >= x) foo (0))
+T (10, unsigned long, x + y, if (r < x) foo (0))
+T (11, unsigned short, x + y, if (x <= r) foo (r))
+T (12, unsigned long long, x + y, if (d || x > r) foo (0))
+T (13, unsigned int, x + y, if (d || r >= x) foo (0))
+T (14, unsigned long, x + y, if (d || r < x) foo (0))
+T (15, unsigned short, x + y, if (d || x <= r) foo (0))
+T (16, unsigned long long, x + y, if (d || x > r) foo (0))
+T (17, unsigned int, x + y, if (r >= y) foo (0))
+T (18, unsigned long, x + y, if (r < y) foo (0))
+T (19, unsigned short, x + y, if (y <= r) foo (r))
+T (20, unsigned long long, x + y, if (d || y > r) foo (0))
+T (21, unsigned int, x + y, if (d || r >= y) foo (0))
+T (22, unsigned long, x + y, if (d || r < y) foo (0))
+T (23, unsigned short, x + y, if (d || y <= r) foo (0))
+T (24, unsigned long long, x + y, if (d || y > r) foo (0))
+T (25, unsigned short, 2U - x, if (r > 2U) foo (0))
+T (26, unsigned char, 2U - x, if (r <= 2U) foo (0))
+
+/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 16 "widening_mul" { target { { i?86-*-* x86_64-*-* } && { ! ia32 } } } } } */
+/* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 11 "widening_mul" { target { { i?86-*-* x86_64-*-* } && { ! ia32 } } } } } */
+/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 12 "widening_mul" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
+/* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 9 "widening_mul" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
--- gcc/testsuite/gcc.dg/pr67089-7.c.jj	2015-11-24 19:32:30.332788737 +0100
+++ gcc/testsuite/gcc.dg/pr67089-7.c	2015-11-24 19:37:12.304804499 +0100
@@ -0,0 +1,62 @@
+/* PR target/67089 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-if-convert -fdump-tree-widening_mul" } */
+
+extern void abort (void);
+
+int cnt, d;
+void foo (int x);
+
+#define T(n, type, op, cond) \
+__attribute__((noinline, noclone))	\
+type					\
+f##n (type x, type y)			\
+{					\
+  type r = op;				\
+  cond;					\
+  return r;				\
+}
+
+T (1, unsigned int, x - y, if (r >= x) foo (0))
+T (2, unsigned long, x - y, if (r < x) foo (0))
+T (3, unsigned short, x - y, if (x <= r) foo (r))
+T (4, unsigned long long, x - y, if (d || x > r) foo (0))
+T (5, unsigned int, x - y, if (d || r >= x) foo (0))
+T (6, unsigned long, x - y, if (d || r < x) foo (0))
+T (7, unsigned short, x - y, if (d || x <= r) foo (0))
+T (8, unsigned long long, x - y, if (d || x > r) foo (0))
+T (9, unsigned int, x - y, if (r > y) foo (0))
+T (10, unsigned long, x - y, if (r <= y) foo (0))
+T (11, unsigned short, x - y, if (y < r) foo (r))
+T (12, unsigned long long, x - y, if (y >= r) foo (0))
+T (13, unsigned int, x - y, if (r >= y) foo (0))
+T (14, unsigned long, x - y, if (r < y) foo (0))
+T (15, unsigned short, x - y, if (y <= r) foo (r))
+T (16, unsigned long long, x - y, if (d || y > r) foo (0))
+T (17, unsigned int, x - y, if (d || r > y) foo (0))
+T (18, unsigned long, x - y, if (d || r <= y) foo (0))
+T (19, unsigned char, x - y, if (d || y < r) foo (0))
+T (20, unsigned long long, x - y, if (d || y >= r) foo (0))
+T (21, unsigned int, x - y, if (d || r >= y) foo (0))
+T (22, unsigned long, x - y, if (d || r < y) foo (0))
+T (23, unsigned short, x - y, if (d || y <= r) foo (0))
+T (24, unsigned long long, x - y, if (d || y > r) foo (0))
+T (25, unsigned int, x + y, if (r > x) foo (0))
+T (26, unsigned long, x + y, if (r <= x) foo (0))
+T (27, unsigned short, x + y, if (x < r) foo (r))
+T (28, unsigned long long, x + y, if (x >= r) foo (0))
+T (29, unsigned int, x + y, if (d || r > x) foo (0))
+T (30, unsigned long, x + y, if (d || r <= x) foo (0))
+T (31, unsigned char, x + y, if (d || x < r) foo (0))
+T (32, unsigned long long, x + y, if (d || x >= r) foo (0))
+T (33, unsigned int, x + y, if (r > y) foo (0))
+T (34, unsigned long, x + y, if (r <= y) foo (0))
+T (35, unsigned short, x + y, if (y < r) foo (r))
+T (36, unsigned long long, x + y, if (y >= r) foo (0))
+T (37, unsigned int, x + y, if (d || r > y) foo (0))
+T (38, unsigned long, x + y, if (d || r <= y) foo (0))
+T (39, unsigned char, x + y, if (d || y < r) foo (0))
+T (40, unsigned long long, x + y, if (d || y >= r) foo (0))
+
+/* { dg-final { scan-tree-dump-not "ADD_OVERFLOW" "widening_mul" } } */
+/* { dg-final { scan-tree-dump-not "SUB_OVERFLOW" "widening_mul" } } */

	Jakub

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Convert manual unsigned +/- overflow checking into {ADD,SUB}_OVERFLOW (PR target/67089)
  2015-11-24 20:55 [PATCH] Convert manual unsigned +/- overflow checking into {ADD,SUB}_OVERFLOW (PR target/67089) Jakub Jelinek
@ 2015-11-25  8:11 ` Marc Glisse
  2015-11-25  8:40   ` Jakub Jelinek
  2015-11-25  8:45 ` Richard Biener
  1 sibling, 1 reply; 13+ messages in thread
From: Marc Glisse @ 2015-11-25  8:11 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Biener, gcc-patches, Richard Henderson

On Tue, 24 Nov 2015, Jakub Jelinek wrote:

> This is the GIMPLE side of Richard's i?86 uadd/usub overflow
> testing improvements.  If unsigned addition or subtraction
> result is used both normally and in a GIMPLE_COND/COND_EXPR/tcc_comparison
> that tests if unsigned overflow happened, the patch replaces it shortly
> before expansion with {ADD,SUB}_OVERFLOW, so that RTL expansion can generate
> better code on it.

If I test a+b<a and don't use a+b anywhere else, don't we also want to use 
the OVERFLOW things so we can expand to test the carry flag? That is, I am 
not convinced we want to punt on has_single_use for add_overflow. For 
sub_overflow with a single use of y-z, I guess y-z>y should become z>y, 
and going through a rewrite with sub_overflow neither helps nor hinders 
that. Actually, writing z>y is something the user is not unlikely to have 
done himself, and walking through the uses of y or z should not be hard, 
so I guess it could make sense to rewrite y-z>y to z>y always in match.pd 
and only look for the second form in math-opts.

I was thinking more match.pd to transform a+b<a and sccvn to somehow CSE 
a+b with add_overflow(a,b), but your patch seems to work well with simpler 
code, that's cool :-)

And it shouldn't be too hard to add a few more later, to detect widening 
operations that are only used for overflow testing, although the form of 
such tests is much less universal among users.

-- 
Marc Glisse

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Convert manual unsigned +/- overflow checking into {ADD,SUB}_OVERFLOW (PR target/67089)
  2015-11-25  8:11 ` Marc Glisse
@ 2015-11-25  8:40   ` Jakub Jelinek
  2015-11-25  8:48     ` Richard Biener
  2015-11-25  8:59     ` Marc Glisse
  0 siblings, 2 replies; 13+ messages in thread
From: Jakub Jelinek @ 2015-11-25  8:40 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Biener, Richard Henderson

On Wed, Nov 25, 2015 at 08:56:45AM +0100, Marc Glisse wrote:
> >This is the GIMPLE side of Richard's i?86 uadd/usub overflow
> >testing improvements.  If unsigned addition or subtraction
> >result is used both normally and in a GIMPLE_COND/COND_EXPR/tcc_comparison
> >that tests if unsigned overflow happened, the patch replaces it shortly
> >before expansion with {ADD,SUB}_OVERFLOW, so that RTL expansion can generate
> >better code on it.
> 
> If I test a+b<a and don't use a+b anywhere else, don't we also want to use
> the OVERFLOW things so we can expand to test the carry flag? That is, I am
> not convinced we want to punt on has_single_use for add_overflow. For
> sub_overflow with a single use of y-z, I guess y-z>y should become z>y, and
> going through a rewrite with sub_overflow neither helps nor hinders that.
> Actually, writing z>y is something the user is not unlikely to have done
> himself, and walking through the uses of y or z should not be hard, so I
> guess it could make sense to rewrite y-z>y to z>y always in match.pd and
> only look for the second form in math-opts.

An incremental diff that also handles the single-use case, when that single
use is the overflow check, is below.  But we already generate good code
without it for the x+y<x or x+y<y cases (and they aren't really problematic,
as they are single use).  While it is true that for the x-y>x case the
incremental patch below improves the generated code right now, as you said
it is better to rewrite those as y>x, and as it is a single use, it is
easier to do that in match.pd.  So, I'd prefer to add that transformation
and not use {ADD,SUB}_OVERFLOW for those cases, because we get good enough
code without increasing the IL size, eating more memory etc.

> I was thinking more match.pd to transform a+b<a and sccvn to somehow CSE a+b
> with add_overflow(a,b), but your patch seems to work well with simpler code,
> that's cool :-)
> 
> And it shouldn't be too hard to add a few more later, to detect widening
> operations that are only used for overflow testing, although the form of
> such tests is much less universal among users.

--- gcc/tree-ssa-math-opts.c.jj	2015-11-24 17:00:10.000000000 +0100
+++ gcc/tree-ssa-math-opts.c	2015-11-25 09:25:31.781087597 +0100
@@ -3586,7 +3586,6 @@ match_uaddsub_overflow (gimple_stmt_iter
   tree type = TREE_TYPE (lhs);
   use_operand_p use_p;
   imm_use_iterator iter;
-  bool use_seen = false;
   bool ovf_use_seen = false;
   gimple *use_stmt;
 
@@ -3594,7 +3593,6 @@ match_uaddsub_overflow (gimple_stmt_iter
   if (!INTEGRAL_TYPE_P (type)
       || !TYPE_UNSIGNED (type)
       || has_zero_uses (lhs)
-      || has_single_use (lhs)
       || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
 			TYPE_MODE (type)) == CODE_FOR_nothing)
     return false;
@@ -3606,14 +3604,13 @@ match_uaddsub_overflow (gimple_stmt_iter
 	continue;
 
       if (uaddsub_overflow_check_p (stmt, use_stmt))
-	ovf_use_seen = true;
-      else
-	use_seen = true;
-      if (ovf_use_seen && use_seen)
-	break;
+	{
+	  ovf_use_seen = true;
+	  break;
+	}
     }
 
-  if (!ovf_use_seen || !use_seen)
+  if (!ovf_use_seen)
     return false;
 
   tree ctype = build_complex_type (type);


	Jakub

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Convert manual unsigned +/- overflow checking into {ADD,SUB}_OVERFLOW (PR target/67089)
  2015-11-24 20:55 [PATCH] Convert manual unsigned +/- overflow checking into {ADD,SUB}_OVERFLOW (PR target/67089) Jakub Jelinek
  2015-11-25  8:11 ` Marc Glisse
@ 2015-11-25  8:45 ` Richard Biener
  1 sibling, 0 replies; 13+ messages in thread
From: Richard Biener @ 2015-11-25  8:45 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Richard Henderson

On Tue, 24 Nov 2015, Jakub Jelinek wrote:

> Hi!
> 
> This is the GIMPLE side of Richard's i?86 uadd/usub overflow
> testing improvements.  If unsigned addition or subtraction
> result is used both normally and in a GIMPLE_COND/COND_EXPR/tcc_comparison
> that tests if unsigned overflow happened, the patch replaces it shortly
> before expansion with {ADD,SUB}_OVERFLOW, so that RTL expansion can generate
> better code on it.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.

> 2015-11-24  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR target/67089
> 	* tree-ssa-math-opts.c (uaddsub_overflow_check_p,
> 	match_uaddsub_overflow): New functions.
> 	(pass_optimize_widening_mul::execute): Call match_uaddsub_overflow.
> 
> 	* gcc.dg/pr67089-1.c: New test.
> 	* gcc.dg/pr67089-2.c: New test.
> 	* gcc.dg/pr67089-3.c: New test.
> 	* gcc.dg/pr67089-4.c: New test.
> 	* gcc.dg/pr67089-5.c: New test.
> 	* gcc.dg/pr67089-6.c: New test.
> 	* gcc.dg/pr67089-7.c: New test.
> 
> --- gcc/tree-ssa-math-opts.c.jj	2015-11-18 11:19:23.000000000 +0100
> +++ gcc/tree-ssa-math-opts.c	2015-11-24 17:00:10.825900958 +0100
> @@ -3491,6 +3491,189 @@ convert_mult_to_fma (gimple *mul_stmt, t
>    return true;
>  }
>  
> +
> +/* Helper function of match_uaddsub_overflow.  Return 1
> +   if USE_STMT is unsigned overflow check ovf != 0 for
> +   STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
> +   and 0 otherwise.  */
> +
> +static int
> +uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
> +{
> +  enum tree_code ccode = ERROR_MARK;
> +  tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
> +  if (gimple_code (use_stmt) == GIMPLE_COND)
> +    {
> +      ccode = gimple_cond_code (use_stmt);
> +      crhs1 = gimple_cond_lhs (use_stmt);
> +      crhs2 = gimple_cond_rhs (use_stmt);
> +    }
> +  else if (is_gimple_assign (use_stmt))
> +    {
> +      if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
> +	{
> +	  ccode = gimple_assign_rhs_code (use_stmt);
> +	  crhs1 = gimple_assign_rhs1 (use_stmt);
> +	  crhs2 = gimple_assign_rhs2 (use_stmt);
> +	}
> +      else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
> +	{
> +	  tree cond = gimple_assign_rhs1 (use_stmt);
> +	  if (COMPARISON_CLASS_P (cond))
> +	    {
> +	      ccode = TREE_CODE (cond);
> +	      crhs1 = TREE_OPERAND (cond, 0);
> +	      crhs2 = TREE_OPERAND (cond, 1);
> +	    }
> +	  else
> +	    return 0;
> +	}
> +      else
> +	return 0;
> +    }
> +  else
> +    return 0;
> +
> +  if (TREE_CODE_CLASS (ccode) != tcc_comparison)
> +    return 0;
> +
> +  enum tree_code code = gimple_assign_rhs_code (stmt);
> +  tree lhs = gimple_assign_lhs (stmt);
> +  tree rhs1 = gimple_assign_rhs1 (stmt);
> +  tree rhs2 = gimple_assign_rhs2 (stmt);
> +
> +  switch (ccode)
> +    {
> +    case GT_EXPR:
> +    case LE_EXPR:
> +      /* r = a - b; r > a or r <= a
> +	 r = a + b; a > r or a <= r or b > r or b <= r.  */
> +      if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
> +	  || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
> +	      && crhs2 == lhs))
> +	return ccode == GT_EXPR ? 1 : -1;
> +      break;
> +    case LT_EXPR:
> +    case GE_EXPR:
> +      /* r = a - b; a < r or a >= r
> +	 r = a + b; r < a or r >= a or r < b or r >= b.  */
> +      if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
> +	  || (code == PLUS_EXPR && crhs1 == lhs
> +	      && (crhs2 == rhs1 || crhs2 == rhs2)))
> +	return ccode == LT_EXPR ? 1 : -1;
> +      break;
> +    default:
> +      break;
> +    }
> +  return 0;
> +}
> +
> +/* Recognize for unsigned x
> +   x = y - z;
> +   if (x > y)
> +   where there are other uses of x and replace it with
> +   _7 = SUB_OVERFLOW (y, z);
> +   x = REALPART_EXPR <_7>;
> +   _8 = IMAGPART_EXPR <_7>;
> +   if (_8)
> +   and similarly for addition.  */
> +
> +static bool
> +match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
> +			enum tree_code code)
> +{
> +  tree lhs = gimple_assign_lhs (stmt);
> +  tree type = TREE_TYPE (lhs);
> +  use_operand_p use_p;
> +  imm_use_iterator iter;
> +  bool use_seen = false;
> +  bool ovf_use_seen = false;
> +  gimple *use_stmt;
> +
> +  gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
> +  if (!INTEGRAL_TYPE_P (type)
> +      || !TYPE_UNSIGNED (type)
> +      || has_zero_uses (lhs)
> +      || has_single_use (lhs)
> +      || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
> +			TYPE_MODE (type)) == CODE_FOR_nothing)
> +    return false;
> +
> +  FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
> +    {
> +      use_stmt = USE_STMT (use_p);
> +      if (is_gimple_debug (use_stmt))
> +	continue;
> +
> +      if (uaddsub_overflow_check_p (stmt, use_stmt))
> +	ovf_use_seen = true;
> +      else
> +	use_seen = true;
> +      if (ovf_use_seen && use_seen)
> +	break;
> +    }
> +
> +  if (!ovf_use_seen || !use_seen)
> +    return false;
> +
> +  tree ctype = build_complex_type (type);
> +  tree rhs1 = gimple_assign_rhs1 (stmt);
> +  tree rhs2 = gimple_assign_rhs2 (stmt);
> +  gcall *g = gimple_build_call_internal (code == PLUS_EXPR
> +					 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
> +					 2, rhs1, rhs2);
> +  tree ctmp = make_ssa_name (ctype);
> +  gimple_call_set_lhs (g, ctmp);
> +  gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +  gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
> +				     build1 (REALPART_EXPR, type, ctmp));
> +  gsi_replace (gsi, g2, true);
> +  tree ovf = make_ssa_name (type);
> +  g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
> +			    build1 (IMAGPART_EXPR, type, ctmp));
> +  gsi_insert_after (gsi, g2, GSI_NEW_STMT);
> +
> +  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
> +    {
> +      if (is_gimple_debug (use_stmt))
> +	continue;
> +
> +      int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
> +      if (ovf_use == 0)
> +	continue;
> +      if (gimple_code (use_stmt) == GIMPLE_COND)
> +	{
> +	  gcond *cond_stmt = as_a <gcond *> (use_stmt);
> +	  gimple_cond_set_lhs (cond_stmt, ovf);
> +	  gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
> +	  gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
> +	}
> +      else
> +	{
> +	  gcc_checking_assert (is_gimple_assign (use_stmt));
> +	  if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
> +	    {
> +	      gimple_assign_set_rhs1 (use_stmt, ovf);
> +	      gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
> +	      gimple_assign_set_rhs_code (use_stmt,
> +					  ovf_use == 1 ? NE_EXPR : EQ_EXPR);
> +	    }
> +	  else
> +	    {
> +	      gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
> +				   == COND_EXPR);
> +	      tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
> +				  boolean_type_node, ovf,
> +				  build_int_cst (type, 0));
> +	      gimple_assign_set_rhs1 (use_stmt, cond);
> +	    }
> +	}
> +      update_stmt (use_stmt);
> +    }
> +  return true;
> +}
> +
> +
>  /* Find integer multiplications where the operands are extended from
>     smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
>     where appropriate.  */
> @@ -3563,7 +3746,8 @@ pass_optimize_widening_mul::execute (fun
>  
>  		case PLUS_EXPR:
>  		case MINUS_EXPR:
> -		  convert_plusminus_to_widen (&gsi, stmt, code);
> +		  if (!convert_plusminus_to_widen (&gsi, stmt, code))
> +		    match_uaddsub_overflow (&gsi, stmt, code);
>  		  break;
>  
>  		default:;
> --- gcc/testsuite/gcc.dg/pr67089-1.c.jj	2015-11-24 18:16:30.817446026 +0100
> +++ gcc/testsuite/gcc.dg/pr67089-1.c	2015-11-24 19:03:41.302284096 +0100
> @@ -0,0 +1,112 @@
> +/* PR target/67089 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +extern void abort (void);
> +
> +int cnt, d;
> +
> +__attribute__((noinline, noclone))
> +void foo (int x)
> +{
> +  asm volatile ("" : "+m" (d) : "g" (x) : "memory");
> +  cnt++;
> +}
> +
> +#define T(n, type, op, cond) \
> +__attribute__((noinline, noclone))	\
> +type					\
> +f##n (type x, type y)			\
> +{					\
> +  type r = op;				\
> +  cond;					\
> +  return r;				\
> +}
> +
> +T (1, unsigned int, x - y, if (r > x) foo (0))
> +T (2, unsigned long, x - y, if (r <= x) foo (0))
> +T (3, unsigned short, x - y, if (x < r) foo (r))
> +T (4, unsigned long long, x - y, if (x >= r) foo (0))
> +T (5, unsigned int, x - y, if (r >= x) foo (0))
> +T (6, unsigned long, x - y, if (r < x) foo (0))
> +T (7, unsigned short, x - y, if (x <= r) foo (r))
> +T (8, unsigned long long, x - y, if (d || x > r) foo (0))
> +T (9, unsigned int, x - y, if (d || r > x) foo (0))
> +T (10, unsigned long, x - y, if (d || r <= x) foo (0))
> +T (11, unsigned char, x - y, if (d || x < r) foo (0))
> +T (12, unsigned long long, x - y, if (d || x >= r) foo (0))
> +T (13, unsigned int, x - y, if (d || r >= x) foo (0))
> +T (14, unsigned long, x - y, if (d || r < x) foo (0))
> +T (15, unsigned short, x - y, if (d || x <= r) foo (0))
> +T (16, unsigned long long, x - y, if (d || x > r) foo (0))
> +
> +int
> +main ()
> +{
> +  if (f1 (5, 3) != 2U || cnt != 0) abort ();
> +  if (f1 (5, 7) != -2U || cnt != 1) abort ();
> +  if (f1 (5, 5) != 0U || cnt != 1) abort ();
> +  if (f1 (5, 0) != 5U || cnt != 1) abort ();
> +  if (f2 (7, 1) != 6UL || cnt != 2) abort ();
> +  if (f2 (7, 8) != -1UL || cnt != 2) abort ();
> +  if (f2 (9, 9) != 0UL || cnt != 3) abort ();
> +  if (f2 (9, 0) != 9UL || cnt != 4) abort ();
> +  if (f3 (15, 14) != 1 || cnt != 4) abort ();
> +  if (f3 (15, 25) != (unsigned short) -10 || cnt != 5) abort ();
> +  if (f3 (15, 15) != 0 || cnt != 5) abort ();
> +  if (f3 (15, 0) != 15 || cnt != 5) abort ();
> +  if (f4 (9132, 9127) != 5ULL || cnt != 6) abort ();
> +  if (f4 (9132, 9137) != -5ULL || cnt != 6) abort ();
> +  if (f4 (9132, 9132) != 0 || cnt != 7) abort ();
> +  if (f4 (9132, 0) != 9132ULL || cnt != 8) abort ();
> +  if (f5 (5, 3) != 2U || cnt != 8) abort ();
> +  if (f5 (5, 7) != -2U || cnt != 9) abort ();
> +  if (f5 (5, 5) != 0U || cnt != 9) abort ();
> +  if (f5 (5, 0) != 5U || cnt != 10) abort ();
> +  if (f6 (7, 1) != 6UL || cnt != 11) abort ();
> +  if (f6 (7, 8) != -1UL || cnt != 11) abort ();
> +  if (f6 (9, 9) != 0UL || cnt != 12) abort ();
> +  if (f6 (9, 0) != 9UL || cnt != 12) abort ();
> +  if (f7 (15, 14) != 1 || cnt != 12) abort ();
> +  if (f7 (15, 25) != (unsigned short) -10 || cnt != 13) abort ();
> +  if (f7 (15, 15) != 0 || cnt != 13) abort ();
> +  if (f7 (15, 0) != 15 || cnt != 14) abort ();
> +  if (f8 (9132, 9127) != 5ULL || cnt != 15) abort ();
> +  if (f8 (9132, 9137) != -5ULL || cnt != 15) abort ();
> +  if (f8 (9132, 9132) != 0 || cnt != 16) abort ();
> +  if (f8 (9132, 0) != 9132ULL || cnt != 16) abort ();
> +  cnt = 0;
> +  if (f9 (5, 3) != 2U || cnt != 0) abort ();
> +  if (f9 (5, 7) != -2U || cnt != 1) abort ();
> +  if (f9 (5, 5) != 0U || cnt != 1) abort ();
> +  if (f9 (5, 0) != 5U || cnt != 1) abort ();
> +  if (f10 (7, 1) != 6UL || cnt != 2) abort ();
> +  if (f10 (7, 8) != -1UL || cnt != 2) abort ();
> +  if (f10 (9, 9) != 0UL || cnt != 3) abort ();
> +  if (f10 (9, 0) != 9UL || cnt != 4) abort ();
> +  if (f11 (15, 14) != 1 || cnt != 4) abort ();
> +  if (f11 (15, 25) != (unsigned char) -10 || cnt != 5) abort ();
> +  if (f11 (15, 15) != 0 || cnt != 5) abort ();
> +  if (f11 (15, 0) != 15 || cnt != 5) abort ();
> +  if (f12 (9132, 9127) != 5ULL || cnt != 6) abort ();
> +  if (f12 (9132, 9137) != -5ULL || cnt != 6) abort ();
> +  if (f12 (9132, 9132) != 0 || cnt != 7) abort ();
> +  if (f12 (9132, 0) != 9132ULL || cnt != 8) abort ();
> +  if (f13 (5, 3) != 2U || cnt != 8) abort ();
> +  if (f13 (5, 7) != -2U || cnt != 9) abort ();
> +  if (f13 (5, 5) != 0U || cnt != 9) abort ();
> +  if (f13 (5, 0) != 5U || cnt != 10) abort ();
> +  if (f14 (7, 1) != 6UL || cnt != 11) abort ();
> +  if (f14 (7, 8) != -1UL || cnt != 11) abort ();
> +  if (f14 (9, 9) != 0UL || cnt != 12) abort ();
> +  if (f14 (9, 0) != 9UL || cnt != 12) abort ();
> +  if (f15 (15, 14) != 1 || cnt != 12) abort ();
> +  if (f15 (15, 25) != (unsigned short) -10 || cnt != 13) abort ();
> +  if (f15 (15, 15) != 0 || cnt != 13) abort ();
> +  if (f15 (15, 0) != 15 || cnt != 14) abort ();
> +  if (f16 (9132, 9127) != 5ULL || cnt != 15) abort ();
> +  if (f16 (9132, 9137) != -5ULL || cnt != 15) abort ();
> +  if (f16 (9132, 9132) != 0 || cnt != 16) abort ();
> +  if (f16 (9132, 0) != 9132ULL || cnt != 16) abort ();
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/pr67089-2.c.jj	2015-11-24 18:18:51.804434548 +0100
> +++ gcc/testsuite/gcc.dg/pr67089-2.c	2015-11-24 19:03:44.769234628 +0100
> @@ -0,0 +1,112 @@
> +/* PR target/67089 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +extern void abort (void);
> +
> +int cnt, d;
> +
> +__attribute__((noinline, noclone))
> +void foo (int x)
> +{
> +  asm volatile ("" : "+m" (d) : "g" (x) : "memory");
> +  cnt++;
> +}
> +
> +#define T(n, type, op, cond) \
> +__attribute__((noinline, noclone))	\
> +type					\
> +f##n (type x, type y)			\
> +{					\
> +  type r = op;				\
> +  cond;					\
> +  return r;				\
> +}
> +
> +T (1, unsigned int, x - y, if (r > y) foo (0))
> +T (2, unsigned long, x - y, if (r <= y) foo (0))
> +T (3, unsigned short, x - y, if (y < r) foo (r))
> +T (4, unsigned long long, x - y, if (y >= r) foo (0))
> +T (5, unsigned int, x - y, if (r >= y) foo (0))
> +T (6, unsigned long, x - y, if (r < y) foo (0))
> +T (7, unsigned short, x - y, if (y <= r) foo (r))
> +T (8, unsigned long long, x - y, if (d || y > r) foo (0))
> +T (9, unsigned int, x - y, if (d || r > y) foo (0))
> +T (10, unsigned long, x - y, if (d || r <= y) foo (0))
> +T (11, unsigned char, x - y, if (d || y < r) foo (0))
> +T (12, unsigned long long, x - y, if (d || y >= r) foo (0))
> +T (13, unsigned int, x - y, if (d || r >= y) foo (0))
> +T (14, unsigned long, x - y, if (d || r < y) foo (0))
> +T (15, unsigned short, x - y, if (d || y <= r) foo (0))
> +T (16, unsigned long long, x - y, if (d || y > r) foo (0))
> +
> +int
> +main ()
> +{
> +  if (f1 (5, 3) != 2U || cnt != 0) abort ();
> +  if (f1 (5, 7) != -2U || cnt != 1) abort ();
> +  if (f1 (5, 5) != 0U || cnt != 1) abort ();
> +  if (f1 (5, 0) != 5U || cnt != 2) abort ();
> +  if (f2 (7, 1) != 6UL || cnt != 2) abort ();
> +  if (f2 (7, 8) != -1UL || cnt != 2) abort ();
> +  if (f2 (9, 9) != 0UL || cnt != 3) abort ();
> +  if (f2 (9, 0) != 9UL || cnt != 3) abort ();
> +  if (f3 (15, 14) != 1 || cnt != 3) abort ();
> +  if (f3 (15, 25) != (unsigned short) -10 || cnt != 4) abort ();
> +  if (f3 (15, 15) != 0 || cnt != 4) abort ();
> +  if (f3 (15, 0) != 15 || cnt != 5) abort ();
> +  if (f4 (9132, 9127) != 5ULL || cnt != 6) abort ();
> +  if (f4 (9132, 9137) != -5ULL || cnt != 6) abort ();
> +  if (f4 (9132, 9132) != 0 || cnt != 7) abort ();
> +  if (f4 (9132, 0) != 9132ULL || cnt != 7) abort ();
> +  if (f5 (5, 3) != 2U || cnt != 7) abort ();
> +  if (f5 (5, 7) != -2U || cnt != 8) abort ();
> +  if (f5 (5, 5) != 0U || cnt != 8) abort ();
> +  if (f5 (5, 0) != 5U || cnt != 9) abort ();
> +  if (f6 (7, 1) != 6UL || cnt != 9) abort ();
> +  if (f6 (7, 8) != -1UL || cnt != 9) abort ();
> +  if (f6 (9, 9) != 0UL || cnt != 10) abort ();
> +  if (f6 (9, 0) != 9UL || cnt != 10) abort ();
> +  if (f7 (15, 14) != 1 || cnt != 10) abort ();
> +  if (f7 (15, 25) != (unsigned short) -10 || cnt != 11) abort ();
> +  if (f7 (15, 15) != 0 || cnt != 11) abort ();
> +  if (f7 (15, 0) != 15 || cnt != 12) abort ();
> +  if (f8 (9132, 9127) != 5ULL || cnt != 13) abort ();
> +  if (f8 (9132, 9137) != -5ULL || cnt != 13) abort ();
> +  if (f8 (9132, 9132) != 0 || cnt != 14) abort ();
> +  if (f8 (9132, 0) != 9132ULL || cnt != 14) abort ();
> +  cnt = 0;
> +  if (f9 (5, 3) != 2U || cnt != 0) abort ();
> +  if (f9 (5, 7) != -2U || cnt != 1) abort ();
> +  if (f9 (5, 5) != 0U || cnt != 1) abort ();
> +  if (f9 (5, 0) != 5U || cnt != 2) abort ();
> +  if (f10 (7, 1) != 6UL || cnt != 2) abort ();
> +  if (f10 (7, 8) != -1UL || cnt != 2) abort ();
> +  if (f10 (9, 9) != 0UL || cnt != 3) abort ();
> +  if (f10 (9, 0) != 9UL || cnt != 3) abort ();
> +  if (f11 (15, 14) != 1 || cnt != 3) abort ();
> +  if (f11 (15, 25) != (unsigned char) -10 || cnt != 4) abort ();
> +  if (f11 (15, 15) != 0 || cnt != 4) abort ();
> +  if (f11 (15, 0) != 15 || cnt != 5) abort ();
> +  if (f12 (9132, 9127) != 5ULL || cnt != 6) abort ();
> +  if (f12 (9132, 9137) != -5ULL || cnt != 6) abort ();
> +  if (f12 (9132, 9132) != 0 || cnt != 7) abort ();
> +  if (f12 (9132, 0) != 9132ULL || cnt != 7) abort ();
> +  if (f13 (5, 3) != 2U || cnt != 7) abort ();
> +  if (f13 (5, 7) != -2U || cnt != 8) abort ();
> +  if (f13 (5, 5) != 0U || cnt != 8) abort ();
> +  if (f13 (5, 0) != 5U || cnt != 9) abort ();
> +  if (f14 (7, 1) != 6UL || cnt != 9) abort ();
> +  if (f14 (7, 8) != -1UL || cnt != 9) abort ();
> +  if (f14 (9, 9) != 0UL || cnt != 10) abort ();
> +  if (f14 (9, 0) != 9UL || cnt != 10) abort ();
> +  if (f15 (15, 14) != 1 || cnt != 10) abort ();
> +  if (f15 (15, 25) != (unsigned short) -10 || cnt != 11) abort ();
> +  if (f15 (15, 15) != 0 || cnt != 11) abort ();
> +  if (f15 (15, 0) != 15 || cnt != 12) abort ();
> +  if (f16 (9132, 9127) != 5ULL || cnt != 13) abort ();
> +  if (f16 (9132, 9137) != -5ULL || cnt != 13) abort ();
> +  if (f16 (9132, 9132) != 0 || cnt != 14) abort ();
> +  if (f16 (9132, 0) != 9132ULL || cnt != 14) abort ();
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/pr67089-3.c.jj	2015-11-24 18:28:05.788530792 +0100
> +++ gcc/testsuite/gcc.dg/pr67089-3.c	2015-11-24 19:03:48.375183177 +0100
> @@ -0,0 +1,112 @@
> +/* PR target/67089 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +extern void abort (void);
> +
> +int cnt, d;
> +
> +__attribute__((noinline, noclone))
> +void foo (int x)
> +{
> +  asm volatile ("" : "+m" (d) : "g" (x) : "memory");
> +  cnt++;
> +}
> +
> +#define T(n, type, op, cond) \
> +__attribute__((noinline, noclone))	\
> +type					\
> +f##n (type x, type y)			\
> +{					\
> +  type r = op;				\
> +  cond;					\
> +  return r;				\
> +}
> +
> +T (1, unsigned int, x + y, if (r > x) foo (0))
> +T (2, unsigned long, x + y, if (r <= x) foo (0))
> +T (3, unsigned short, x + y, if (x < r) foo (r))
> +T (4, unsigned long long, x + y, if (x >= r) foo (0))
> +T (5, unsigned int, x + y, if (r >= x) foo (0))
> +T (6, unsigned long, x + y, if (r < x) foo (0))
> +T (7, unsigned short, x + y, if (x <= r) foo (r))
> +T (8, unsigned long long, x + y, if (d || x > r) foo (0))
> +T (9, unsigned int, x + y, if (d || r > x) foo (0))
> +T (10, unsigned long, x + y, if (d || r <= x) foo (0))
> +T (11, unsigned char, x + y, if (d || x < r) foo (0))
> +T (12, unsigned long long, x + y, if (d || x >= r) foo (0))
> +T (13, unsigned int, x + y, if (d || r >= x) foo (0))
> +T (14, unsigned long, x + y, if (d || r < x) foo (0))
> +T (15, unsigned short, x + y, if (d || x <= r) foo (0))
> +T (16, unsigned long long, x + y, if (d || x > r) foo (0))
> +
> +int
> +main ()
> +{
> +  if (f1 (-7U, 0) != -7U || cnt != 0) abort ();
> +  if (f1 (-7U, 6) != -1U || cnt != 1) abort ();
> +  if (f1 (-7U, 7) != 0U || cnt != 1) abort ();
> +  if (f1 (-7U, 8) != 1U || cnt != 1) abort ();
> +  if (f2 (-9UL, 0) != -9UL || cnt != 2) abort ();
> +  if (f2 (-9UL, 8) != -1UL || cnt != 2) abort ();
> +  if (f2 (-9UL, 9) != 0UL || cnt != 3) abort ();
> +  if (f2 (-9UL, 10) != 1UL || cnt != 4) abort ();
> +  if (f3 (-15, 0) != (unsigned short) -15 || cnt != 4) abort ();
> +  if (f3 (-15, 14) != (unsigned short) -1 || cnt != 5) abort ();
> +  if (f3 (-15, 15) != 0 || cnt != 5) abort ();
> +  if (f3 (-15, 16) != 1 || cnt != 5) abort ();
> +  if (f4 (-9132ULL, 0) != -9132ULL || cnt != 6) abort ();
> +  if (f4 (-9132ULL, 9131) != -1ULL || cnt != 6) abort ();
> +  if (f4 (-9132ULL, 9132) != 0 || cnt != 7) abort ();
> +  if (f4 (-9132ULL, 9133) != 1ULL || cnt != 8) abort ();
> +  if (f5 (-7U, 0) != -7U || cnt != 9) abort ();
> +  if (f5 (-7U, 6) != -1U || cnt != 10) abort ();
> +  if (f5 (-7U, 7) != 0U || cnt != 10) abort ();
> +  if (f5 (-7U, 8) != 1U || cnt != 10) abort ();
> +  if (f6 (-9UL, 0) != -9UL || cnt != 10) abort ();
> +  if (f6 (-9UL, 8) != -1UL || cnt != 10) abort ();
> +  if (f6 (-9UL, 9) != 0UL || cnt != 11) abort ();
> +  if (f6 (-9UL, 10) != 1UL || cnt != 12) abort ();
> +  if (f7 (-15, 0) != (unsigned short) -15 || cnt != 13) abort ();
> +  if (f7 (-15, 14) != (unsigned short) -1 || cnt != 14) abort ();
> +  if (f7 (-15, 15) != 0 || cnt != 14) abort ();
> +  if (f7 (-15, 16) != 1 || cnt != 14) abort ();
> +  if (f8 (-9132ULL, 0) != -9132ULL || cnt != 14) abort ();
> +  if (f8 (-9132ULL, 9131) != -1ULL || cnt != 14) abort ();
> +  if (f8 (-9132ULL, 9132) != 0 || cnt != 15) abort ();
> +  if (f8 (-9132ULL, 9133) != 1ULL || cnt != 16) abort ();
> +  cnt = 0;
> +  if (f9 (-7U, 0) != -7U || cnt != 0) abort ();
> +  if (f9 (-7U, 6) != -1U || cnt != 1) abort ();
> +  if (f9 (-7U, 7) != 0U || cnt != 1) abort ();
> +  if (f9 (-7U, 8) != 1U || cnt != 1) abort ();
> +  if (f10 (-9UL, 0) != -9UL || cnt != 2) abort ();
> +  if (f10 (-9UL, 8) != -1UL || cnt != 2) abort ();
> +  if (f10 (-9UL, 9) != 0UL || cnt != 3) abort ();
> +  if (f10 (-9UL, 10) != 1UL || cnt != 4) abort ();
> +  if (f11 (-15, 0) != (unsigned char) -15 || cnt != 4) abort ();
> +  if (f11 (-15, 14) != (unsigned char) -1 || cnt != 5) abort ();
> +  if (f11 (-15, 15) != 0 || cnt != 5) abort ();
> +  if (f11 (-15, 16) != 1 || cnt != 5) abort ();
> +  if (f12 (-9132ULL, 0) != -9132ULL || cnt != 6) abort ();
> +  if (f12 (-9132ULL, 9131) != -1ULL || cnt != 6) abort ();
> +  if (f12 (-9132ULL, 9132) != 0 || cnt != 7) abort ();
> +  if (f12 (-9132ULL, 9133) != 1ULL || cnt != 8) abort ();
> +  if (f13 (-7U, 0) != -7U || cnt != 9) abort ();
> +  if (f13 (-7U, 6) != -1U || cnt != 10) abort ();
> +  if (f13 (-7U, 7) != 0U || cnt != 10) abort ();
> +  if (f13 (-7U, 8) != 1U || cnt != 10) abort ();
> +  if (f14 (-9UL, 0) != -9UL || cnt != 10) abort ();
> +  if (f14 (-9UL, 8) != -1UL || cnt != 10) abort ();
> +  if (f14 (-9UL, 9) != 0UL || cnt != 11) abort ();
> +  if (f14 (-9UL, 10) != 1UL || cnt != 12) abort ();
> +  if (f15 (-15, 0) != (unsigned short) -15 || cnt != 13) abort ();
> +  if (f15 (-15, 14) != (unsigned short) -1 || cnt != 14) abort ();
> +  if (f15 (-15, 15) != 0 || cnt != 14) abort ();
> +  if (f15 (-15, 16) != 1 || cnt != 14) abort ();
> +  if (f16 (-9132ULL, 0) != -9132ULL || cnt != 14) abort ();
> +  if (f16 (-9132ULL, 9131) != -1ULL || cnt != 14) abort ();
> +  if (f16 (-9132ULL, 9132) != 0 || cnt != 15) abort ();
> +  if (f16 (-9132ULL, 9133) != 1ULL || cnt != 16) abort ();
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/pr67089-4.c.jj	2015-11-24 18:42:04.482600934 +0100
> +++ gcc/testsuite/gcc.dg/pr67089-4.c	2015-11-24 19:15:19.155412082 +0100
> @@ -0,0 +1,112 @@
> +/* PR target/67089 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +extern void abort (void);
> +
> +int cnt, d;
> +
> +__attribute__((noinline, noclone))
> +void foo (int x)
> +{
> +  asm volatile ("" : "+m" (d) : "g" (x) : "memory");
> +  cnt++;
> +}
> +
> +#define T(n, type, op, cond) \
> +__attribute__((noinline, noclone))	\
> +type					\
> +f##n (type x, type y)			\
> +{					\
> +  type r = op;				\
> +  cond;					\
> +  return r;				\
> +}
> +
> +T (1, unsigned int, x + y, if (r > y) foo (0))
> +T (2, unsigned long, x + y, if (r <= y) foo (0))
> +T (3, unsigned short, x + y, if (y < r) foo (r))
> +T (4, unsigned long long, x + y, if (y >= r) foo (0))
> +T (5, unsigned int, x + y, if (r >= y) foo (0))
> +T (6, unsigned long, x + y, if (r < y) foo (0))
> +T (7, unsigned short, x + y, if (y <= r) foo (r))
> +T (8, unsigned long long, x + y, if (d || y > r) foo (0))
> +T (9, unsigned int, x + y, if (d || r > y) foo (0))
> +T (10, unsigned long, x + y, if (d || r <= y) foo (0))
> +T (11, unsigned char, x + y, if (d || y < r) foo (0))
> +T (12, unsigned long long, x + y, if (d || y >= r) foo (0))
> +T (13, unsigned int, x + y, if (d || r >= y) foo (0))
> +T (14, unsigned long, x + y, if (d || r < y) foo (0))
> +T (15, unsigned short, x + y, if (d || y <= r) foo (0))
> +T (16, unsigned long long, x + y, if (d || y > r) foo (0))
> +
> +int
> +main ()
> +{
> +  if (f1 (-7U, 0) != -7U || cnt != 1) abort ();
> +  if (f1 (-7U, 6) != -1U || cnt != 2) abort ();
> +  if (f1 (-7U, 7) != 0U || cnt != 2) abort ();
> +  if (f1 (-7U, 8) != 1U || cnt != 2) abort ();
> +  if (f2 (-9UL, 0) != -9UL || cnt != 2) abort ();
> +  if (f2 (-9UL, 8) != -1UL || cnt != 2) abort ();
> +  if (f2 (-9UL, 9) != 0UL || cnt != 3) abort ();
> +  if (f2 (-9UL, 10) != 1UL || cnt != 4) abort ();
> +  if (f3 (-15, 0) != (unsigned short) -15 || cnt != 5) abort ();
> +  if (f3 (-15, 14) != (unsigned short) -1 || cnt != 6) abort ();
> +  if (f3 (-15, 15) != 0 || cnt != 6) abort ();
> +  if (f3 (-15, 16) != 1 || cnt != 6) abort ();
> +  if (f4 (-9132ULL, 0) != -9132ULL || cnt != 6) abort ();
> +  if (f4 (-9132ULL, 9131) != -1ULL || cnt != 6) abort ();
> +  if (f4 (-9132ULL, 9132) != 0 || cnt != 7) abort ();
> +  if (f4 (-9132ULL, 9133) != 1ULL || cnt != 8) abort ();
> +  if (f5 (-7U, 0) != -7U || cnt != 9) abort ();
> +  if (f5 (-7U, 6) != -1U || cnt != 10) abort ();
> +  if (f5 (-7U, 7) != 0U || cnt != 10) abort ();
> +  if (f5 (-7U, 8) != 1U || cnt != 10) abort ();
> +  if (f6 (-9UL, 0) != -9UL || cnt != 10) abort ();
> +  if (f6 (-9UL, 8) != -1UL || cnt != 10) abort ();
> +  if (f6 (-9UL, 9) != 0UL || cnt != 11) abort ();
> +  if (f6 (-9UL, 10) != 1UL || cnt != 12) abort ();
> +  if (f7 (-15, 0) != (unsigned short) -15 || cnt != 13) abort ();
> +  if (f7 (-15, 14) != (unsigned short) -1 || cnt != 14) abort ();
> +  if (f7 (-15, 15) != 0 || cnt != 14) abort ();
> +  if (f7 (-15, 16) != 1 || cnt != 14) abort ();
> +  if (f8 (-9132ULL, 0) != -9132ULL || cnt != 14) abort ();
> +  if (f8 (-9132ULL, 9131) != -1ULL || cnt != 14) abort ();
> +  if (f8 (-9132ULL, 9132) != 0 || cnt != 15) abort ();
> +  if (f8 (-9132ULL, 9133) != 1ULL || cnt != 16) abort ();
> +  cnt = 0;
> +  if (f9 (-7U, 0) != -7U || cnt != 1) abort ();
> +  if (f9 (-7U, 6) != -1U || cnt != 2) abort ();
> +  if (f9 (-7U, 7) != 0U || cnt != 2) abort ();
> +  if (f9 (-7U, 8) != 1U || cnt != 2) abort ();
> +  if (f10 (-9UL, 0) != -9UL || cnt != 2) abort ();
> +  if (f10 (-9UL, 8) != -1UL || cnt != 2) abort ();
> +  if (f10 (-9UL, 9) != 0UL || cnt != 3) abort ();
> +  if (f10 (-9UL, 10) != 1UL || cnt != 4) abort ();
> +  if (f11 (-15, 0) != (unsigned char) -15 || cnt != 5) abort ();
> +  if (f11 (-15, 14) != (unsigned char) -1 || cnt != 6) abort ();
> +  if (f11 (-15, 15) != 0 || cnt != 6) abort ();
> +  if (f11 (-15, 16) != 1 || cnt != 6) abort ();
> +  if (f12 (-9132ULL, 0) != -9132ULL || cnt != 6) abort ();
> +  if (f12 (-9132ULL, 9131) != -1ULL || cnt != 6) abort ();
> +  if (f12 (-9132ULL, 9132) != 0 || cnt != 7) abort ();
> +  if (f12 (-9132ULL, 9133) != 1ULL || cnt != 8) abort ();
> +  if (f13 (-7U, 0) != -7U || cnt != 9) abort ();
> +  if (f13 (-7U, 6) != -1U || cnt != 10) abort ();
> +  if (f13 (-7U, 7) != 0U || cnt != 10) abort ();
> +  if (f13 (-7U, 8) != 1U || cnt != 10) abort ();
> +  if (f14 (-9UL, 0) != -9UL || cnt != 10) abort ();
> +  if (f14 (-9UL, 8) != -1UL || cnt != 10) abort ();
> +  if (f14 (-9UL, 9) != 0UL || cnt != 11) abort ();
> +  if (f14 (-9UL, 10) != 1UL || cnt != 12) abort ();
> +  if (f15 (-15, 0) != (unsigned short) -15 || cnt != 13) abort ();
> +  if (f15 (-15, 14) != (unsigned short) -1 || cnt != 14) abort ();
> +  if (f15 (-15, 15) != 0 || cnt != 14) abort ();
> +  if (f15 (-15, 16) != 1 || cnt != 14) abort ();
> +  if (f16 (-9132ULL, 0) != -9132ULL || cnt != 14) abort ();
> +  if (f16 (-9132ULL, 9131) != -1ULL || cnt != 14) abort ();
> +  if (f16 (-9132ULL, 9132) != 0 || cnt != 15) abort ();
> +  if (f16 (-9132ULL, 9133) != 1ULL || cnt != 16) abort ();
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/pr67089-5.c.jj	2015-11-24 19:03:19.571594157 +0100
> +++ gcc/testsuite/gcc.dg/pr67089-5.c	2015-11-24 19:31:29.707645365 +0100
> @@ -0,0 +1,82 @@
> +/* PR target/67089 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -ftree-loop-if-convert" } */
> +
> +extern void abort (void);
> +
> +int cnt;
> +unsigned int a[16], b[16], c[16];
> +
> +__attribute__((noinline, noclone))
> +void foo (int x)
> +{
> +  asm volatile ("" : : "g" (x) : "memory");
> +  cnt++;
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f0 (unsigned int x)
> +{
> +  for (int i = 0; i < 16; i++)
> +    {
> +      unsigned int r = x - a[i];
> +      b[i] = r;
> +      c[i] = r > x ? 7 : x;
> +    }
> +}
> +
> +#define T(n, type, op, cond) \
> +__attribute__((noinline, noclone))	\
> +type					\
> +f##n (type x)				\
> +{					\
> +  type r = op;				\
> +  cond;					\
> +  return r;				\
> +}
> +
> +T (1, unsigned int, x - 2U, if (r > x) foo (0))
> +T (2, unsigned long, x - 2U, if (r <= x) foo (0))
> +T (3, unsigned short, 2U - x, if (r > 2U) foo (0))
> +T (4, unsigned char, 2U - x, if (r <= 2U) foo (0))
> +T (5, unsigned int, x + -2U, if (r > x) foo (0))
> +T (6, unsigned long, x + -2UL, if (r <= x) foo (0))
> +T (7, unsigned short, (unsigned short) -2 + x, if (r > (unsigned short) -2) foo (0))
> +T (8, unsigned char, (unsigned char) -2 + x, if (r <= (unsigned char) -2) foo (0))
> +
> +int
> +main ()
> +{
> +  int i;
> +  for (i = 0; i < 16; i++)
> +    a[i] = i - 7;
> +  f0 (5);
> +  for (i = 0; i < 16; i++)
> +    if (b[i] != 12U - i || c[i] != 7 - 2 * (i >= 7 && i < 13))
> +      abort ();
> +  if (f1 (3) != 1 || cnt != 0) abort ();
> +  if (f1 (2) != 0 || cnt != 0) abort ();
> +  if (f1 (1) != -1U || cnt != 1) abort ();
> +  if (f2 (3) != 1 || cnt != 2) abort ();
> +  if (f2 (2) != 0 || cnt != 3) abort ();
> +  if (f2 (1) != -1UL || cnt != 3) abort ();
> +  if (f3 (3) != (unsigned short) -1 || cnt != 4) abort ();
> +  if (f3 (2) != 0 || cnt != 4) abort ();
> +  if (f3 (1) != 1 || cnt != 4) abort ();
> +  if (f4 (3) != (unsigned char) -1 || cnt != 4) abort ();
> +  if (f4 (2) != 0 || cnt != 5) abort ();
> +  if (f4 (1) != 1 || cnt != 6) abort ();
> +  if (f5 (3) != 1 || cnt != 6) abort ();
> +  if (f5 (2) != 0 || cnt != 6) abort ();
> +  if (f5 (1) != -1U || cnt != 7) abort ();
> +  if (f6 (3) != 1 || cnt != 8) abort ();
> +  if (f6 (2) != 0 || cnt != 9) abort ();
> +  if (f6 (1) != -1UL || cnt != 9) abort ();
> +  if (f7 (3) != 1 || cnt != 9) abort ();
> +  if (f7 (2) != 0 || cnt != 9) abort ();
> +  if (f7 (1) != (unsigned short) -1 || cnt != 10) abort ();
> +  if (f8 (3) != 1 || cnt != 11) abort ();
> +  if (f8 (2) != 0 || cnt != 12) abort ();
> +  if (f8 (1) != (unsigned char) -1 || cnt != 12) abort ();
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/pr67089-6.c.jj	2015-11-24 19:16:02.898794422 +0100
> +++ gcc/testsuite/gcc.dg/pr67089-6.c	2015-11-24 19:32:09.928077054 +0100
> @@ -0,0 +1,62 @@
> +/* PR target/67089 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-loop-if-convert -fdump-tree-widening_mul" } */
> +
> +extern void abort (void);
> +
> +int cnt;
> +unsigned int a[16], b[16], c[16], d;
> +void foo (int x);
> +
> +__attribute__((noinline, noclone)) void
> +f0 (unsigned int x)
> +{
> +  for (int i = 0; i < 16; i++)
> +    {
> +      unsigned int r = x - a[i];
> +      b[i] = r;
> +      c[i] = r > x ? 7 : x;
> +    }
> +}
> +
> +#define T(n, type, op, cond) \
> +__attribute__((noinline, noclone))	\
> +type					\
> +f##n (type x, type y)			\
> +{					\
> +  type r = op;				\
> +  cond;					\
> +  return r;				\
> +}
> +
> +T (1, unsigned int, x - y, if (r > x) foo (0))
> +T (2, unsigned long, x - y, if (r <= x) foo (0))
> +T (3, unsigned short, x - y, if (x < r) foo (r))
> +T (4, unsigned long long, x - y, if (x >= r) foo (0))
> +T (5, unsigned int, x - y, if (d || r > x) foo (0))
> +T (6, unsigned long, x - y, if (d || r <= x) foo (0))
> +T (7, unsigned char, x - y, if (d || x < r) foo (0))
> +T (8, unsigned long long, x - y, if (d || x >= r) foo (0))
> +T (9, unsigned int, x + y, if (r >= x) foo (0))
> +T (10, unsigned long, x + y, if (r < x) foo (0))
> +T (11, unsigned short, x + y, if (x <= r) foo (r))
> +T (12, unsigned long long, x + y, if (d || x > r) foo (0))
> +T (13, unsigned int, x + y, if (d || r >= x) foo (0))
> +T (14, unsigned long, x + y, if (d || r < x) foo (0))
> +T (15, unsigned short, x + y, if (d || x <= r) foo (0))
> +T (16, unsigned long long, x + y, if (d || x > r) foo (0))
> +T (17, unsigned int, x + y, if (r >= y) foo (0))
> +T (18, unsigned long, x + y, if (r < y) foo (0))
> +T (19, unsigned short, x + y, if (y <= r) foo (r))
> +T (20, unsigned long long, x + y, if (d || y > r) foo (0))
> +T (21, unsigned int, x + y, if (d || r >= y) foo (0))
> +T (22, unsigned long, x + y, if (d || r < y) foo (0))
> +T (23, unsigned short, x + y, if (d || y <= r) foo (0))
> +T (24, unsigned long long, x + y, if (d || y > r) foo (0))
> +T (25, unsigned short, 2U - x, if (r > 2U) foo (0))
> +T (26, unsigned char, 2U - x, if (r <= 2U) foo (0))
> +
> +/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 16 "widening_mul" { target { { i?86-*-* x86_64-*-* } && { ! ia32 } } } } } */
> +/* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 11 "widening_mul" { target { { i?86-*-* x86_64-*-* } && { ! ia32 } } } } } */
> +/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 12 "widening_mul" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
> +/* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 9 "widening_mul" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */
> --- gcc/testsuite/gcc.dg/pr67089-7.c.jj	2015-11-24 19:32:30.332788737 +0100
> +++ gcc/testsuite/gcc.dg/pr67089-7.c	2015-11-24 19:37:12.304804499 +0100
> @@ -0,0 +1,62 @@
> +/* PR target/67089 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-loop-if-convert -fdump-tree-widening_mul" } */
> +
> +extern void abort (void);
> +
> +int cnt, d;
> +void foo (int x);
> +
> +#define T(n, type, op, cond) \
> +__attribute__((noinline, noclone))	\
> +type					\
> +f##n (type x, type y)			\
> +{					\
> +  type r = op;				\
> +  cond;					\
> +  return r;				\
> +}
> +
> +T (1, unsigned int, x - y, if (r >= x) foo (0))
> +T (2, unsigned long, x - y, if (r < x) foo (0))
> +T (3, unsigned short, x - y, if (x <= r) foo (r))
> +T (4, unsigned long long, x - y, if (d || x > r) foo (0))
> +T (5, unsigned int, x - y, if (d || r >= x) foo (0))
> +T (6, unsigned long, x - y, if (d || r < x) foo (0))
> +T (7, unsigned short, x - y, if (d || x <= r) foo (0))
> +T (8, unsigned long long, x - y, if (d || x > r) foo (0))
> +T (9, unsigned int, x - y, if (r > y) foo (0))
> +T (10, unsigned long, x - y, if (r <= y) foo (0))
> +T (11, unsigned short, x - y, if (y < r) foo (r))
> +T (12, unsigned long long, x - y, if (y >= r) foo (0))
> +T (13, unsigned int, x - y, if (r >= y) foo (0))
> +T (14, unsigned long, x - y, if (r < y) foo (0))
> +T (15, unsigned short, x - y, if (y <= r) foo (r))
> +T (16, unsigned long long, x - y, if (d || y > r) foo (0))
> +T (17, unsigned int, x - y, if (d || r > y) foo (0))
> +T (18, unsigned long, x - y, if (d || r <= y) foo (0))
> +T (19, unsigned char, x - y, if (d || y < r) foo (0))
> +T (20, unsigned long long, x - y, if (d || y >= r) foo (0))
> +T (21, unsigned int, x - y, if (d || r >= y) foo (0))
> +T (22, unsigned long, x - y, if (d || r < y) foo (0))
> +T (23, unsigned short, x - y, if (d || y <= r) foo (0))
> +T (24, unsigned long long, x - y, if (d || y > r) foo (0))
> +T (25, unsigned int, x + y, if (r > x) foo (0))
> +T (26, unsigned long, x + y, if (r <= x) foo (0))
> +T (27, unsigned short, x + y, if (x < r) foo (r))
> +T (28, unsigned long long, x + y, if (x >= r) foo (0))
> +T (29, unsigned int, x + y, if (d || r > x) foo (0))
> +T (30, unsigned long, x + y, if (d || r <= x) foo (0))
> +T (31, unsigned char, x + y, if (d || x < r) foo (0))
> +T (32, unsigned long long, x + y, if (d || x >= r) foo (0))
> +T (33, unsigned int, x + y, if (r > y) foo (0))
> +T (34, unsigned long, x + y, if (r <= y) foo (0))
> +T (35, unsigned short, x + y, if (y < r) foo (r))
> +T (36, unsigned long long, x + y, if (y >= r) foo (0))
> +T (37, unsigned int, x + y, if (d || r > y) foo (0))
> +T (38, unsigned long, x + y, if (d || r <= y) foo (0))
> +T (39, unsigned char, x + y, if (d || y < r) foo (0))
> +T (40, unsigned long long, x + y, if (d || y >= r) foo (0))
> +
> +/* { dg-final { scan-tree-dump-not "ADD_OVERFLOW" "widening_mul" } } */
> +/* { dg-final { scan-tree-dump-not "SUB_OVERFLOW" "widening_mul" } } */
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Convert manual unsigned +/- overflow checking into {ADD,SUB}_OVERFLOW (PR target/67089)
  2015-11-25  8:40   ` Jakub Jelinek
@ 2015-11-25  8:48     ` Richard Biener
  2015-11-25  8:59     ` Marc Glisse
  1 sibling, 0 replies; 13+ messages in thread
From: Richard Biener @ 2015-11-25  8:48 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Richard Henderson

On Wed, 25 Nov 2015, Jakub Jelinek wrote:

> On Wed, Nov 25, 2015 at 08:56:45AM +0100, Marc Glisse wrote:
> > >This is the GIMPLE side of Richard's i?86 uadd/usub overflow
> > >testing improvements.  If unsigned addition or subtraction
> > >result is used both normally and in a GIMPLE_COND/COND_EXPR/tcc_comparison
> > >that tests if unsigned overflow happened, the patch replaces it shortly
> > >before expansion with {ADD,SUB}_OVERFLOW, so that RTL expansion can generate
> > >better code on it.
> > 
> > If I test a+b<a and don't use a+b anywhere else, don't we also want to use
> > the OVERFLOW things so we can expand to test the carry flag? That is, I am
> > not convinced we want to punt on has_single_use for add_overflow. For
> > sub_overflow with a single use of y-z, I guess y-z>y should become z>y, and
> > going through a rewrite with sub_overflow neither helps nor hinders that.
> > Actually, writing z>y is something the user is not unlikely to have done
> > himself, and walking through the uses of y or z should not be hard, so I
> > guess it could make sense to rewrite y-z>y to z>y always in match.pd and
> > only look for the second form in math-opts.
> 
> Incremental diff for also handling the single use case if it is overflow
> check is below.  But we already generate good code without it for the
> x+y<x or x+y<y cases (and they aren't really problematic, as they are single
> use), and while it is true that for x-y>x case the incremental patch below
> improves the generated code right now, as you said it is better to rewrite
> those as y>x and as it is a single use, it is easier to do it in match.pd.
> So, I'd prefer to add that transformation and not use {ADD,SUB}_OVERFLOW
> for those cases, because we get good enough code without increasing the IL
> size, eating more memory etc.
> 
> > I was thinking more match.pd to transform a+b<a and sccvn to somehow CSE a+b
> > with add_overflow(a,b), but your patch seems to work well with simpler code,
> > that's cool :-)

Yeah, I think the current patch aids RTL expansion best.

> > And it shouldn't be too hard to add a few more later, to detect widening
> > operations that are only used for overflow testing, although the form of
> > such tests is much less universal among users.

> --- gcc/tree-ssa-math-opts.c.jj	2015-11-24 17:00:10.000000000 +0100
> +++ gcc/tree-ssa-math-opts.c	2015-11-25 09:25:31.781087597 +0100
> @@ -3586,7 +3586,6 @@ match_uaddsub_overflow (gimple_stmt_iter
>    tree type = TREE_TYPE (lhs);
>    use_operand_p use_p;
>    imm_use_iterator iter;
> -  bool use_seen = false;
>    bool ovf_use_seen = false;
>    gimple *use_stmt;
>  
> @@ -3594,7 +3593,6 @@ match_uaddsub_overflow (gimple_stmt_iter
>    if (!INTEGRAL_TYPE_P (type)
>        || !TYPE_UNSIGNED (type)
>        || has_zero_uses (lhs)
> -      || has_single_use (lhs)
>        || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
>  			TYPE_MODE (type)) == CODE_FOR_nothing)
>      return false;
> @@ -3606,14 +3604,13 @@ match_uaddsub_overflow (gimple_stmt_iter
>  	continue;
>  
>        if (uaddsub_overflow_check_p (stmt, use_stmt))
> -	ovf_use_seen = true;
> -      else
> -	use_seen = true;
> -      if (ovf_use_seen && use_seen)
> -	break;
> +	{
> +	  ovf_use_seen = true;
> +	  break;
> +	}
>      }
>  
> -  if (!ovf_use_seen || !use_seen)
> +  if (!ovf_use_seen)
>      return false;
>  
>    tree ctype = build_complex_type (type);

Ok with me as well.

Thanks,
Richard.

> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Convert manual unsigned +/- overflow checking into {ADD,SUB}_OVERFLOW (PR target/67089)
  2015-11-25  8:40   ` Jakub Jelinek
  2015-11-25  8:48     ` Richard Biener
@ 2015-11-25  8:59     ` Marc Glisse
  2015-11-25  9:08       ` Jakub Jelinek
  1 sibling, 1 reply; 13+ messages in thread
From: Marc Glisse @ 2015-11-25  8:59 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Richard Biener, Richard Henderson

On Wed, 25 Nov 2015, Jakub Jelinek wrote:

> On Wed, Nov 25, 2015 at 08:56:45AM +0100, Marc Glisse wrote:
>>> This is the GIMPLE side of Richard's i?86 uadd/usub overflow
>>> testing improvements.  If unsigned addition or subtraction
>>> result is used both normally and in a GIMPLE_COND/COND_EXPR/tcc_comparison
>>> that tests if unsigned overflow happened, the patch replaces it shortly
>>> before expansion with {ADD,SUB}_OVERFLOW, so that RTL expansion can generate
>>> better code on it.
>>
>> If I test a+b<a and don't use a+b anywhere else, don't we also want to use
>> the OVERFLOW things so we can expand to test the carry flag? That is, I am
>> not convinced we want to punt on has_single_use for add_overflow. For
>> sub_overflow with a single use of y-z, I guess y-z>y should become z>y, and
>> going through a rewrite with sub_overflow neither helps nor hinders that.
>> Actually, writing z>y is something the user is not unlikely to have done
>> himself, and walking through the uses of y or z should not be hard, so I
>> guess it could make sense to rewrite y-z>y to z>y always in match.pd and
>> only look for the second form in math-opts.
>
> Incremental diff for also handling the single use case if it is overflow
> check is below.  But we already generate good code without it for the
> x+y<x or x+y<y cases (and they aren't really problematic, as they are single
> use), and while it is true that for x-y>x case the incremental patch below
> improves the generated code right now, as you said it is better to rewrite
> those as y>x and as it is a single use, it is easier to do it in match.pd.
> So, I'd prefer to add that transformation and not use {ADD,SUB}_OVERFLOW
> for those cases, because we get good enough code without increasing the IL
> size, eating more memory etc.

I guess it got lost in my text, but if a user writes:

unsigned diff = a - b;
if (b > a) { /* overflow */ ... }
else { ... }

Your patch will not detect it. It seems that replacing x-y>x with y>x 
could be done whether it is single use or not, and we could look for the 
pattern above instead of the one you currently have for sub_overflow. The 
main change would be that now it isn't obvious where to insert the 
sub_overflow, since one operation doesn't obviously dominate the other :-(

-- 
Marc Glisse

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Convert manual unsigned +/- overflow checking into {ADD,SUB}_OVERFLOW (PR target/67089)
  2015-11-25  8:59     ` Marc Glisse
@ 2015-11-25  9:08       ` Jakub Jelinek
  2015-11-25  9:12         ` Richard Biener
  2015-11-25 11:23         ` Marc Glisse
  0 siblings, 2 replies; 13+ messages in thread
From: Jakub Jelinek @ 2015-11-25  9:08 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Biener, Richard Henderson

On Wed, Nov 25, 2015 at 09:58:15AM +0100, Marc Glisse wrote:
> I guess it got lost in my text, but if a user writes:
> 
> unsigned diff = a - b;
> if (b > a) { /* overflow */ ... }
> else { ... }
> 
> Your patch will not detect it. It seems that replacing x-y>x with y>x could

Sorry, already committed the patch (without incremental that hasn't been
tested anyway).
It is true that the patch does not detect this, but it is harder that way.
What if it is
if (b > a) ...
// Huge amount of code
r = a - b;
?  Trying to emit the subtraction above the comparison would then very
likely increase register pressure.  So, I'd really prefer doing x-y>x to y>x
only for single use.

	Jakub

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Convert manual unsigned +/- overflow checking into {ADD,SUB}_OVERFLOW (PR target/67089)
  2015-11-25  9:08       ` Jakub Jelinek
@ 2015-11-25  9:12         ` Richard Biener
  2015-11-25 11:23         ` Marc Glisse
  1 sibling, 0 replies; 13+ messages in thread
From: Richard Biener @ 2015-11-25  9:12 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Richard Henderson

On Wed, 25 Nov 2015, Jakub Jelinek wrote:

> On Wed, Nov 25, 2015 at 09:58:15AM +0100, Marc Glisse wrote:
> > I guess it got lost in my text, but if a user writes:
> > 
> > unsigned diff = a - b;
> > if (b > a) { /* overflow */ ... }
> > else { ... }
> > 
> > Your patch will not detect it. It seems that replacing x-y>x with y>x could
> 
> Sorry, already committed the patch (without incremental that hasn't been
> tested anyway).
> It is true that the patch does not detect this, but it is harder that way.
> What if it is
> if (b > a) ...
> // Huge amount of code
> r = a - b;
> ?  Trying to emit the subtraction above the comparison would then very
> likely increase register pressure.  So, I'd really prefer doing x-y>x to y>x
> only for single use.

I think that's ok for now.  For the above case you'd need to do something
similar to what cse sincos / divmod do, with an added "cost" check
(same basic-block?).

Richard.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Convert manual unsigned +/- overflow checking into {ADD,SUB}_OVERFLOW (PR target/67089)
  2015-11-25  9:08       ` Jakub Jelinek
  2015-11-25  9:12         ` Richard Biener
@ 2015-11-25 11:23         ` Marc Glisse
  2015-11-25 11:29           ` Jakub Jelinek
  2015-12-04 21:45           ` Marc Glisse
  1 sibling, 2 replies; 13+ messages in thread
From: Marc Glisse @ 2015-11-25 11:23 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Richard Biener, Richard Henderson

On Wed, 25 Nov 2015, Jakub Jelinek wrote:

> On Wed, Nov 25, 2015 at 09:58:15AM +0100, Marc Glisse wrote:
>> I guess it got lost in my text, but if a user writes:
>>
>> unsigned diff = a - b;
>> if (b > a) { /* overflow */ ... }
>> else { ... }
>>
>> Your patch will not detect it. It seems that replacing x-y>x with y>x could
>
> Sorry, already committed the patch (without incremental that hasn't been
> tested anyway).

Sorry, I never meant to imply that your patch was wrong in any way or 
should be blocked, I like it. I was only discussing possible future 
improvements...

> It is true that the patch does not detect this, but it is harder that way.
> What if it is
> if (b > a) ...
> // Huge amount of code
> r = a - b;
> ?  Trying to emit the subtraction above the comparison would then very
> likely increase register pressure.

The same is true whether we write it b > a or (a - b) > a (I don't think 
PRE + SCCVN avoid increasing register pressure).

> So, I'd really prefer doing x-y>x to y>x only for single use.

Ok (for now).

-- 
Marc Glisse

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Convert manual unsigned +/- overflow checking into {ADD,SUB}_OVERFLOW (PR target/67089)
  2015-11-25 11:23         ` Marc Glisse
@ 2015-11-25 11:29           ` Jakub Jelinek
  2015-11-25 21:27             ` Marc Glisse
  2015-12-04 21:45           ` Marc Glisse
  1 sibling, 1 reply; 13+ messages in thread
From: Jakub Jelinek @ 2015-11-25 11:29 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Biener, Richard Henderson

On Wed, Nov 25, 2015 at 12:19:04PM +0100, Marc Glisse wrote:
> On Wed, 25 Nov 2015, Jakub Jelinek wrote:
> 
> >On Wed, Nov 25, 2015 at 09:58:15AM +0100, Marc Glisse wrote:
> >>I guess it got lost in my text, but if a user writes:
> >>
> >>unsigned diff = a - b;
> >>if (b > a) { /* overflow */ ... }
> >>else { ... }
> >>
> >>Your patch will not detect it. It seems that replacing x-y>x with y>x could
> >
> >Sorry, already committed the patch (without incremental that hasn't been
> >tested anyway).
> 
> Sorry, I never meant to imply that your patch was wrong in any way or should
> be blocked, I like it. I was only discussing possible future improvements...

BTW, the primary reason for the patch has been a code quality regression,
and I bet that is only for the case of if (diff > a), otherwise combiner
with the problematic subtraction with overflow patterns wouldn't be able to
find anything.  rth fixed it only for the case where users explicitly use
the new __builtin_*_overflow builtins, and the patch has been trying to fix
the regression even when not written that way.

> The same is true whether we write it b > a or (a - b) > a (I don't think PRE
> + SCCVN avoid increasing register pressure).
> 
> >So, I'd really prefer doing x-y>x to y>x only for single use.
> 
> Ok (for now).

Do you plan to work on that (my match.pd experience is smaller than yours),
or should I add it to my todo list?

	Jakub

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Convert manual unsigned +/- overflow checking into {ADD,SUB}_OVERFLOW (PR target/67089)
  2015-11-25 11:29           ` Jakub Jelinek
@ 2015-11-25 21:27             ` Marc Glisse
  2015-11-26  9:14               ` Richard Biener
  0 siblings, 1 reply; 13+ messages in thread
From: Marc Glisse @ 2015-11-25 21:27 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Richard Biener, Richard Henderson

On Wed, 25 Nov 2015, Jakub Jelinek wrote:

>> The same is true whether we write it b > a or (a - b) > a (I don't think PRE
>> + SCCVN avoid increasing register pressure).
>>
>>> So, I'd really prefer doing x-y>x to y>x only for single use.
>>
>> Ok (for now).
>
> Do you plan to work on that (my match.pd experience is smaller than yours),
> or should I add to my todo list?

Are we talking stage 3 or next stage 1? If you want something for stage 3, 
I think you'll have to do it, it shouldn't be much longer than

(for cmp (gt le)
  (simplify
   (cmp (minus:s @0 @1) @0)
   (if (TYPE_UNSIGNED (TREE_TYPE (@0)))
    (cmp @1 @0))))

and a similar one for x<x-y. (I don't think TYPE_UNSIGNED needs protection 
against floats or whatever, but I could be wrong)

-- 
Marc Glisse

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Convert manual unsigned +/- overflow checking into {ADD,SUB}_OVERFLOW (PR target/67089)
  2015-11-25 21:27             ` Marc Glisse
@ 2015-11-26  9:14               ` Richard Biener
  0 siblings, 0 replies; 13+ messages in thread
From: Richard Biener @ 2015-11-26  9:14 UTC (permalink / raw)
  To: Marc Glisse; +Cc: Jakub Jelinek, gcc-patches, Richard Henderson

On Wed, 25 Nov 2015, Marc Glisse wrote:

> On Wed, 25 Nov 2015, Jakub Jelinek wrote:
> 
> > > The same is true whether we write it b > a or (a - b) > a (I don't think
> > > PRE
> > > + SCCVN avoid increasing register pressure).
> > > 
> > > > So, I'd really prefer doing x-y>x to y>x only for single use.
> > > 
> > > Ok (for now).
> > 
> > Do you plan to work on that (my match.pd experience is smaller than yours),
> > or should I add to my todo list?
> 
> Are we talking stage 3 or next stage 1? If you want something for stage 3, I
> think you'll have to do it, it shouldn't be much longer than
> 
> (for cmp (gt le)
>  (simplify
>   (cmp (minus:s @0 @1) @0)
>   (if (TYPE_UNSIGNED (TREE_TYPE (@0)))
>    (cmp @1 @0))))
> 
> and a similar one for x<x-y. (I don't think TYPE_UNSIGNED needs protection
> against floats or whatever, but I could be wrong)

floats should be fine here.  But possibly saturating types need to
be excluded?

Note that the :s on the minus has no effect as the result is a
single operation.  If you want to restrict this nevertheless
you need a && single_use (@2) and capture the minus with
(minus@2 @0 @1) instead.

Richard.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Convert manual unsigned +/- overflow checking into {ADD,SUB}_OVERFLOW (PR target/67089)
  2015-11-25 11:23         ` Marc Glisse
  2015-11-25 11:29           ` Jakub Jelinek
@ 2015-12-04 21:45           ` Marc Glisse
  1 sibling, 0 replies; 13+ messages in thread
From: Marc Glisse @ 2015-12-04 21:45 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: TEXT/PLAIN, Size: 377 bytes --]

On Wed, 25 Nov 2015, Marc Glisse wrote:

>> So, I'd really prefer doing x-y>x to y>x only for single use.
>
> Ok

Let me post this patch (needs testing on x86, I only tested on ppc which 
does not implement the new optabs) so I can more easily find it again at 
next stage 1.

It produces imag()==0 or imag()!=0 because I think that's what Jakub's 
patch does.

-- 
Marc Glisse

[-- Attachment #2: Type: TEXT/PLAIN, Size: 4904 bytes --]

Index: gcc/match.pd
===================================================================
--- gcc/match.pd	(revision 231300)
+++ gcc/match.pd	(working copy)
@@ -2396,20 +2396,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       && types_match (type, TREE_TYPE (@0)))
   (non_lvalue @0)))
 /* Do not handle
    bool_var == 0 becomes !bool_var or
    bool_var != 1 becomes !bool_var
    here because that only is good in assignment context as long
    as we require a tcc_comparison in GIMPLE_CONDs where we'd
    replace if (x == 0) with tem = ~x; if (tem != 0) which is
    clearly less optimal and which we'll transform again in forwprop.  */
 
+/* To detect overflow in unsigned A - B, A < B is simpler than A - B > A.
+   However, the detection logic for SUB_OVERFLOW in tree-ssa-math-opts.c
+   expects the long form, so we restrict the transformation for now.  */
+(for cmp (gt le)
+ (simplify
+  (cmp (minus@2 @0 @1) @0)
+  (if (single_use (@2)
+       && TYPE_UNSIGNED (TREE_TYPE (@0))
+       && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
+   (cmp @1 @0))))
+(for cmp (lt ge)
+ (simplify
+  (cmp @0 (minus@2 @0 @1))
+  (if (single_use (@2)
+       && TYPE_UNSIGNED (TREE_TYPE (@0))
+       && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
+   (cmp @0 @1))))
+/* Testing for overflow is unnecessary if we already know the result.  */
+(for cmp (lt ge)
+     out (ne eq)
+ (simplify
+  (cmp @0 (realpart (IFN_SUB_OVERFLOW@2 @0 @1)))
+  (if (TYPE_UNSIGNED (TREE_TYPE (@0)))
+   (out (imagpart @2) { build_zero_cst (TREE_TYPE (@0)); }))))
+(for cmp (gt le)
+     out (ne eq)
+ (simplify
+  (cmp (realpart (IFN_SUB_OVERFLOW@2 @0 @1)) @0)
+  (if (TYPE_UNSIGNED (TREE_TYPE (@0)))
+   (out (imagpart @2) { build_zero_cst (TREE_TYPE (@0)); }))))
+(for cmp (lt ge)
+     out (ne eq)
+ (simplify
+  (cmp (realpart (IFN_ADD_OVERFLOW@2 @0 @1)) @0)
+  (if (TYPE_UNSIGNED (TREE_TYPE (@0)))
+   (out (imagpart @2) { build_zero_cst (TREE_TYPE (@0)); }))))
+(for cmp (gt le)
+     out (ne eq)
+ (simplify
+  (cmp @0 (realpart (IFN_ADD_OVERFLOW@2 @0 @1)))
+  (if (TYPE_UNSIGNED (TREE_TYPE (@0)))
+   (out (imagpart @2) { build_zero_cst (TREE_TYPE (@0)); }))))
 
 /* Simplification of math builtins.  These rules must all be optimizations
    as well as IL simplifications.  If there is a possibility that the new
    form could be a pessimization, the rule should go in the canonicalization
    section that follows this one.
 
    Rules can generally go in this section if they satisfy one of
    the following:
 
    - the rule describes an identity
Index: gcc/testsuite/gcc.dg/tree-ssa/minus-ovf.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/minus-ovf.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/minus-ovf.c	(working copy)
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-optimized" } */
+/* Unsigned a - b compared with a should fold to comparing a with b.  */
+int f(unsigned a, unsigned b) {  /* a - b > a  <=>  b > a  */
+  unsigned remove = a - b;
+  return remove > a;
+}
+
+int g(unsigned a, unsigned b) {  /* a - b <= a  <=>  b <= a  */
+  unsigned remove = a - b;
+  return remove <= a;
+}
+
+int h(unsigned a, unsigned b) {  /* a < a - b  <=>  a < b  */
+  unsigned remove = a - b;
+  return a < remove;
+}
+
+int i(unsigned a, unsigned b) {  /* a >= a - b  <=>  a >= b  */
+  unsigned remove = a - b;
+  return a >= remove;
+}
+/* The temporary is named "remove" so the scan below can check it is gone.  */
+/* { dg-final { scan-tree-dump-not "remove" "optimized" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/overflow-1.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/overflow-1.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/overflow-1.c	(working copy)
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+/* Each compare of r against a repeats the builtin's own overflow test.  */
+int carry;  /* Receives the builtin's overflow flag in every function.  */
+int f(unsigned a, unsigned b) {  /* r = a - b;  r > a  */
+  unsigned r;
+  carry = __builtin_sub_overflow(a, b, &r);
+  return r > a;
+}
+int g(unsigned a, unsigned b) {  /* r = a - b;  a < r  */
+  unsigned r;
+  carry = __builtin_sub_overflow(a, b, &r);
+  return a < r;
+}
+int h(unsigned a, unsigned b) {  /* r = a - b;  r <= a  */
+  unsigned r;
+  carry = __builtin_sub_overflow(a, b, &r);
+  return r <= a;
+}
+int i(unsigned a, unsigned b) {  /* r = a - b;  a >= r  */
+  unsigned r;
+  carry = __builtin_sub_overflow(a, b, &r);
+  return a >= r;
+}
+int j(unsigned a, unsigned b) {  /* r = a + b;  r < a  */
+  unsigned r;
+  carry = __builtin_add_overflow(a, b, &r);
+  return r < a;
+}
+int k(unsigned a, unsigned b) {  /* r = a + b;  a > r  */
+  unsigned r;
+  carry = __builtin_add_overflow(a, b, &r);
+  return a > r;
+}
+int l(unsigned a, unsigned b) {  /* r = a + b;  r >= a  */
+  unsigned r;
+  carry = __builtin_add_overflow(a, b, &r);
+  return r >= a;
+}
+int m(unsigned a, unsigned b) {  /* r = a + b;  a <= r  */
+  unsigned r;
+  carry = __builtin_add_overflow(a, b, &r);
+  return a <= r;
+}
+/* No ordered compare may remain, and all eight builtin calls must survive.  */
+/* { dg-final { scan-tree-dump-not "(le|lt|ge|gt)_expr" "optimized" } } */
+/* { dg-final { scan-tree-dump-times "ADD_OVERFLOW" 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "SUB_OVERFLOW" 4 "optimized" } } */

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2015-12-04 21:45 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-24 20:55 [PATCH] Convert manual unsigned +/- overflow checking into {ADD,SUB}_OVERFLOW (PR target/67089) Jakub Jelinek
2015-11-25  8:11 ` Marc Glisse
2015-11-25  8:40   ` Jakub Jelinek
2015-11-25  8:48     ` Richard Biener
2015-11-25  8:59     ` Marc Glisse
2015-11-25  9:08       ` Jakub Jelinek
2015-11-25  9:12         ` Richard Biener
2015-11-25 11:23         ` Marc Glisse
2015-11-25 11:29           ` Jakub Jelinek
2015-11-25 21:27             ` Marc Glisse
2015-11-26  9:14               ` Richard Biener
2015-12-04 21:45           ` Marc Glisse
2015-11-25  8:45 ` Richard Biener

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).