public inbox for gcc-patches@gcc.gnu.org
* [PATCH] Fix PR66313
@ 2017-05-31 14:09 Richard Biener
  2017-06-13  9:57 ` Richard Sandiford
  0 siblings, 1 reply; 13+ messages in thread
From: Richard Biener @ 2017-05-31 14:09 UTC (permalink / raw)
  To: gcc-patches


So I've come back to PR66313 and found a solution to the tail-recursion
missed optimization that appears when the factoring fold is fixed to use
an unsigned type in case we cannot rule out overflow.

The folding part is identical to my last try from 2015; the tail-recursion
part makes us handle intermediate stmts, introduced by foldings, that
"clobber" our walk over the single-use chain of stmts between the call
and the return (previously we failed at any stmt that is not part of said
chain).  A simple solution is to move the stmts that are not part of the
chain, and that are safe to move, before the call.  That handles the leaf
conversions that now appear for tree-ssa/tailrecursion-6.c.
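
To illustrate the overflow issue the fold has to avoid (a hand-written
sketch, not part of the patch): with a == 0 the expression a * b + a * c
is well defined for any b and c, yet the naively factored a * (b + c)
overflows for, say, b == c == __INT_MAX__.  Performing the inner
operation unsigned avoids that:

int
factored (int a, int b, int c)
{
  /* Roughly what a * b + a * c folds to with this patch, written in
     source form; converting an out-of-range unsigned value back to
     int is implementation-defined in ISO C but wraps with GCC.  */
  return (int) ((unsigned int) a * ((unsigned int) b + (unsigned int) c));
}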

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2017-05-31  Richard Biener  <rguenther@suse.de>

	PR middle-end/66313
	* fold-const.c (fold_plusminus_mult_expr): If the factored
	factor may be zero use a wrapping type for the inner operation.
	* tree-tailcall.c (independent_of_stmt_p): Pass in to_move bitmap
	and handle moved defs.
	(process_assignment): Properly guard the unary op case.  Return a
	tri-state indicating that moving the stmt before the call may allow
	us to continue.  Pass through to_move.
	(find_tail_calls): Handle moving unrelated defs before
	the call.

	* c-c++-common/ubsan/pr66313.c: New testcase.
	* gcc.dg/tree-ssa/loop-15.c: Adjust.

Index: gcc/fold-const.c
===================================================================
*** gcc/fold-const.c.orig	2015-10-29 12:32:33.302782318 +0100
--- gcc/fold-const.c	2015-10-29 14:08:39.936497739 +0100
*************** fold_plusminus_mult_expr (location_t loc
*** 6916,6925 ****
      }
    same = NULL_TREE;
  
!   if (operand_equal_p (arg01, arg11, 0))
!     same = arg01, alt0 = arg00, alt1 = arg10;
!   else if (operand_equal_p (arg00, arg10, 0))
      same = arg00, alt0 = arg01, alt1 = arg11;
    else if (operand_equal_p (arg00, arg11, 0))
      same = arg00, alt0 = arg01, alt1 = arg10;
    else if (operand_equal_p (arg01, arg10, 0))
--- 6916,6926 ----
      }
    same = NULL_TREE;
  
!   /* Prefer factoring a common non-constant.  */
!   if (operand_equal_p (arg00, arg10, 0))
      same = arg00, alt0 = arg01, alt1 = arg11;
+   else if (operand_equal_p (arg01, arg11, 0))
+     same = arg01, alt0 = arg00, alt1 = arg10;
    else if (operand_equal_p (arg00, arg11, 0))
      same = arg00, alt0 = arg01, alt1 = arg10;
    else if (operand_equal_p (arg01, arg10, 0))
*************** fold_plusminus_mult_expr (location_t loc
*** 6974,6987 ****
  	}
      }
  
!   if (same)
      return fold_build2_loc (loc, MULT_EXPR, type,
  			fold_build2_loc (loc, code, type,
  				     fold_convert_loc (loc, type, alt0),
  				     fold_convert_loc (loc, type, alt1)),
  			fold_convert_loc (loc, type, same));
  
!   return NULL_TREE;
  }
  
  /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
--- 6975,7010 ----
  	}
      }
  
!   if (!same)
!     return NULL_TREE;
! 
!   if (! INTEGRAL_TYPE_P (type)
!       || TYPE_OVERFLOW_WRAPS (type)
!       /* We are neither factoring zero nor minus one.  */
!       || TREE_CODE (same) == INTEGER_CST)
      return fold_build2_loc (loc, MULT_EXPR, type,
  			fold_build2_loc (loc, code, type,
  				     fold_convert_loc (loc, type, alt0),
  				     fold_convert_loc (loc, type, alt1)),
  			fold_convert_loc (loc, type, same));
  
!   /* Same may be zero and thus the operation 'code' may overflow.  Likewise
!      same may be minus one and thus the multiplication may overflow.  Perform
!      the operations in an unsigned type.  */
!   tree utype = unsigned_type_for (type);
!   tree tem = fold_build2_loc (loc, code, utype,
! 			      fold_convert_loc (loc, utype, alt0),
! 			      fold_convert_loc (loc, utype, alt1));
!   /* If the sum evaluated to a constant that is not -INF the multiplication
!      cannot overflow.  */
!   if (TREE_CODE (tem) == INTEGER_CST
!       && ! wi::eq_p (tem, wi::min_value (TYPE_PRECISION (utype), SIGNED)))
!     return fold_build2_loc (loc, MULT_EXPR, type,
! 			    fold_convert (type, tem), same);
! 
!   return fold_convert_loc (loc, type,
! 			   fold_build2_loc (loc, MULT_EXPR, utype, tem,
! 					    fold_convert_loc (loc, utype, same)));
  }
  
  /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
Index: gcc/testsuite/c-c++-common/ubsan/pr66313.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/c-c++-common/ubsan/pr66313.c	2015-10-29 14:08:39.969498105 +0100
***************
*** 0 ****
--- 1,26 ----
+ /* { dg-do run } */
+ /* { dg-options "-fsanitize=undefined -fsanitize-undefined-trap-on-error" } */
+ 
+ int __attribute__((noinline,noclone))
+ f(int a, int b, int c)
+ {
+   return a * b + a * c;
+ }
+ int __attribute__((noinline,noclone))
+ g(int a)
+ {
+   return a * (__INT_MAX__/2) + a * (__INT_MAX__/2 + 2);
+ }
+ int __attribute__((noinline,noclone))
+ h(int a, int b)
+ {
+   return a * (__INT_MAX__/2 + 1) + b * (__INT_MAX__/2 + 1);
+ }
+ int main()
+ {
+   volatile int tem = f(0, __INT_MAX__, __INT_MAX__);
+   tem = f(-1, __INT_MAX__/2 + 1, __INT_MAX__/2 + 1);
+   tem = g(-1);
+   tem = h(-1, -1);
+   return 0;
+ }
Index: gcc/testsuite/gcc.dg/tree-ssa/loop-15.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/loop-15.c.orig	2015-10-29 12:32:33.302782318 +0100
--- gcc/testsuite/gcc.dg/tree-ssa/loop-15.c	2015-10-29 14:08:39.989498327 +0100
*************** int bla(void)
*** 19,26 ****
  }
  
  /* Since the loop is removed, there should be no addition.  */
! /* { dg-final { scan-tree-dump-times "\\+" 0 "optimized" } } */
! /* { dg-final { scan-tree-dump-times "n_. \\* n_." 1 "optimized" } } */
  
  /* The if from the loop header copying remains in the code.  */
  /* { dg-final { scan-tree-dump-times "if " 1 "optimized" } } */
--- 19,26 ----
  }
  
  /* Since the loop is removed, there should be no addition.  */
! /* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" } } */
! /* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
  
  /* The if from the loop header copying remains in the code.  */
  /* { dg-final { scan-tree-dump-times "if " 1 "optimized" } } */


Index: gcc/tree-tailcall.c
===================================================================
--- gcc/tree-tailcall.c	(revision 248722)
+++ gcc/tree-tailcall.c	(working copy)
@@ -184,7 +184,8 @@ suitable_for_tail_call_opt_p (void)
    containing the value of EXPR at GSI.  */
 
 static tree
-independent_of_stmt_p (tree expr, gimple *at, gimple_stmt_iterator gsi)
+independent_of_stmt_p (tree expr, gimple *at, gimple_stmt_iterator gsi,
+		       bitmap to_move)
 {
   basic_block bb, call_bb, at_bb;
   edge e;
@@ -196,6 +197,9 @@ independent_of_stmt_p (tree expr, gimple
   if (TREE_CODE (expr) != SSA_NAME)
     return NULL_TREE;
 
+  if (bitmap_bit_p (to_move, SSA_NAME_VERSION (expr)))
+    return expr;
+
   /* Mark the blocks in the chain leading to the end.  */
   at_bb = gimple_bb (at);
   call_bb = gimple_bb (gsi_stmt (gsi));
@@ -250,13 +254,16 @@ independent_of_stmt_p (tree expr, gimple
   return expr;
 }
 
+enum par { FAIL, OK, TRY_MOVE };
+
 /* Simulates the effect of an assignment STMT on the return value of the tail
    recursive CALL passed in ASS_VAR.  M and A are the multiplicative and the
    additive factor for the real return value.  */
 
-static bool
-process_assignment (gassign *stmt, gimple_stmt_iterator call, tree *m,
-		    tree *a, tree *ass_var)
+static par
+process_assignment (gassign *stmt,
+		    gimple_stmt_iterator call, tree *m,
+		    tree *a, tree *ass_var, bitmap to_move)
 {
   tree op0, op1 = NULL_TREE, non_ass_var = NULL_TREE;
   tree dest = gimple_assign_lhs (stmt);
@@ -276,7 +283,7 @@ process_assignment (gassign *stmt, gimpl
       if (gimple_assign_cast_p (stmt))
 	{
 	  if (TYPE_MODE (TREE_TYPE (dest)) != TYPE_MODE (TREE_TYPE (src_var)))
-	    return false;
+	    return FAIL;
 
 	  /* Even if the type modes are the same, if the precision of the
 	     type is smaller than mode's precision,
@@ -284,11 +291,11 @@ process_assignment (gassign *stmt, gimpl
 	  if (INTEGRAL_TYPE_P (TREE_TYPE (dest))
 	      && (GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (dest)))
 		  > TYPE_PRECISION (TREE_TYPE (dest))))
-	    return false;
+	    return FAIL;
 	}
 
       *ass_var = dest;
-      return true;
+      return OK;
     }
 
   switch (rhs_class)
@@ -303,7 +310,7 @@ process_assignment (gassign *stmt, gimpl
       break;
 
     default:
-      return false;
+      return FAIL;
     }
 
   /* Accumulator optimizations will reverse the order of operations.
@@ -311,42 +318,45 @@ process_assignment (gassign *stmt, gimpl
      that addition and multiplication are associative.  */
   if (!flag_associative_math)
     if (FLOAT_TYPE_P (TREE_TYPE (DECL_RESULT (current_function_decl))))
-      return false;
+      return FAIL;
 
-  if (rhs_class == GIMPLE_UNARY_RHS)
+  if (rhs_class == GIMPLE_UNARY_RHS
+      && op0 == *ass_var)
     ;
   else if (op0 == *ass_var
-	   && (non_ass_var = independent_of_stmt_p (op1, stmt, call)))
+	   && (non_ass_var = independent_of_stmt_p (op1, stmt, call,
+						    to_move)))
     ;
   else if (op1 == *ass_var
-	   && (non_ass_var = independent_of_stmt_p (op0, stmt, call)))
+	   && (non_ass_var = independent_of_stmt_p (op0, stmt, call,
+						    to_move)))
     ;
   else
-    return false;
+    return TRY_MOVE;
 
   switch (code)
     {
     case PLUS_EXPR:
       *a = non_ass_var;
       *ass_var = dest;
-      return true;
+      return OK;
 
     case POINTER_PLUS_EXPR:
       if (op0 != *ass_var)
-	return false;
+	return FAIL;
       *a = non_ass_var;
       *ass_var = dest;
-      return true;
+      return OK;
 
     case MULT_EXPR:
       *m = non_ass_var;
       *ass_var = dest;
-      return true;
+      return OK;
 
     case NEGATE_EXPR:
       *m = build_minus_one_cst (TREE_TYPE (op0));
       *ass_var = dest;
-      return true;
+      return OK;
 
     case MINUS_EXPR:
       if (*ass_var == op0)
@@ -358,12 +368,10 @@ process_assignment (gassign *stmt, gimpl
         }
 
       *ass_var = dest;
-      return true;
-
-      /* TODO -- Handle POINTER_PLUS_EXPR.  */
+      return OK;
 
     default:
-      return false;
+      return FAIL;
     }
 }
 
@@ -523,6 +531,7 @@ find_tail_calls (basic_block bb, struct
      since we are running after dce.  */
   m = NULL_TREE;
   a = NULL_TREE;
+  auto_bitmap to_move;
 
   abb = bb;
   agsi = gsi;
@@ -540,27 +549,36 @@ find_tail_calls (basic_block bb, struct
 	}
 
       stmt = gsi_stmt (agsi);
-
-      if (gimple_code (stmt) == GIMPLE_LABEL
-	  || gimple_code (stmt) == GIMPLE_NOP)
-	continue;
-
       if (gimple_code (stmt) == GIMPLE_RETURN)
 	break;
 
-      if (gimple_clobber_p (stmt))
-	continue;
-
-      if (is_gimple_debug (stmt))
+      if (gimple_code (stmt) == GIMPLE_LABEL
+	  || gimple_code (stmt) == GIMPLE_NOP
+	  || gimple_clobber_p (stmt)
+	  || is_gimple_debug (stmt))
 	continue;
 
       if (gimple_code (stmt) != GIMPLE_ASSIGN)
 	return;
 
       /* This is a gimple assign. */
-      if (! process_assignment (as_a <gassign *> (stmt), gsi, &tmp_m,
-				&tmp_a, &ass_var))
+      par ret = process_assignment (as_a <gassign *> (stmt), gsi,
+				    &tmp_m, &tmp_a, &ass_var, to_move);
+      if (ret == FAIL)
 	return;
+      else if (ret == TRY_MOVE)
+	{
+	  if (! tail_recursion)
+	    return;
+	  for (unsigned opno = 1; opno < gimple_num_ops (stmt); ++opno)
+	    {
+	      tree op = gimple_op (stmt, opno);
+	      if (independent_of_stmt_p (op, stmt, gsi, to_move) != op)
+		return;
+	    }
+	  bitmap_set_bit (to_move, SSA_NAME_VERSION (gimple_assign_lhs (stmt)));
+	  continue;
+	}
 
       if (tmp_a)
 	{
@@ -601,6 +619,19 @@ find_tail_calls (basic_block bb, struct
   if (m && POINTER_TYPE_P (TREE_TYPE (DECL_RESULT (current_function_decl))))
     return;
 
+  /* Move queued defs.  */
+  if (tail_recursion)
+    {
+      bitmap_iterator bi;
+      unsigned i;
+      EXECUTE_IF_SET_IN_BITMAP (to_move, 0, i, bi)
+	{
+	  stmt = SSA_NAME_DEF_STMT (ssa_name (i));
+	  gimple_stmt_iterator mgsi = gsi_for_stmt (stmt);
+	  gsi_move_before (&mgsi, &gsi);
+	}
+    }
+
   nw = XNEW (struct tailcall);
 
   nw->call_gsi = gsi;
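
For concreteness, a hand-written example (not one of the testsuite
cases) of the shape this now handles: with the new folding the return
expression contains leaf conversions, and the stmts feeding them that
do not use the call's result get queued in to_move and moved before
the call, so the accumulator transformation still applies.

int
sum (int n, int a, int b)
{
  if (n <= 0)
    return 0;
  /* a * b + a * n folds to (int)((unsigned) a * ((unsigned) b
     + (unsigned) n)), so in GIMPLE some conversions independent of
     the call's result sit between the call and the return.  */
  return sum (n - 1, a, b) + a * b + a * n;
}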


* Re: [PATCH] Fix PR66313
  2017-05-31 14:09 [PATCH] Fix PR66313 Richard Biener
@ 2017-06-13  9:57 ` Richard Sandiford
  2017-06-13 10:25   ` Richard Biener
  0 siblings, 1 reply; 13+ messages in thread
From: Richard Sandiford @ 2017-06-13  9:57 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

Richard Biener <rguenther@suse.de> writes:
> So I've come back to PR66313 and found a solution to the tailrecursion
> missed optimization when fixing the factoring folding to use an unsigned
> type when we're not sure of overflow.
>
> The folding part is identical to my last try from 2015, the tailrecursion
> part makes us handle intermittent stmts that were introduced by foldings
> that "clobber" our quest walking the single-use chain of stmts between
> the call and the return (and failing at all stmts that are not part
> of said chain).  A simple solution is to move the stmts that are not
> part of the chain and that we can move before the call.  That handles
> the leaf conversions that now appear for tree-ssa/tailrecursion-6.c
>
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>
> Richard.
>
> 2017-05-31  Richard Biener  <rguenther@suse.de>
>
> 	PR middle-end/66313
> 	* fold-const.c (fold_plusminus_mult_expr): If the factored
> 	factor may be zero use a wrapping type for the inner operation.
> 	* tree-tailcall.c (independent_of_stmt_p): Pass in to_move bitmap
> 	and handle moved defs.
> 	(process_assignment): Properly guard the unary op case.  Return a
> 	tri-state indicating that moving the stmt before the call may allow
> 	to continue.  Pass through to_move.
> 	(find_tail_calls): Handle moving unrelated defs before
> 	the call.
>
> 	* c-c++-common/ubsan/pr66313.c: New testcase.
> 	* gcc.dg/tree-ssa/loop-15.c: Adjust.
>
> Index: gcc/fold-const.c
> ===================================================================
> *** gcc/fold-const.c.orig	2015-10-29 12:32:33.302782318 +0100
> --- gcc/fold-const.c	2015-10-29 14:08:39.936497739 +0100
> *************** fold_plusminus_mult_expr (location_t loc
> *** 6916,6925 ****
>       }
>     same = NULL_TREE;
>   
> !   if (operand_equal_p (arg01, arg11, 0))
> !     same = arg01, alt0 = arg00, alt1 = arg10;
> !   else if (operand_equal_p (arg00, arg10, 0))
>       same = arg00, alt0 = arg01, alt1 = arg11;
>     else if (operand_equal_p (arg00, arg11, 0))
>       same = arg00, alt0 = arg01, alt1 = arg10;
>     else if (operand_equal_p (arg01, arg10, 0))
> --- 6916,6926 ----
>       }
>     same = NULL_TREE;
>   
> !   /* Prefer factoring a common non-constant.  */
> !   if (operand_equal_p (arg00, arg10, 0))
>       same = arg00, alt0 = arg01, alt1 = arg11;
> +   else if (operand_equal_p (arg01, arg11, 0))
> +     same = arg01, alt0 = arg00, alt1 = arg10;
>     else if (operand_equal_p (arg00, arg11, 0))
>       same = arg00, alt0 = arg01, alt1 = arg10;
>     else if (operand_equal_p (arg01, arg10, 0))
> *************** fold_plusminus_mult_expr (location_t loc
> *** 6974,6987 ****
>   	}
>       }
>   
> !   if (same)
>       return fold_build2_loc (loc, MULT_EXPR, type,
>   			fold_build2_loc (loc, code, type,
>   				     fold_convert_loc (loc, type, alt0),
>   				     fold_convert_loc (loc, type, alt1)),
>   			fold_convert_loc (loc, type, same));
>   
> !   return NULL_TREE;
>   }
>   
>   /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
> --- 6975,7010 ----
>   	}
>       }
>   
> !   if (!same)
> !     return NULL_TREE;
> ! 
> !   if (! INTEGRAL_TYPE_P (type)
> !       || TYPE_OVERFLOW_WRAPS (type)
> !       /* We are neither factoring zero nor minus one.  */
> !       || TREE_CODE (same) == INTEGER_CST)
>       return fold_build2_loc (loc, MULT_EXPR, type,
>   			fold_build2_loc (loc, code, type,
>   				     fold_convert_loc (loc, type, alt0),
>   				     fold_convert_loc (loc, type, alt1)),
>   			fold_convert_loc (loc, type, same));
>   
> !   /* Same may be zero and thus the operation 'code' may overflow.  Likewise
> !      same may be minus one and thus the multiplication may overflow.  Perform
> !      the operations in an unsigned type.  */
> !   tree utype = unsigned_type_for (type);
> !   tree tem = fold_build2_loc (loc, code, utype,
> ! 			      fold_convert_loc (loc, utype, alt0),
> ! 			      fold_convert_loc (loc, utype, alt1));
> !   /* If the sum evaluated to a constant that is not -INF the multiplication
> !      cannot overflow.  */
> !   if (TREE_CODE (tem) == INTEGER_CST
> !       && ! wi::eq_p (tem, wi::min_value (TYPE_PRECISION (utype), SIGNED)))
> !     return fold_build2_loc (loc, MULT_EXPR, type,
> ! 			    fold_convert (type, tem), same);
> ! 
> !   return fold_convert_loc (loc, type,
> ! 			   fold_build2_loc (loc, MULT_EXPR, utype, tem,
> ! 					    fold_convert_loc (loc, utype, same)));
>   }
>   
>   /* Subroutine of native_encode_expr.  Encode the INTEGER_CST

Sorry if you already know, but this part means that we can no longer
vectorise:

int
f (int *x, int b1, int b2, int b3)
{
  int foo = 0;
  for (int i1 = 0; i1 < b1; ++i1)
    for (int i2 = 0; i2 < b2; ++i2)
      for (int i3 = 0; i3 < b3; ++i3)
        foo += x[i1 * b2 * b3 + i2 * b3 + (i3 - 1)];
  return foo;
}

We now convert all the arithmetic in the [...] to unsigned int
and reassociate it so that the "- 1" is applied last.  We then assume
that the overflow in this subtraction is well-defined and that the
&x[...] might not be linear for the inner loop.
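
Roughly (hand-written, modulo the exact association), the index goes
from the all-signed

  x[i1 * b2 * b3 + i2 * b3 + (i3 - 1)]

to something along the lines of

  x[(int) ((unsigned) i3 + ((unsigned) i1 * b2 * b3
                            + (unsigned) i2 * b3 - 1))]

where the inner arithmetic wraps, so SCEV can no longer use undefined
overflow to prove the address is linear in i3.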

Thanks,
Richard


* Re: [PATCH] Fix PR66313
  2017-06-13  9:57 ` Richard Sandiford
@ 2017-06-13 10:25   ` Richard Biener
  2017-06-13 11:23     ` Richard Sandiford
  0 siblings, 1 reply; 13+ messages in thread
From: Richard Biener @ 2017-06-13 10:25 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: gcc-patches

On Tue, 13 Jun 2017, Richard Sandiford wrote:

> Richard Biener <rguenther@suse.de> writes:
> > So I've come back to PR66313 and found a solution to the tailrecursion
> > missed optimization when fixing the factoring folding to use an unsigned
> > type when we're not sure of overflow.
> >
> > The folding part is identical to my last try from 2015, the tailrecursion
> > part makes us handle intermittent stmts that were introduced by foldings
> > that "clobber" our quest walking the single-use chain of stmts between
> > the call and the return (and failing at all stmts that are not part
> > of said chain).  A simple solution is to move the stmts that are not
> > part of the chain and that we can move before the call.  That handles
> > the leaf conversions that now appear for tree-ssa/tailrecursion-6.c
> >
> > Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
> >
> > Richard.
> >
> > 2017-05-31  Richard Biener  <rguenther@suse.de>
> >
> > 	PR middle-end/66313
> > 	* fold-const.c (fold_plusminus_mult_expr): If the factored
> > 	factor may be zero use a wrapping type for the inner operation.
> > 	* tree-tailcall.c (independent_of_stmt_p): Pass in to_move bitmap
> > 	and handle moved defs.
> > 	(process_assignment): Properly guard the unary op case.  Return a
> > 	tri-state indicating that moving the stmt before the call may allow
> > 	to continue.  Pass through to_move.
> > 	(find_tail_calls): Handle moving unrelated defs before
> > 	the call.
> >
> > 	* c-c++-common/ubsan/pr66313.c: New testcase.
> > 	* gcc.dg/tree-ssa/loop-15.c: Adjust.
> >
> > Index: gcc/fold-const.c
> > ===================================================================
> > *** gcc/fold-const.c.orig	2015-10-29 12:32:33.302782318 +0100
> > --- gcc/fold-const.c	2015-10-29 14:08:39.936497739 +0100
> > *************** fold_plusminus_mult_expr (location_t loc
> > *** 6916,6925 ****
> >       }
> >     same = NULL_TREE;
> >   
> > !   if (operand_equal_p (arg01, arg11, 0))
> > !     same = arg01, alt0 = arg00, alt1 = arg10;
> > !   else if (operand_equal_p (arg00, arg10, 0))
> >       same = arg00, alt0 = arg01, alt1 = arg11;
> >     else if (operand_equal_p (arg00, arg11, 0))
> >       same = arg00, alt0 = arg01, alt1 = arg10;
> >     else if (operand_equal_p (arg01, arg10, 0))
> > --- 6916,6926 ----
> >       }
> >     same = NULL_TREE;
> >   
> > !   /* Prefer factoring a common non-constant.  */
> > !   if (operand_equal_p (arg00, arg10, 0))
> >       same = arg00, alt0 = arg01, alt1 = arg11;
> > +   else if (operand_equal_p (arg01, arg11, 0))
> > +     same = arg01, alt0 = arg00, alt1 = arg10;
> >     else if (operand_equal_p (arg00, arg11, 0))
> >       same = arg00, alt0 = arg01, alt1 = arg10;
> >     else if (operand_equal_p (arg01, arg10, 0))
> > *************** fold_plusminus_mult_expr (location_t loc
> > *** 6974,6987 ****
> >   	}
> >       }
> >   
> > !   if (same)
> >       return fold_build2_loc (loc, MULT_EXPR, type,
> >   			fold_build2_loc (loc, code, type,
> >   				     fold_convert_loc (loc, type, alt0),
> >   				     fold_convert_loc (loc, type, alt1)),
> >   			fold_convert_loc (loc, type, same));
> >   
> > !   return NULL_TREE;
> >   }
> >   
> >   /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
> > --- 6975,7010 ----
> >   	}
> >       }
> >   
> > !   if (!same)
> > !     return NULL_TREE;
> > ! 
> > !   if (! INTEGRAL_TYPE_P (type)
> > !       || TYPE_OVERFLOW_WRAPS (type)
> > !       /* We are neither factoring zero nor minus one.  */
> > !       || TREE_CODE (same) == INTEGER_CST)
> >       return fold_build2_loc (loc, MULT_EXPR, type,
> >   			fold_build2_loc (loc, code, type,
> >   				     fold_convert_loc (loc, type, alt0),
> >   				     fold_convert_loc (loc, type, alt1)),
> >   			fold_convert_loc (loc, type, same));
> >   
> > !   /* Same may be zero and thus the operation 'code' may overflow.  Likewise
> > !      same may be minus one and thus the multiplication may overflow.  Perform
> > !      the operations in an unsigned type.  */
> > !   tree utype = unsigned_type_for (type);
> > !   tree tem = fold_build2_loc (loc, code, utype,
> > ! 			      fold_convert_loc (loc, utype, alt0),
> > ! 			      fold_convert_loc (loc, utype, alt1));
> > !   /* If the sum evaluated to a constant that is not -INF the multiplication
> > !      cannot overflow.  */
> > !   if (TREE_CODE (tem) == INTEGER_CST
> > !       && ! wi::eq_p (tem, wi::min_value (TYPE_PRECISION (utype), SIGNED)))
> > !     return fold_build2_loc (loc, MULT_EXPR, type,
> > ! 			    fold_convert (type, tem), same);
> > ! 
> > !   return fold_convert_loc (loc, type,
> > ! 			   fold_build2_loc (loc, MULT_EXPR, utype, tem,
> > ! 					    fold_convert_loc (loc, utype, same)));
> >   }
> >   
> >   /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
> 
> Sorry if you already know, but this part means that we can no longer
> vectorise:
> 
> int
> f (int *x, int b1, int b2, int b3)
> {
>   int foo = 0;
>   for (int i1 = 0; i1 < b1; ++i1)
>     for (int i2 = 0; i2 < b2; ++i2)
>       for (int i3 = 0; i3 < b3; ++i3)
>         foo += x[i1 * b2 * b3 + i2 * b3 + (i3 - 1)];
>   return foo;
> }
> 
> We now convert all the arithmetic in the [...] to unsigned int
> and reassociate it so that the "- 1" is applied last.  We then assume
> that the overflow in this subtraction is well-defined and that the
> &x[...] might not be linear for the inner loop.

Can you file a PR?

  # i3_44 = PHI <i3_32(6), 0(4)>
  i3.2_7 = (unsigned int) i3_44;
  _9 = i3.2_7 + _34;
  _10 = (int) _9;
  _11 = (long unsigned int) _10;
  _12 = _11 * 4;
  _13 = x_30(D) + _12;
  _14 = *_13;
...
   i3_32 = i3_44 + 1;

so we have *(x_30(D) + 4 * ((sizetype)(int)((unsigned){ 0, + , 1} + X)))

It might mean that we need to avoid this re-association.  Not sure how
to detect worthwhile vs. not worthwhile cases during folding.  At least
I see no easy way to recover from it in SCEV analysis unless the
number of iterations is constrained more than in your example.

Ideas welcome ;)

Thanks,
Richard.


* Re: [PATCH] Fix PR66313
  2017-06-13 10:25   ` Richard Biener
@ 2017-06-13 11:23     ` Richard Sandiford
  2017-06-13 11:28       ` Bin.Cheng
  2017-06-13 11:48       ` Richard Biener
  0 siblings, 2 replies; 13+ messages in thread
From: Richard Sandiford @ 2017-06-13 11:23 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

Richard Biener <rguenther@suse.de> writes:
> On Tue, 13 Jun 2017, Richard Sandiford wrote:
>> Richard Biener <rguenther@suse.de> writes:
>> > So I've come back to PR66313 and found a solution to the tailrecursion
>> > missed optimization when fixing the factoring folding to use an unsigned
>> > type when we're not sure of overflow.
>> >
>> > The folding part is identical to my last try from 2015, the tailrecursion
>> > part makes us handle intermittent stmts that were introduced by foldings
>> > that "clobber" our quest walking the single-use chain of stmts between
>> > the call and the return (and failing at all stmts that are not part
>> > of said chain).  A simple solution is to move the stmts that are not
>> > part of the chain and that we can move before the call.  That handles
>> > the leaf conversions that now appear for tree-ssa/tailrecursion-6.c
>> >
>> > Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>> >
>> > Richard.
>> >
>> > 2017-05-31  Richard Biener  <rguenther@suse.de>
>> >
>> > 	PR middle-end/66313
>> > 	* fold-const.c (fold_plusminus_mult_expr): If the factored
>> > 	factor may be zero use a wrapping type for the inner operation.
>> > 	* tree-tailcall.c (independent_of_stmt_p): Pass in to_move bitmap
>> > 	and handle moved defs.
>> > 	(process_assignment): Properly guard the unary op case.  Return a
>> > 	tri-state indicating that moving the stmt before the call may allow
>> > 	to continue.  Pass through to_move.
>> > 	(find_tail_calls): Handle moving unrelated defs before
>> > 	the call.
>> >
>> > 	* c-c++-common/ubsan/pr66313.c: New testcase.
>> > 	* gcc.dg/tree-ssa/loop-15.c: Adjust.
>> >
>> > Index: gcc/fold-const.c
>> > ===================================================================
>> > *** gcc/fold-const.c.orig	2015-10-29 12:32:33.302782318 +0100
>> > --- gcc/fold-const.c	2015-10-29 14:08:39.936497739 +0100
>> > *************** fold_plusminus_mult_expr (location_t loc
>> > *** 6916,6925 ****
>> >       }
>> >     same = NULL_TREE;
>> >   
>> > !   if (operand_equal_p (arg01, arg11, 0))
>> > !     same = arg01, alt0 = arg00, alt1 = arg10;
>> > !   else if (operand_equal_p (arg00, arg10, 0))
>> >       same = arg00, alt0 = arg01, alt1 = arg11;
>> >     else if (operand_equal_p (arg00, arg11, 0))
>> >       same = arg00, alt0 = arg01, alt1 = arg10;
>> >     else if (operand_equal_p (arg01, arg10, 0))
>> > --- 6916,6926 ----
>> >       }
>> >     same = NULL_TREE;
>> >   
>> > !   /* Prefer factoring a common non-constant.  */
>> > !   if (operand_equal_p (arg00, arg10, 0))
>> >       same = arg00, alt0 = arg01, alt1 = arg11;
>> > +   else if (operand_equal_p (arg01, arg11, 0))
>> > +     same = arg01, alt0 = arg00, alt1 = arg10;
>> >     else if (operand_equal_p (arg00, arg11, 0))
>> >       same = arg00, alt0 = arg01, alt1 = arg10;
>> >     else if (operand_equal_p (arg01, arg10, 0))
>> > *************** fold_plusminus_mult_expr (location_t loc
>> > *** 6974,6987 ****
>> >   	}
>> >       }
>> >   
>> > !   if (same)
>> >       return fold_build2_loc (loc, MULT_EXPR, type,
>> >   			fold_build2_loc (loc, code, type,
>> >   				     fold_convert_loc (loc, type, alt0),
>> >   				     fold_convert_loc (loc, type, alt1)),
>> >   			fold_convert_loc (loc, type, same));
>> >   
>> > !   return NULL_TREE;
>> >   }
>> >   
>> >   /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
>> > --- 6975,7010 ----
>> >   	}
>> >       }
>> >   
>> > !   if (!same)
>> > !     return NULL_TREE;
>> > ! 
>> > !   if (! INTEGRAL_TYPE_P (type)
>> > !       || TYPE_OVERFLOW_WRAPS (type)
>> > !       /* We are neither factoring zero nor minus one.  */
>> > !       || TREE_CODE (same) == INTEGER_CST)
>> >       return fold_build2_loc (loc, MULT_EXPR, type,
>> >   			fold_build2_loc (loc, code, type,
>> >   				     fold_convert_loc (loc, type, alt0),
>> >   				     fold_convert_loc (loc, type, alt1)),
>> >   			fold_convert_loc (loc, type, same));
>> >   
>> > !   /* Same may be zero and thus the operation 'code' may overflow.  Likewise
>> > !      same may be minus one and thus the multiplication may overflow.  Perform
>> > !      the operations in an unsigned type.  */
>> > !   tree utype = unsigned_type_for (type);
>> > !   tree tem = fold_build2_loc (loc, code, utype,
>> > ! 			      fold_convert_loc (loc, utype, alt0),
>> > ! 			      fold_convert_loc (loc, utype, alt1));
>> > !   /* If the sum evaluated to a constant that is not -INF the multiplication
>> > !      cannot overflow.  */
>> > !   if (TREE_CODE (tem) == INTEGER_CST
>> > !       && ! wi::eq_p (tem, wi::min_value (TYPE_PRECISION (utype), SIGNED)))
>> > !     return fold_build2_loc (loc, MULT_EXPR, type,
>> > ! 			    fold_convert (type, tem), same);
>> > ! 
>> > !   return fold_convert_loc (loc, type,
>> > ! 			   fold_build2_loc (loc, MULT_EXPR, utype, tem,
>> > ! 					    fold_convert_loc (loc, utype, same)));
>> >   }
>> >   
>> >   /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
>> 
>> Sorry if you already know, but this part means that we can no longer
>> vectorise:
>> 
>> int
>> f (int *x, int b1, int b2, int b3)
>> {
>>   int foo = 0;
>>   for (int i1 = 0; i1 < b1; ++i1)
>>     for (int i2 = 0; i2 < b2; ++i2)
>>       for (int i3 = 0; i3 < b3; ++i3)
>>         foo += x[i1 * b2 * b3 + i2 * b3 + (i3 - 1)];
>>   return foo;
>> }
>> 
>> We now convert all the arithmetic in the [...] to unsigned int
>> and reassociate it so that the "- 1" is applied last.  We then assume
>> that the overflow in this subtraction is well-defined and that the
>> &x[...] might not be linear for the inner loop.
>
> Can you file a PR?

OK, filed as PR81082.

>   # i3_44 = PHI <i3_32(6), 0(4)>
>   i3.2_7 = (unsigned int) i3_44;
>   _9 = i3.2_7 + _34;
>   _10 = (int) _9;
>   _11 = (long unsigned int) _10;
>   _12 = _11 * 4;
>   _13 = x_30(D) + _12;
>   _14 = *_13;
> ...
>    i3_32 = i3_44 + 1;
>
> so we have *(x_30(D) + 4 * ((sizetype)(int)((unsigned){ 0, + , 1} + X))
>
> It might mean that we need to avoid this re-association.  Not sure how
> to detect worthwhile vs. not worthwhile cases during folding.  At least
> I see no easy way to recover from it in SCEV analysis unless the
> number of iterations is constrained more than in your example.

Yeah, in the example that this was reduced from, not reassociating
is far preferable to changing the types.  But like you say,
I've no idea how we'd know that at this stage.

Thanks,
Richard

> Ideas welcome ;)
>
> Thanks,
> Richard.


* Re: [PATCH] Fix PR66313
  2017-06-13 11:23     ` Richard Sandiford
@ 2017-06-13 11:28       ` Bin.Cheng
  2017-06-13 11:49         ` Richard Biener
  2017-06-13 11:48       ` Richard Biener
  1 sibling, 1 reply; 13+ messages in thread
From: Bin.Cheng @ 2017-06-13 11:28 UTC (permalink / raw)
  To: Richard Biener, gcc-patches List, Richard Sandiford

On Tue, Jun 13, 2017 at 12:23 PM, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> Richard Biener <rguenther@suse.de> writes:
>> On Tue, 13 Jun 2017, Richard Sandiford wrote:
>>> Richard Biener <rguenther@suse.de> writes:
>>> > So I've come back to PR66313 and found a solution to the tailrecursion
>>> > missed optimization when fixing the factoring folding to use an unsigned
>>> > type when we're not sure of overflow.
>>> >
>>> > The folding part is identical to my last try from 2015, the tailrecursion
>>> > part makes us handle intermittent stmts that were introduced by foldings
>>> > that "clobber" our quest walking the single-use chain of stmts between
>>> > the call and the return (and failing at all stmts that are not part
>>> > of said chain).  A simple solution is to move the stmts that are not
>>> > part of the chain and that we can move before the call.  That handles
>>> > the leaf conversions that now appear for tree-ssa/tailrecursion-6.c
>>> >
>>> > Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>>> >
>>> > Richard.
>>> >
>>> > 2017-05-31  Richard Biener  <rguenther@suse.de>
>>> >
>>> >    PR middle-end/66313
>>> >    * fold-const.c (fold_plusminus_mult_expr): If the factored
>>> >    factor may be zero use a wrapping type for the inner operation.
>>> >    * tree-tailcall.c (independent_of_stmt_p): Pass in to_move bitmap
>>> >    and handle moved defs.
>>> >    (process_assignment): Properly guard the unary op case.  Return a
>>> >    tri-state indicating that moving the stmt before the call may allow
>>> >    to continue.  Pass through to_move.
>>> >    (find_tail_calls): Handle moving unrelated defs before
>>> >    the call.
>>> >
>>> >    * c-c++-common/ubsan/pr66313.c: New testcase.
>>> >    * gcc.dg/tree-ssa/loop-15.c: Adjust.
>>> >
>>> > Index: gcc/fold-const.c
>>> > ===================================================================
>>> > *** gcc/fold-const.c.orig  2015-10-29 12:32:33.302782318 +0100
>>> > --- gcc/fold-const.c       2015-10-29 14:08:39.936497739 +0100
>>> > *************** fold_plusminus_mult_expr (location_t loc
>>> > *** 6916,6925 ****
>>> >       }
>>> >     same = NULL_TREE;
>>> >
>>> > !   if (operand_equal_p (arg01, arg11, 0))
>>> > !     same = arg01, alt0 = arg00, alt1 = arg10;
>>> > !   else if (operand_equal_p (arg00, arg10, 0))
>>> >       same = arg00, alt0 = arg01, alt1 = arg11;
>>> >     else if (operand_equal_p (arg00, arg11, 0))
>>> >       same = arg00, alt0 = arg01, alt1 = arg10;
>>> >     else if (operand_equal_p (arg01, arg10, 0))
>>> > --- 6916,6926 ----
>>> >       }
>>> >     same = NULL_TREE;
>>> >
>>> > !   /* Prefer factoring a common non-constant.  */
>>> > !   if (operand_equal_p (arg00, arg10, 0))
>>> >       same = arg00, alt0 = arg01, alt1 = arg11;
>>> > +   else if (operand_equal_p (arg01, arg11, 0))
>>> > +     same = arg01, alt0 = arg00, alt1 = arg10;
>>> >     else if (operand_equal_p (arg00, arg11, 0))
>>> >       same = arg00, alt0 = arg01, alt1 = arg10;
>>> >     else if (operand_equal_p (arg01, arg10, 0))
>>> > *************** fold_plusminus_mult_expr (location_t loc
>>> > *** 6974,6987 ****
>>> >    }
>>> >       }
>>> >
>>> > !   if (same)
>>> >       return fold_build2_loc (loc, MULT_EXPR, type,
>>> >                    fold_build2_loc (loc, code, type,
>>> >                                 fold_convert_loc (loc, type, alt0),
>>> >                                 fold_convert_loc (loc, type, alt1)),
>>> >                    fold_convert_loc (loc, type, same));
>>> >
>>> > !   return NULL_TREE;
>>> >   }
>>> >
>>> >   /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
>>> > --- 6975,7010 ----
>>> >    }
>>> >       }
>>> >
>>> > !   if (!same)
>>> > !     return NULL_TREE;
>>> > !
>>> > !   if (! INTEGRAL_TYPE_P (type)
>>> > !       || TYPE_OVERFLOW_WRAPS (type)
>>> > !       /* We are neither factoring zero nor minus one.  */
>>> > !       || TREE_CODE (same) == INTEGER_CST)
>>> >       return fold_build2_loc (loc, MULT_EXPR, type,
>>> >                    fold_build2_loc (loc, code, type,
>>> >                                 fold_convert_loc (loc, type, alt0),
>>> >                                 fold_convert_loc (loc, type, alt1)),
>>> >                    fold_convert_loc (loc, type, same));
>>> >
>>> > !   /* Same may be zero and thus the operation 'code' may overflow.  Likewise
>>> > !      same may be minus one and thus the multiplication may overflow.  Perform
>>> > !      the operations in an unsigned type.  */
>>> > !   tree utype = unsigned_type_for (type);
>>> > !   tree tem = fold_build2_loc (loc, code, utype,
>>> > !                        fold_convert_loc (loc, utype, alt0),
>>> > !                        fold_convert_loc (loc, utype, alt1));
>>> > !   /* If the sum evaluated to a constant that is not -INF the multiplication
>>> > !      cannot overflow.  */
>>> > !   if (TREE_CODE (tem) == INTEGER_CST
>>> > !       && ! wi::eq_p (tem, wi::min_value (TYPE_PRECISION (utype), SIGNED)))
>>> > !     return fold_build2_loc (loc, MULT_EXPR, type,
>>> > !                      fold_convert (type, tem), same);
>>> > !
>>> > !   return fold_convert_loc (loc, type,
>>> > !                     fold_build2_loc (loc, MULT_EXPR, utype, tem,
>>> > !                                      fold_convert_loc (loc, utype, same)));
>>> >   }
>>> >
>>> >   /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
>>>
>>> Sorry if you already know, but this part means that we can no longer
>>> vectorise:
>>>
>>> int
>>> f (int *x, int b1, int b2, int b3)
>>> {
>>>   int foo = 0;
>>>   for (int i1 = 0; i1 < b1; ++i1)
>>>     for (int i2 = 0; i2 < b2; ++i2)
>>>       for (int i3 = 0; i3 < b3; ++i3)
>>>         foo += x[i1 * b2 * b3 + i2 * b3 + (i3 - 1)];
>>>   return foo;
>>> }
>>>
>>> We now convert all the arithmetic in the [...] to unsigned int
>>> and reassociate it so that the "- 1" is applied last.  We then assume
>>> that the overflow in this subtraction is well-defined and that the
>>> &x[...] might not be linear for the inner loop.
>>
>> Can you file a PR?
>
> OK, filed as PR81082.
>
>>   # i3_44 = PHI <i3_32(6), 0(4)>
>>   i3.2_7 = (unsigned int) i3_44;
>>   _9 = i3.2_7 + _34;
>>   _10 = (int) _9;
>>   _11 = (long unsigned int) _10;
>>   _12 = _11 * 4;
>>   _13 = x_30(D) + _12;
>>   _14 = *_13;
>> ...
>>    i3_32 = i3_44 + 1;
>>
>> so we have *(x_30(D) + 4 * ((sizetype)(int)((unsigned){ 0, + , 1} + X))
>>
>> It might mean that we need to avoid this re-association.  Not sure how
>> to detect worthwhile vs. not worthwhile cases during folding.  At least
>> I see no easy way to recover from it in SCEV analysis unless the
>> number of iterations is constrained more than in your example.
>
> Yeah, in the example that this was reduced from, not reassociating
> is far more preferable to changing the types.  But like you say,
> I've no idea how we'd know that at this stage.
Looks like it has become a general problem.  IIRC, loop-im also hoists
signed computations out of loops as unsigned computations.
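
For example (hand-written sketch; assuming the usual rewrite to
defined overflow when hoisting conditionally executed stmts):

void
g (int *a, int b, int c, int n)
{
  for (int i = 0; i < n; ++i)
    if (a[i])
      a[i] = b + c;  /* loop-invariant, but conditionally executed */
}

Hoisting b + c before the loop executes it on paths where the original
program did not, so loop-im would rewrite it along the lines of
(int) ((unsigned) b + (unsigned) c) to avoid introducing signed
overflow.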

Thanks,
bin
>
> Thanks,
> Richard
>
>> Ideas welcome ;)
>>
>> Thanks,
>> Richard.


* Re: [PATCH] Fix PR66313
  2017-06-13 11:23     ` Richard Sandiford
  2017-06-13 11:28       ` Bin.Cheng
@ 2017-06-13 11:48       ` Richard Biener
  2017-06-13 11:56         ` Bin.Cheng
  1 sibling, 1 reply; 13+ messages in thread
From: Richard Biener @ 2017-06-13 11:48 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: gcc-patches

On Tue, 13 Jun 2017, Richard Sandiford wrote:

> Richard Biener <rguenther@suse.de> writes:
> > On Tue, 13 Jun 2017, Richard Sandiford wrote:
> >> Richard Biener <rguenther@suse.de> writes:
> >> > So I've come back to PR66313 and found a solution to the tailrecursion
> >> > missed optimization when fixing the factoring folding to use an unsigned
> >> > type when we're not sure of overflow.
> >> >
> >> > The folding part is identical to my last try from 2015, the tailrecursion
> >> > part makes us handle intermittent stmts that were introduced by foldings
> >> > that "clobber" our quest walking the single-use chain of stmts between
> >> > the call and the return (and failing at all stmts that are not part
> >> > of said chain).  A simple solution is to move the stmts that are not
> >> > part of the chain and that we can move before the call.  That handles
> >> > the leaf conversions that now appear for tree-ssa/tailrecursion-6.c
> >> >
> >> > Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
> >> >
> >> > Richard.
> >> >
> >> > 2017-05-31  Richard Biener  <rguenther@suse.de>
> >> >
> >> > 	PR middle-end/66313
> >> > 	* fold-const.c (fold_plusminus_mult_expr): If the factored
> >> > 	factor may be zero use a wrapping type for the inner operation.
> >> > 	* tree-tailcall.c (independent_of_stmt_p): Pass in to_move bitmap
> >> > 	and handle moved defs.
> >> > 	(process_assignment): Properly guard the unary op case.  Return a
> >> > 	tri-state indicating that moving the stmt before the call may allow
> >> > 	to continue.  Pass through to_move.
> >> > 	(find_tail_calls): Handle moving unrelated defs before
> >> > 	the call.
> >> >
> >> > 	* c-c++-common/ubsan/pr66313.c: New testcase.
> >> > 	* gcc.dg/tree-ssa/loop-15.c: Adjust.
> >> >
> >> > Index: gcc/fold-const.c
> >> > ===================================================================
> >> > *** gcc/fold-const.c.orig	2015-10-29 12:32:33.302782318 +0100
> >> > --- gcc/fold-const.c	2015-10-29 14:08:39.936497739 +0100
> >> > *************** fold_plusminus_mult_expr (location_t loc
> >> > *** 6916,6925 ****
> >> >       }
> >> >     same = NULL_TREE;
> >> >   
> >> > !   if (operand_equal_p (arg01, arg11, 0))
> >> > !     same = arg01, alt0 = arg00, alt1 = arg10;
> >> > !   else if (operand_equal_p (arg00, arg10, 0))
> >> >       same = arg00, alt0 = arg01, alt1 = arg11;
> >> >     else if (operand_equal_p (arg00, arg11, 0))
> >> >       same = arg00, alt0 = arg01, alt1 = arg10;
> >> >     else if (operand_equal_p (arg01, arg10, 0))
> >> > --- 6916,6926 ----
> >> >       }
> >> >     same = NULL_TREE;
> >> >   
> >> > !   /* Prefer factoring a common non-constant.  */
> >> > !   if (operand_equal_p (arg00, arg10, 0))
> >> >       same = arg00, alt0 = arg01, alt1 = arg11;
> >> > +   else if (operand_equal_p (arg01, arg11, 0))
> >> > +     same = arg01, alt0 = arg00, alt1 = arg10;
> >> >     else if (operand_equal_p (arg00, arg11, 0))
> >> >       same = arg00, alt0 = arg01, alt1 = arg10;
> >> >     else if (operand_equal_p (arg01, arg10, 0))
> >> > *************** fold_plusminus_mult_expr (location_t loc
> >> > *** 6974,6987 ****
> >> >   	}
> >> >       }
> >> >   
> >> > !   if (same)
> >> >       return fold_build2_loc (loc, MULT_EXPR, type,
> >> >   			fold_build2_loc (loc, code, type,
> >> >   				     fold_convert_loc (loc, type, alt0),
> >> >   				     fold_convert_loc (loc, type, alt1)),
> >> >   			fold_convert_loc (loc, type, same));
> >> >   
> >> > !   return NULL_TREE;
> >> >   }
> >> >   
> >> >   /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
> >> > --- 6975,7010 ----
> >> >   	}
> >> >       }
> >> >   
> >> > !   if (!same)
> >> > !     return NULL_TREE;
> >> > ! 
> >> > !   if (! INTEGRAL_TYPE_P (type)
> >> > !       || TYPE_OVERFLOW_WRAPS (type)
> >> > !       /* We are neither factoring zero nor minus one.  */
> >> > !       || TREE_CODE (same) == INTEGER_CST)
> >> >       return fold_build2_loc (loc, MULT_EXPR, type,
> >> >   			fold_build2_loc (loc, code, type,
> >> >   				     fold_convert_loc (loc, type, alt0),
> >> >   				     fold_convert_loc (loc, type, alt1)),
> >> >   			fold_convert_loc (loc, type, same));
> >> >   
> >> > !   /* Same may be zero and thus the operation 'code' may overflow.  Likewise
> >> > !      same may be minus one and thus the multiplication may overflow.  Perform
> >> > !      the operations in an unsigned type.  */
> >> > !   tree utype = unsigned_type_for (type);
> >> > !   tree tem = fold_build2_loc (loc, code, utype,
> >> > ! 			      fold_convert_loc (loc, utype, alt0),
> >> > ! 			      fold_convert_loc (loc, utype, alt1));
> >> > !   /* If the sum evaluated to a constant that is not -INF the multiplication
> >> > !      cannot overflow.  */
> >> > !   if (TREE_CODE (tem) == INTEGER_CST
> >> > !       && ! wi::eq_p (tem, wi::min_value (TYPE_PRECISION (utype), SIGNED)))
> >> > !     return fold_build2_loc (loc, MULT_EXPR, type,
> >> > ! 			    fold_convert (type, tem), same);
> >> > ! 
> >> > !   return fold_convert_loc (loc, type,
> >> > ! 			   fold_build2_loc (loc, MULT_EXPR, utype, tem,
> >> > ! 					    fold_convert_loc (loc, utype, same)));
> >> >   }
> >> >   
> >> >   /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
> >> 
> >> Sorry if you already know, but this part means that we can no longer
> >> vectorise:
> >> 
> >> int
> >> f (int *x, int b1, int b2, int b3)
> >> {
> >>   int foo = 0;
> >>   for (int i1 = 0; i1 < b1; ++i1)
> >>     for (int i2 = 0; i2 < b2; ++i2)
> >>       for (int i3 = 0; i3 < b3; ++i3)
> >>         foo += x[i1 * b2 * b3 + i2 * b3 + (i3 - 1)];
> >>   return foo;
> >> }
> >> 
> >> We now convert all the arithmetic in the [...] to unsigned int
> >> and reassociate it so that the "- 1" is applied last.  We then assume
> >> that the overflow in this subtraction is well-defined and that the
> >> &x[...] might not be linear for the inner loop.
> >
> > Can you file a PR?
> 
> OK, filed as PR81082.
> 
> >   # i3_44 = PHI <i3_32(6), 0(4)>
> >   i3.2_7 = (unsigned int) i3_44;
> >   _9 = i3.2_7 + _34;
> >   _10 = (int) _9;
> >   _11 = (long unsigned int) _10;
> >   _12 = _11 * 4;
> >   _13 = x_30(D) + _12;
> >   _14 = *_13;
> > ...
> >    i3_32 = i3_44 + 1;
> >
> > so we have *(x_30(D) + 4 * ((sizetype)(int)((unsigned){ 0, + , 1} + X))
> >
> > It might mean that we need to avoid this re-association.  Not sure how
> > to detect worthwhile vs. not worthwhile cases during folding.  At least
> > I see no easy way to recover from it in SCEV analysis unless the
> > number of iterations is constrained more than in your example.
> 
> Yeah, in the example that this was reduced from, not reassociating
> is far more preferable to changing the types.  But like you say,
> I've no idea how we'd know that at this stage.

In the past I played with not obfuscating things so early during FE
lowering, namely not exposing the * 4 but keeping the original types of
array / pointer indexes.  On the original mem-ref branch I had
an IDX_EXPR that allowed chaining those (a widen-multiply-accumulate
op).

That said, the context where this re-association is not interesting is
address context, i.e. the multiplication that produces the byte offset.

Of course in your case there's also b3 factored out, which looks
like a generally interesting transform (but possibly harmful in address
context again).

From the FE we get

*(x + (sizetype) ((long unsigned int) (((i1 * b2) * b3 + i2 * b3) + (i3 + -1)) * 4))

and SCEV uses the fact that the mults/adds have undefined behavior
on overflow.  The same missed optimization occurs if you make all those
vars unsigned (with or without the folding in question):

#define U unsigned
int
f (int *x, U int b1, U int b2, U int b3)
{
  int foo = 0;
  for (U int i1 = 0; i1 < b1; ++i1)
    for (U int i2 = 0; i2 < b2; ++i2)
      for (U int i3 = 0; i3 < b3; ++i3)
        foo += x[i1 * b2 * b3 + i2 * b3 + (i3 - 1)];
  return foo;
}

It works again for unsigned long as there can be no valid object
where the computation could wrap around.
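
I.e. (untested sketch) this variant should be analyzable again:

#define U unsigned long
int
f (int *x, U b1, U b2, U b3)
{
  int foo = 0;
  for (U i1 = 0; i1 < b1; ++i1)
    for (U i2 = 0; i2 < b2; ++i2)
      for (U i3 = 0; i3 < b3; ++i3)
        foo += x[i1 * b2 * b3 + i2 * b3 + (i3 - 1)];
  return foo;
}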

Richard.

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)


* Re: [PATCH] Fix PR66313
  2017-06-13 11:28       ` Bin.Cheng
@ 2017-06-13 11:49         ` Richard Biener
  0 siblings, 0 replies; 13+ messages in thread
From: Richard Biener @ 2017-06-13 11:49 UTC (permalink / raw)
  To: Bin.Cheng; +Cc: gcc-patches List, Richard Sandiford

On Tue, 13 Jun 2017, Bin.Cheng wrote:

> On Tue, Jun 13, 2017 at 12:23 PM, Richard Sandiford
> <richard.sandiford@linaro.org> wrote:
> > Richard Biener <rguenther@suse.de> writes:
> >> On Tue, 13 Jun 2017, Richard Sandiford wrote:
> >>> Richard Biener <rguenther@suse.de> writes:
> >>> > So I've come back to PR66313 and found a solution to the tailrecursion
> >>> > missed optimization when fixing the factoring folding to use an unsigned
> >>> > type when we're not sure of overflow.
> >>> >
> >>> > The folding part is identical to my last try from 2015, the tailrecursion
> >>> > part makes us handle intermittent stmts that were introduced by foldings
> >>> > that "clobber" our quest walking the single-use chain of stmts between
> >>> > the call and the return (and failing at all stmts that are not part
> >>> > of said chain).  A simple solution is to move the stmts that are not
> >>> > part of the chain and that we can move before the call.  That handles
> >>> > the leaf conversions that now appear for tree-ssa/tailrecursion-6.c
> >>> >
> >>> > Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
> >>> >
> >>> > Richard.
> >>> >
> >>> > 2017-05-31  Richard Biener  <rguenther@suse.de>
> >>> >
> >>> >    PR middle-end/66313
> >>> >    * fold-const.c (fold_plusminus_mult_expr): If the factored
> >>> >    factor may be zero use a wrapping type for the inner operation.
> >>> >    * tree-tailcall.c (independent_of_stmt_p): Pass in to_move bitmap
> >>> >    and handle moved defs.
> >>> >    (process_assignment): Properly guard the unary op case.  Return a
> >>> >    tri-state indicating that moving the stmt before the call may allow
> >>> >    to continue.  Pass through to_move.
> >>> >    (find_tail_calls): Handle moving unrelated defs before
> >>> >    the call.
> >>> >
> >>> >    * c-c++-common/ubsan/pr66313.c: New testcase.
> >>> >    * gcc.dg/tree-ssa/loop-15.c: Adjust.
> >>> >
> >>> > Index: gcc/fold-const.c
> >>> > ===================================================================
> >>> > *** gcc/fold-const.c.orig  2015-10-29 12:32:33.302782318 +0100
> >>> > --- gcc/fold-const.c       2015-10-29 14:08:39.936497739 +0100
> >>> > *************** fold_plusminus_mult_expr (location_t loc
> >>> > *** 6916,6925 ****
> >>> >       }
> >>> >     same = NULL_TREE;
> >>> >
> >>> > !   if (operand_equal_p (arg01, arg11, 0))
> >>> > !     same = arg01, alt0 = arg00, alt1 = arg10;
> >>> > !   else if (operand_equal_p (arg00, arg10, 0))
> >>> >       same = arg00, alt0 = arg01, alt1 = arg11;
> >>> >     else if (operand_equal_p (arg00, arg11, 0))
> >>> >       same = arg00, alt0 = arg01, alt1 = arg10;
> >>> >     else if (operand_equal_p (arg01, arg10, 0))
> >>> > --- 6916,6926 ----
> >>> >       }
> >>> >     same = NULL_TREE;
> >>> >
> >>> > !   /* Prefer factoring a common non-constant.  */
> >>> > !   if (operand_equal_p (arg00, arg10, 0))
> >>> >       same = arg00, alt0 = arg01, alt1 = arg11;
> >>> > +   else if (operand_equal_p (arg01, arg11, 0))
> >>> > +     same = arg01, alt0 = arg00, alt1 = arg10;
> >>> >     else if (operand_equal_p (arg00, arg11, 0))
> >>> >       same = arg00, alt0 = arg01, alt1 = arg10;
> >>> >     else if (operand_equal_p (arg01, arg10, 0))
> >>> > *************** fold_plusminus_mult_expr (location_t loc
> >>> > *** 6974,6987 ****
> >>> >    }
> >>> >       }
> >>> >
> >>> > !   if (same)
> >>> >       return fold_build2_loc (loc, MULT_EXPR, type,
> >>> >                    fold_build2_loc (loc, code, type,
> >>> >                                 fold_convert_loc (loc, type, alt0),
> >>> >                                 fold_convert_loc (loc, type, alt1)),
> >>> >                    fold_convert_loc (loc, type, same));
> >>> >
> >>> > !   return NULL_TREE;
> >>> >   }
> >>> >
> >>> >   /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
> >>> > --- 6975,7010 ----
> >>> >    }
> >>> >       }
> >>> >
> >>> > !   if (!same)
> >>> > !     return NULL_TREE;
> >>> > !
> >>> > !   if (! INTEGRAL_TYPE_P (type)
> >>> > !       || TYPE_OVERFLOW_WRAPS (type)
> >>> > !       /* We are neither factoring zero nor minus one.  */
> >>> > !       || TREE_CODE (same) == INTEGER_CST)
> >>> >       return fold_build2_loc (loc, MULT_EXPR, type,
> >>> >                    fold_build2_loc (loc, code, type,
> >>> >                                 fold_convert_loc (loc, type, alt0),
> >>> >                                 fold_convert_loc (loc, type, alt1)),
> >>> >                    fold_convert_loc (loc, type, same));
> >>> >
> >>> > !   /* Same may be zero and thus the operation 'code' may overflow.  Likewise
> >>> > !      same may be minus one and thus the multiplication may overflow.  Perform
> >>> > !      the operations in an unsigned type.  */
> >>> > !   tree utype = unsigned_type_for (type);
> >>> > !   tree tem = fold_build2_loc (loc, code, utype,
> >>> > !                        fold_convert_loc (loc, utype, alt0),
> >>> > !                        fold_convert_loc (loc, utype, alt1));
> >>> > !   /* If the sum evaluated to a constant that is not -INF the multiplication
> >>> > !      cannot overflow.  */
> >>> > !   if (TREE_CODE (tem) == INTEGER_CST
> >>> > !       && ! wi::eq_p (tem, wi::min_value (TYPE_PRECISION (utype), SIGNED)))
> >>> > !     return fold_build2_loc (loc, MULT_EXPR, type,
> >>> > !                      fold_convert (type, tem), same);
> >>> > !
> >>> > !   return fold_convert_loc (loc, type,
> >>> > !                     fold_build2_loc (loc, MULT_EXPR, utype, tem,
> >>> > !                                      fold_convert_loc (loc, utype, same)));
> >>> >   }
> >>> >
> >>> >   /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
> >>>
> >>> Sorry if you already know, but this part means that we can no longer
> >>> vectorise:
> >>>
> >>> int
> >>> f (int *x, int b1, int b2, int b3)
> >>> {
> >>>   int foo = 0;
> >>>   for (int i1 = 0; i1 < b1; ++i1)
> >>>     for (int i2 = 0; i2 < b2; ++i2)
> >>>       for (int i3 = 0; i3 < b3; ++i3)
> >>>         foo += x[i1 * b2 * b3 + i2 * b3 + (i3 - 1)];
> >>>   return foo;
> >>> }
> >>>
> >>> We now convert all the arithmetic in the [...] to unsigned int
> >>> and reassociate it so that the "- 1" is applied last.  We then assume
> >>> that the overflow in this subtraction is well-defined and that the
> >>> &x[...] might not be linear for the inner loop.
> >>
> >> Can you file a PR?
> >
> > OK, filed as PR81082.
> >
> >>   # i3_44 = PHI <i3_32(6), 0(4)>
> >>   i3.2_7 = (unsigned int) i3_44;
> >>   _9 = i3.2_7 + _34;
> >>   _10 = (int) _9;
> >>   _11 = (long unsigned int) _10;
> >>   _12 = _11 * 4;
> >>   _13 = x_30(D) + _12;
> >>   _14 = *_13;
> >> ...
> >>    i3_32 = i3_44 + 1;
> >>
> >> so we have *(x_30(D) + 4 * ((sizetype)(int)((unsigned){ 0, + , 1} + X))
> >>
> >> It might mean that we need to avoid this re-association.  Not sure how
> >> to detect worthwhile vs. not worthwhile cases during folding.  At least
> >> I see no easy way to recover from it in SCEV analysis unless the
> >> number of iterations is constrained more than in your example.
> >
> > Yeah, in the example that this was reduced from, not reassociating
> > is far more preferable to changing the types.  But like you say,
> > I've no idea how we'd know that at this stage.
> Looks like it has become a general problem.  IIRC, loop-im also hoists
> signed computation out of loop into unsigned computations.

Yes, it's the trade-off between having signed ops invoke undefined
behavior and not spuriously introducing that ourselves, by either
1) not doing anything or 2) being careful and rewriting to arithmetic
not involving undefined behavior.

Richard.

> Thanks,
> bin
> >
> > Thanks,
> > Richard
> >
> >> Ideas welcome ;)
> >>
> >> Thanks,
> >> Richard.
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)


* Re: [PATCH] Fix PR66313
  2017-06-13 11:48       ` Richard Biener
@ 2017-06-13 11:56         ` Bin.Cheng
  2017-06-13 12:03           ` Richard Biener
  0 siblings, 1 reply; 13+ messages in thread
From: Bin.Cheng @ 2017-06-13 11:56 UTC (permalink / raw)
  To: Richard Biener; +Cc: Richard Sandiford, gcc-patches List

On Tue, Jun 13, 2017 at 12:48 PM, Richard Biener <rguenther@suse.de> wrote:
> On Tue, 13 Jun 2017, Richard Sandiford wrote:
>
>> Richard Biener <rguenther@suse.de> writes:
>> > On Tue, 13 Jun 2017, Richard Sandiford wrote:
>> >> Richard Biener <rguenther@suse.de> writes:
>> >> > So I've come back to PR66313 and found a solution to the tailrecursion
>> >> > missed optimization when fixing the factoring folding to use an unsigned
>> >> > type when we're not sure of overflow.
>> >> >
>> >> > The folding part is identical to my last try from 2015, the tailrecursion
>> >> > part makes us handle intermittent stmts that were introduced by foldings
>> >> > that "clobber" our quest walking the single-use chain of stmts between
>> >> > the call and the return (and failing at all stmts that are not part
>> >> > of said chain).  A simple solution is to move the stmts that are not
>> >> > part of the chain and that we can move before the call.  That handles
>> >> > the leaf conversions that now appear for tree-ssa/tailrecursion-6.c
>> >> >
>> >> > Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>> >> >
>> >> > Richard.
>> >> >
>> >> > 2017-05-31  Richard Biener  <rguenther@suse.de>
>> >> >
>> >> >  PR middle-end/66313
>> >> >  * fold-const.c (fold_plusminus_mult_expr): If the factored
>> >> >  factor may be zero use a wrapping type for the inner operation.
>> >> >  * tree-tailcall.c (independent_of_stmt_p): Pass in to_move bitmap
>> >> >  and handle moved defs.
>> >> >  (process_assignment): Properly guard the unary op case.  Return a
>> >> >  tri-state indicating that moving the stmt before the call may allow
>> >> >  to continue.  Pass through to_move.
>> >> >  (find_tail_calls): Handle moving unrelated defs before
>> >> >  the call.
>> >> >
>> >> >  * c-c++-common/ubsan/pr66313.c: New testcase.
>> >> >  * gcc.dg/tree-ssa/loop-15.c: Adjust.
>> >> >
>> >> > Index: gcc/fold-const.c
>> >> > ===================================================================
>> >> > *** gcc/fold-const.c.orig        2015-10-29 12:32:33.302782318 +0100
>> >> > --- gcc/fold-const.c     2015-10-29 14:08:39.936497739 +0100
>> >> > *************** fold_plusminus_mult_expr (location_t loc
>> >> > *** 6916,6925 ****
>> >> >       }
>> >> >     same = NULL_TREE;
>> >> >
>> >> > !   if (operand_equal_p (arg01, arg11, 0))
>> >> > !     same = arg01, alt0 = arg00, alt1 = arg10;
>> >> > !   else if (operand_equal_p (arg00, arg10, 0))
>> >> >       same = arg00, alt0 = arg01, alt1 = arg11;
>> >> >     else if (operand_equal_p (arg00, arg11, 0))
>> >> >       same = arg00, alt0 = arg01, alt1 = arg10;
>> >> >     else if (operand_equal_p (arg01, arg10, 0))
>> >> > --- 6916,6926 ----
>> >> >       }
>> >> >     same = NULL_TREE;
>> >> >
>> >> > !   /* Prefer factoring a common non-constant.  */
>> >> > !   if (operand_equal_p (arg00, arg10, 0))
>> >> >       same = arg00, alt0 = arg01, alt1 = arg11;
>> >> > +   else if (operand_equal_p (arg01, arg11, 0))
>> >> > +     same = arg01, alt0 = arg00, alt1 = arg10;
>> >> >     else if (operand_equal_p (arg00, arg11, 0))
>> >> >       same = arg00, alt0 = arg01, alt1 = arg10;
>> >> >     else if (operand_equal_p (arg01, arg10, 0))
>> >> > *************** fold_plusminus_mult_expr (location_t loc
>> >> > *** 6974,6987 ****
>> >> >          }
>> >> >       }
>> >> >
>> >> > !   if (same)
>> >> >       return fold_build2_loc (loc, MULT_EXPR, type,
>> >> >                          fold_build2_loc (loc, code, type,
>> >> >                                       fold_convert_loc (loc, type, alt0),
>> >> >                                       fold_convert_loc (loc, type, alt1)),
>> >> >                          fold_convert_loc (loc, type, same));
>> >> >
>> >> > !   return NULL_TREE;
>> >> >   }
>> >> >
>> >> >   /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
>> >> > --- 6975,7010 ----
>> >> >          }
>> >> >       }
>> >> >
>> >> > !   if (!same)
>> >> > !     return NULL_TREE;
>> >> > !
>> >> > !   if (! INTEGRAL_TYPE_P (type)
>> >> > !       || TYPE_OVERFLOW_WRAPS (type)
>> >> > !       /* We are neither factoring zero nor minus one.  */
>> >> > !       || TREE_CODE (same) == INTEGER_CST)
>> >> >       return fold_build2_loc (loc, MULT_EXPR, type,
>> >> >                          fold_build2_loc (loc, code, type,
>> >> >                                       fold_convert_loc (loc, type, alt0),
>> >> >                                       fold_convert_loc (loc, type, alt1)),
>> >> >                          fold_convert_loc (loc, type, same));
>> >> >
>> >> > !   /* Same may be zero and thus the operation 'code' may overflow.  Likewise
>> >> > !      same may be minus one and thus the multiplication may overflow.  Perform
>> >> > !      the operations in an unsigned type.  */
>> >> > !   tree utype = unsigned_type_for (type);
>> >> > !   tree tem = fold_build2_loc (loc, code, utype,
>> >> > !                              fold_convert_loc (loc, utype, alt0),
>> >> > !                              fold_convert_loc (loc, utype, alt1));
>> >> > !   /* If the sum evaluated to a constant that is not -INF the multiplication
>> >> > !      cannot overflow.  */
>> >> > !   if (TREE_CODE (tem) == INTEGER_CST
>> >> > !       && ! wi::eq_p (tem, wi::min_value (TYPE_PRECISION (utype), SIGNED)))
>> >> > !     return fold_build2_loc (loc, MULT_EXPR, type,
>> >> > !                            fold_convert (type, tem), same);
>> >> > !
>> >> > !   return fold_convert_loc (loc, type,
>> >> > !                           fold_build2_loc (loc, MULT_EXPR, utype, tem,
>> >> > !                                            fold_convert_loc (loc, utype, same)));
>> >> >   }
>> >> >
>> >> >   /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
>> >>
>> >> Sorry if you already know, but this part means that we can no longer
>> >> vectorise:
>> >>
>> >> int
>> >> f (int *x, int b1, int b2, int b3)
>> >> {
>> >>   int foo = 0;
>> >>   for (int i1 = 0; i1 < b1; ++i1)
>> >>     for (int i2 = 0; i2 < b2; ++i2)
>> >>       for (int i3 = 0; i3 < b3; ++i3)
>> >>         foo += x[i1 * b2 * b3 + i2 * b3 + (i3 - 1)];
>> >>   return foo;
>> >> }
>> >>
>> >> We now convert all the arithmetic in the [...] to unsigned int
>> >> and reassociate it so that the "- 1" is applied last.  We then assume
>> >> that the overflow in this subtraction is well-defined and that the
>> >> &x[...] might not be linear for the inner loop.
>> >
>> > Can you file a PR?
>>
>> OK, filed as PR81082.
>>
>> >   # i3_44 = PHI <i3_32(6), 0(4)>
>> >   i3.2_7 = (unsigned int) i3_44;
>> >   _9 = i3.2_7 + _34;
>> >   _10 = (int) _9;
>> >   _11 = (long unsigned int) _10;
>> >   _12 = _11 * 4;
>> >   _13 = x_30(D) + _12;
>> >   _14 = *_13;
>> > ...
>> >    i3_32 = i3_44 + 1;
>> >
>> > so we have *(x_30(D) + 4 * ((sizetype)(int)((unsigned){ 0, + , 1} + X)))
>> >
>> > It might mean that we need to avoid this re-association.  Not sure how
>> > to detect worthwhile vs. not worthwhile cases during folding.  At least
>> > I see no easy way to recover from it in SCEV analysis unless the
>> > number of iterations is constrained more than in your example.
>>
>> Yeah, in the example that this was reduced from, not reassociating
>> is far more preferable to changing the types.  But like you say,
>> I've no idea how we'd know that at this stage.
>
> In the past I played with not obfuscating things so early during FE
> lowering, namely not exposing the * 4 but keeping the original types
> of array / pointer indexes.  On the original mem-ref branch I had
> an IDX_EXPR that allowed chaining those (a widen-multiply-accumulate
> op).
> 
> That said, the context where this reassociation is not interesting is
> address context, i.e. the multiplication to the byte offset.
> 
> Of course in your case there's also b3 factored out, which looks
> like a generally interesting transform (but possibly harmful in address
> context again).
>
> From the FE we get
>
> *(x + (sizetype) ((long unsigned int) (((i1 * b2) * b3 + i2 * b3) + (i3 +
> -1)) * 4))
>
> and SCEV uses the fact that the mults/adds have undefined behavior
> on overflow.  The same missed optimization occurs if you make all those
> vars unsigned (with or without the folding in question):
But with unsigned types it's not a missed optimization, because the
computation could overflow and result in a non-SCEV address.  In this
case the loop needs to be versioned under overflow/non-overflow
assumptions to be parallelized/vectorized, right?  Or split for cases
where we can easily infer the boundary condition of the overflow.
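
To illustrate, a sketch of the versioned form of the inner loop,
where no_wrap_p is a made-up placeholder for whatever runtime test
niter analysis can derive and base stands for the precomputed
i1 * b2 * b3 + i2 * b3 part:

  if (no_wrap_p (base, b3))
    {
      /* Copy 1: the guard lets the compiler treat base + i3 - 1
         as affine in i3, so this copy can be vectorized.  */
      for (unsigned int i3 = 0; i3 < b3; ++i3)
        foo += x[base + i3 - 1];
    }
  else
    {
      /* Copy 2: unchanged original loop for the wrapping case.  */
      for (unsigned int i3 = 0; i3 < b3; ++i3)
        foo += x[base + i3 - 1];
    }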

Thanks,
bin
>
> #define U unsigned
> int
> f (int *x, U int b1, U int b2, U int b3)
> {
>   int foo = 0;
>   for (U int i1 = 0; i1 < b1; ++i1)
>     for (U int i2 = 0; i2 < b2; ++i2)
>       for (U int i3 = 0; i3 < b3; ++i3)
>         foo += x[i1 * b2 * b3 + i2 * b3 + (i3 - 1)];
>   return foo;
> }
>
> It works again for unsigned long as there can be no valid object
> where the computation could wrap around.
>
> Richard.
>
> --
> Richard Biener <rguenther@suse.de>
> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)


* Re: [PATCH] Fix PR66313
  2017-06-13 11:56         ` Bin.Cheng
@ 2017-06-13 12:03           ` Richard Biener
  0 siblings, 0 replies; 13+ messages in thread
From: Richard Biener @ 2017-06-13 12:03 UTC (permalink / raw)
  To: Bin.Cheng; +Cc: Richard Sandiford, gcc-patches List

On Tue, 13 Jun 2017, Bin.Cheng wrote:

> On Tue, Jun 13, 2017 at 12:48 PM, Richard Biener <rguenther@suse.de> wrote:
> > On Tue, 13 Jun 2017, Richard Sandiford wrote:
> >
> >> Richard Biener <rguenther@suse.de> writes:
> >> > On Tue, 13 Jun 2017, Richard Sandiford wrote:
> >> >> Richard Biener <rguenther@suse.de> writes:
> >> >> > So I've come back to PR66313 and found a solution to the tailrecursion
> >> >> > missed optimization when fixing the factoring folding to use an unsigned
> >> >> > type when we're not sure of overflow.
> >> >> >
> >> >> > The folding part is identical to my last try from 2015, the tailrecursion
> >> >> > part makes us handle intermittent stmts that were introduced by foldings
> >> >> > that "clobber" our quest walking the single-use chain of stmts between
> >> >> > the call and the return (and failing at all stmts that are not part
> >> >> > of said chain).  A simple solution is to move the stmts that are not
> >> >> > part of the chain and that we can move before the call.  That handles
> >> >> > the leaf conversions that now appear for tree-ssa/tailrecursion-6.c
> >> >> >
> >> >> > Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
> >> >> >
> >> >> > Richard.
> >> >> >
> >> >> > 2017-05-31  Richard Biener  <rguenther@suse.de>
> >> >> >
> >> >> >  PR middle-end/66313
> >> >> >  * fold-const.c (fold_plusminus_mult_expr): If the factored
> >> >> >  factor may be zero use a wrapping type for the inner operation.
> >> >> >  * tree-tailcall.c (independent_of_stmt_p): Pass in to_move bitmap
> >> >> >  and handle moved defs.
> >> >> >  (process_assignment): Properly guard the unary op case.  Return a
> >> >> >  tri-state indicating that moving the stmt before the call may allow
> >> >> >  to continue.  Pass through to_move.
> >> >> >  (find_tail_calls): Handle moving unrelated defs before
> >> >> >  the call.
> >> >> >
> >> >> >  * c-c++-common/ubsan/pr66313.c: New testcase.
> >> >> >  * gcc.dg/tree-ssa/loop-15.c: Adjust.
> >> >> >
> >> >> > Index: gcc/fold-const.c
> >> >> > ===================================================================
> >> >> > *** gcc/fold-const.c.orig        2015-10-29 12:32:33.302782318 +0100
> >> >> > --- gcc/fold-const.c     2015-10-29 14:08:39.936497739 +0100
> >> >> > *************** fold_plusminus_mult_expr (location_t loc
> >> >> > *** 6916,6925 ****
> >> >> >       }
> >> >> >     same = NULL_TREE;
> >> >> >
> >> >> > !   if (operand_equal_p (arg01, arg11, 0))
> >> >> > !     same = arg01, alt0 = arg00, alt1 = arg10;
> >> >> > !   else if (operand_equal_p (arg00, arg10, 0))
> >> >> >       same = arg00, alt0 = arg01, alt1 = arg11;
> >> >> >     else if (operand_equal_p (arg00, arg11, 0))
> >> >> >       same = arg00, alt0 = arg01, alt1 = arg10;
> >> >> >     else if (operand_equal_p (arg01, arg10, 0))
> >> >> > --- 6916,6926 ----
> >> >> >       }
> >> >> >     same = NULL_TREE;
> >> >> >
> >> >> > !   /* Prefer factoring a common non-constant.  */
> >> >> > !   if (operand_equal_p (arg00, arg10, 0))
> >> >> >       same = arg00, alt0 = arg01, alt1 = arg11;
> >> >> > +   else if (operand_equal_p (arg01, arg11, 0))
> >> >> > +     same = arg01, alt0 = arg00, alt1 = arg10;
> >> >> >     else if (operand_equal_p (arg00, arg11, 0))
> >> >> >       same = arg00, alt0 = arg01, alt1 = arg10;
> >> >> >     else if (operand_equal_p (arg01, arg10, 0))
> >> >> > *************** fold_plusminus_mult_expr (location_t loc
> >> >> > *** 6974,6987 ****
> >> >> >          }
> >> >> >       }
> >> >> >
> >> >> > !   if (same)
> >> >> >       return fold_build2_loc (loc, MULT_EXPR, type,
> >> >> >                          fold_build2_loc (loc, code, type,
> >> >> >                                       fold_convert_loc (loc, type, alt0),
> >> >> >                                       fold_convert_loc (loc, type, alt1)),
> >> >> >                          fold_convert_loc (loc, type, same));
> >> >> >
> >> >> > !   return NULL_TREE;
> >> >> >   }
> >> >> >
> >> >> >   /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
> >> >> > --- 6975,7010 ----
> >> >> >          }
> >> >> >       }
> >> >> >
> >> >> > !   if (!same)
> >> >> > !     return NULL_TREE;
> >> >> > !
> >> >> > !   if (! INTEGRAL_TYPE_P (type)
> >> >> > !       || TYPE_OVERFLOW_WRAPS (type)
> >> >> > !       /* We are neither factoring zero nor minus one.  */
> >> >> > !       || TREE_CODE (same) == INTEGER_CST)
> >> >> >       return fold_build2_loc (loc, MULT_EXPR, type,
> >> >> >                          fold_build2_loc (loc, code, type,
> >> >> >                                       fold_convert_loc (loc, type, alt0),
> >> >> >                                       fold_convert_loc (loc, type, alt1)),
> >> >> >                          fold_convert_loc (loc, type, same));
> >> >> >
> >> >> > !   /* Same may be zero and thus the operation 'code' may overflow.  Likewise
> >> >> > !      same may be minus one and thus the multiplication may overflow.  Perform
> >> >> > !      the operations in an unsigned type.  */
> >> >> > !   tree utype = unsigned_type_for (type);
> >> >> > !   tree tem = fold_build2_loc (loc, code, utype,
> >> >> > !                              fold_convert_loc (loc, utype, alt0),
> >> >> > !                              fold_convert_loc (loc, utype, alt1));
> >> >> > !   /* If the sum evaluated to a constant that is not -INF the multiplication
> >> >> > !      cannot overflow.  */
> >> >> > !   if (TREE_CODE (tem) == INTEGER_CST
> >> >> > !       && ! wi::eq_p (tem, wi::min_value (TYPE_PRECISION (utype), SIGNED)))
> >> >> > !     return fold_build2_loc (loc, MULT_EXPR, type,
> >> >> > !                            fold_convert (type, tem), same);
> >> >> > !
> >> >> > !   return fold_convert_loc (loc, type,
> >> >> > !                           fold_build2_loc (loc, MULT_EXPR, utype, tem,
> >> >> > !                                            fold_convert_loc (loc, utype, same)));
> >> >> >   }
> >> >> >
> >> >> >   /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
> >> >>
> >> >> Sorry if you already know, but this part means that we can no longer
> >> >> vectorise:
> >> >>
> >> >> int
> >> >> f (int *x, int b1, int b2, int b3)
> >> >> {
> >> >>   int foo = 0;
> >> >>   for (int i1 = 0; i1 < b1; ++i1)
> >> >>     for (int i2 = 0; i2 < b2; ++i2)
> >> >>       for (int i3 = 0; i3 < b3; ++i3)
> >> >>         foo += x[i1 * b2 * b3 + i2 * b3 + (i3 - 1)];
> >> >>   return foo;
> >> >> }
> >> >>
> >> >> We now convert all the arithmetic in the [...] to unsigned int
> >> >> and reassociate it so that the "- 1" is applied last.  We then assume
> >> >> that the overflow in this subtraction is well-defined and that the
> >> >> &x[...] might not be linear for the inner loop.
> >> >
> >> > Can you file a PR?
> >>
> >> OK, filed as PR81082.
> >>
> >> >   # i3_44 = PHI <i3_32(6), 0(4)>
> >> >   i3.2_7 = (unsigned int) i3_44;
> >> >   _9 = i3.2_7 + _34;
> >> >   _10 = (int) _9;
> >> >   _11 = (long unsigned int) _10;
> >> >   _12 = _11 * 4;
> >> >   _13 = x_30(D) + _12;
> >> >   _14 = *_13;
> >> > ...
> >> >    i3_32 = i3_44 + 1;
> >> >
> >> > so we have *(x_30(D) + 4 * ((sizetype)(int)((unsigned){ 0, + , 1} + X)))
> >> >
> >> > It might mean that we need to avoid this re-association.  Not sure how
> >> > to detect worthwhile vs. not worthwhile cases during folding.  At least
> >> > I see no easy way to recover from it in SCEV analysis unless the
> >> > number of iterations is constrained more than in your example.
> >>
> >> Yeah, in the example that this was reduced from, not reassociating
> >> is far more preferable to changing the types.  But like you say,
> >> I've no idea how we'd know that at this stage.
> >
> > In the past I played with not obfuscating things so early during FE
> > lowering, namely not exposing the * 4 but keeping the original types
> > of array / pointer indexes.  On the original mem-ref branch I had
> > an IDX_EXPR that allowed chaining those (a widen-multiply-accumulate
> > op).
> >
> > That said, the context where this reassociation is not interesting is
> > address context, i.e. the multiplication to the byte offset.
> >
> > Of course in your case there's also b3 factored out, which looks
> > like a generally interesting transform (but possibly harmful in address
> > context again).
> >
> > From the FE we get
> >
> > *(x + (sizetype) ((long unsigned int) (((i1 * b2) * b3 + i2 * b3) + (i3 +
> > -1)) * 4))
> >
> > and SCEV uses the fact that the mults/adds have undefined behavior
> > on overflow.  The same missed optimization occurs if you make all those
> > vars unsigned (with or without the folding in question):
> But with unsigned types it's not a missed optimization, because the
> computation could overflow and result in a non-SCEV address.

Correct.

> In this
> case the loop needs to be versioned under overflow/non-overflow
> assumptions to be parallelized/vectorized, right?  Or split for cases
> where we can easily infer the boundary condition of the overflow.

Yes.
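
For the example above the split is even simple: the "- 1" can only
wrap when base + i3 is zero, i.e. in the i3 == 0 iteration with a
zero base, so peeling the first iteration leaves an affine remainder
(again just a sketch, with base standing for the precomputed
i1 * b2 * b3 + i2 * b3 part):

  if (b3 > 0)
    foo += x[base - 1u];          /* peeled i3 == 0, may still wrap */
  for (unsigned int i3 = 1; i3 < b3; ++i3)
    foo += x[base + i3 - 1];      /* i3 >= 1, so the - 1 cannot wrap */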

Richard.
 
> Thanks,
> bin
> >
> > #define U unsigned
> > int
> > f (int *x, U int b1, U int b2, U int b3)
> > {
> >   int foo = 0;
> >   for (U int i1 = 0; i1 < b1; ++i1)
> >     for (U int i2 = 0; i2 < b2; ++i2)
> >       for (U int i3 = 0; i3 < b3; ++i3)
> >         foo += x[i1 * b2 * b3 + i2 * b3 + (i3 - 1)];
> >   return foo;
> > }
> >
> > It works again for unsigned long as there can be no valid object
> > where the computation could wrap around.
> >
> > Richard.
> >
> > --
> > Richard Biener <rguenther@suse.de>
> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)


* Re: [PATCH] Fix PR66313
  2015-10-29  9:49   ` Richard Biener
@ 2015-10-29 18:36     ` Joseph Myers
  0 siblings, 0 replies; 13+ messages in thread
From: Joseph Myers @ 2015-10-29 18:36 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

On Thu, 29 Oct 2015, Richard Biener wrote:

> We still regress gcc.dg/tree-ssa/tailrecursion-6.c with it though.
> There we have
> 
> int
> foo (int a)
> {
>         if (a)
>                 return a * (2 * (foo (a - 1))) + a + 1;
>         else
>                 return 0;
> }
> 
> and tailrecursion detection requires the expression to be
> in the form (2 * (foo (a - 1)) + 1) * a + 1 but folding
> can't see that (2 * (foo (a - 1)) + 1) might not be INT_MIN
> when a is -1.  Well, I can't trivially either; maybe it's
> just because of the recursion, or maybe it's because here
> we are sure that C in C * A is odd (2 * sth + 1), or
> simply because we know that C in (B + C) * A is positive
> and non-zero?
> 
> But I guess for the isolated view of the
> a * (2 * X) + a expression we can't factor it using signed
> arithmetic because of the a == 0 case, as then the sum
> might trivially overflow (of course here a is not zero
> because of the guard...)

In that isolated view, a transformation a * (2 * X) + a -> 
a * (2 * X + 1) is always safe (because Y + 1 only overflows when Y is 
INT_MAX, which is odd, so adding 1 to 2 * X cannot overflow).  That could 
be generalized to cover a * (C * X) + a * D (for C constant, D positive 
constant) if (INT_MAX % C) >= D, and similarly for negative constants.  
I've no idea if such transformations are useful outside this test.

[Safe = not introducing overflows, though it might lose them, which is
a consideration for sanitization.]
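
As a quick sanity check of that condition: for C == 2, D == 1 we have
INT_MAX % 2 == 1 >= 1, which is exactly the a * (2 * X) + a case
above.  For C == 4 the largest multiple of 4 not exceeding INT_MAX is
INT_MAX - 3, so any D <= 3 == INT_MAX % 4 keeps 4 * X + D within
range, while D == 4 could reach INT_MAX + 1.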

-- 
Joseph S. Myers
joseph@codesourcery.com


* Re: [PATCH] Fix PR66313
  2015-10-27 17:38 ` Joseph Myers
@ 2015-10-29  9:49   ` Richard Biener
  2015-10-29 18:36     ` Joseph Myers
  0 siblings, 1 reply; 13+ messages in thread
From: Richard Biener @ 2015-10-29  9:49 UTC (permalink / raw)
  To: Joseph Myers; +Cc: gcc-patches

On Tue, 27 Oct 2015, Joseph Myers wrote:

> On Tue, 27 Oct 2015, Richard Biener wrote:
> 
> > When factoring a*b + a*c to (b + c)*a we have to guard against the
> > case of a == 0, since after the factoring b + c might overflow.
> > Fixed by doing the addition in an unsigned type if required.
> 
> The same applies to a == -1 (consider b and c equal to -(INT_MIN/2), when 
> a*b + a*c is INT_MIN but b+c overflows).  In that case, if you avoid b+c 
> overflowing using an unsigned type, but convert back to signed for the 
> multiplication, you get a spurious overflow from the multiplication 
> instead.

Indeed.  So the following is as good as I can get it.

We still regress gcc.dg/tree-ssa/tailrecursion-6.c with it though.
There we have

int
foo (int a)
{
        if (a)
                return a * (2 * (foo (a - 1))) + a + 1;
        else
                return 0;
}

and tailrecursion detection requires the expression to be
in the form (2 * (foo (a - 1)) + 1) * a + 1 but folding
can't see that (2 * (foo (a - 1)) + 1) might not be INT_MIN
when a is -1.  Well, I can't trivially either; maybe it's
just because of the recursion, or maybe it's because here
we are sure that C in C * A is odd (2 * sth + 1), or
simply because we know that C in (B + C) * A is positive
and non-zero?

But I guess for the isolated view of the
a * (2 * X) + a expression we can't factor it using signed
arithmetic because of the a == 0 case, as then the sum
might trivially overflow (of course here a is not zero
because of the guard...)

Bootstrap / regtest in progress on x86_64-unknown-linux-gnu.

More hole-punching appreciated.  Meanwhile I found PR68142...

Richard.

2015-10-27  Richard Biener  <rguenther@suse.de>

	PR middle-end/66313
	* fold-const.c (fold_plusminus_mult_expr): If the factored
	factor may be zero use a wrapping type for the inner operation.

	* c-c++-common/ubsan/pr66313.c: New testcase.

Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c.orig	2015-10-29 09:27:42.942302445 +0100
+++ gcc/fold-const.c	2015-10-29 10:35:42.794378063 +0100
@@ -6916,10 +6916,11 @@ fold_plusminus_mult_expr (location_t loc
     }
   same = NULL_TREE;
 
-  if (operand_equal_p (arg01, arg11, 0))
-    same = arg01, alt0 = arg00, alt1 = arg10;
-  else if (operand_equal_p (arg00, arg10, 0))
+  /* Prefer factoring a common non-constant.  */
+  if (operand_equal_p (arg00, arg10, 0))
     same = arg00, alt0 = arg01, alt1 = arg11;
+  else if (operand_equal_p (arg01, arg11, 0))
+    same = arg01, alt0 = arg00, alt1 = arg10;
   else if (operand_equal_p (arg00, arg11, 0))
     same = arg00, alt0 = arg01, alt1 = arg10;
   else if (operand_equal_p (arg01, arg10, 0))
@@ -6974,14 +6975,36 @@ fold_plusminus_mult_expr (location_t loc
 	}
     }
 
-  if (same)
+  if (!same)
+    return NULL_TREE;
+
+  if (! INTEGRAL_TYPE_P (type)
+      || TYPE_OVERFLOW_WRAPS (type)
+      /* We are neither factoring zero nor minus one.  */
+      || TREE_CODE (same) == INTEGER_CST)
     return fold_build2_loc (loc, MULT_EXPR, type,
 			fold_build2_loc (loc, code, type,
 				     fold_convert_loc (loc, type, alt0),
 				     fold_convert_loc (loc, type, alt1)),
 			fold_convert_loc (loc, type, same));
 
-  return NULL_TREE;
+  /* Same may be zero and thus the operation 'code' may overflow.  Likewise
+     same may be minus one and thus the multiplication may overflow.  Perform
+     the operations in an unsigned type.  */
+  tree utype = unsigned_type_for (type);
+  tree tem = fold_build2_loc (loc, code, utype,
+			      fold_convert_loc (loc, utype, alt0),
+			      fold_convert_loc (loc, utype, alt1));
+  /* If the sum evaluated to a constant that is not -INF the multiplication
+     cannot overflow.  */
+  if (TREE_CODE (tem) == INTEGER_CST
+      && ! wi::eq_p (tem, wi::min_value (TYPE_PRECISION (utype), SIGNED)))
+    return fold_build2_loc (loc, MULT_EXPR, type,
+			    fold_convert (type, tem), same);
+
+  return fold_convert_loc (loc, type,
+			   fold_build2_loc (loc, MULT_EXPR, utype, tem,
+					    fold_convert_loc (loc, utype, same)));
 }
 
 /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
@@ -7835,6 +7858,7 @@ fold_unary_loc (location_t loc, enum tre
 
     case VIEW_CONVERT_EXPR:
       if (TREE_CODE (op0) == MEM_REF)
+	/* ???  Bogus for aligned types.  */
 	return fold_build2_loc (loc, MEM_REF, type,
 				TREE_OPERAND (op0, 0), TREE_OPERAND (op0, 1));
 
Index: gcc/testsuite/c-c++-common/ubsan/pr66313.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ gcc/testsuite/c-c++-common/ubsan/pr66313.c	2015-10-29 10:37:11.256355126 +0100
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-fsanitize=undefined -fsanitize-undefined-trap-on-error" } */
+
+int __attribute__((noinline,noclone))
+f(int a, int b, int c)
+{
+  return a * b + a * c;
+}
+int __attribute__((noinline,noclone))
+g(int a)
+{
+  return a * (__INT_MAX__/2) + a * (__INT_MAX__/2 + 2);
+}
+int __attribute__((noinline,noclone))
+h(int a, int b)
+{
+  return a * (__INT_MAX__/2 + 1) + b * (__INT_MAX__/2 + 1);
+}
+int main()
+{
+  volatile int tem = f(0, __INT_MAX__, __INT_MAX__);
+  tem = f(-1, __INT_MAX__/2 + 1, __INT_MAX__/2 + 1);
+  tem = g(-1);
+  tem = h(-1, -1);
+  return 0;
+}
Index: gcc/testsuite/gcc.dg/tree-ssa/loop-15.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/loop-15.c.orig	2015-06-09 15:45:27.035343301 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/loop-15.c	2015-10-29 09:29:19.591367630 +0100
@@ -19,8 +19,8 @@ int bla(void)
 }
 
 /* Since the loop is removed, there should be no addition.  */
-/* { dg-final { scan-tree-dump-times "\\+" 0 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "n_. \\* n_." 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
 
 /* The if from the loop header copying remains in the code.  */
 /* { dg-final { scan-tree-dump-times "if " 1 "optimized" } } */


* Re: [PATCH] Fix PR66313
  2015-10-27 14:28 Richard Biener
@ 2015-10-27 17:38 ` Joseph Myers
  2015-10-29  9:49   ` Richard Biener
  0 siblings, 1 reply; 13+ messages in thread
From: Joseph Myers @ 2015-10-27 17:38 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

On Tue, 27 Oct 2015, Richard Biener wrote:

> When factoring a*b + a*c to (b + c)*a we have to guard against the
> case of a == 0, since after the factoring b + c might overflow.
> Fixed by doing the addition in an unsigned type if required.

The same applies to a == -1 (consider b and c equal to -(INT_MIN/2), when 
a*b + a*c is INT_MIN but b+c overflows).  In that case, if you avoid b+c 
overflowing using an unsigned type, but convert back to signed for the 
multiplication, you get a spurious overflow from the multiplication 
instead.
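
Concretely, with 32-bit int: a == -1 and b == c == 1073741824
(== -(INT_MIN/2)) gives a*b + a*c == INT_MIN exactly, with no
overflow anywhere.  But b + c == 2147483648 overflows int, and doing
the sum as unsigned and converting back to signed before multiplying
makes the multiplication compute (-1) * INT_MIN, which overflows
instead.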

-- 
Joseph S. Myers
joseph@codesourcery.com


* [PATCH] Fix PR66313
@ 2015-10-27 14:28 Richard Biener
  2015-10-27 17:38 ` Joseph Myers
  0 siblings, 1 reply; 13+ messages in thread
From: Richard Biener @ 2015-10-27 14:28 UTC (permalink / raw)
  To: gcc-patches


When factoring a*b + a*c to (b + c)*a we have to guard against the
case of a == 0, since after the factoring b + c might overflow.
Fixed by doing the addition in an unsigned type if required.
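
A minimal instance (it is what the f(0, __INT_MAX__, __INT_MAX__)
call in the testcase below exercises): with a == 0 the original
a*b + a*c is a well-defined 0, but the factored sum would be the
signed INT_MAX + INT_MAX and overflow, hence the unsigned inner
operation.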

Bootstrap / regtest pending on x86_64-unknown-linux-gnu.

Richard.

2015-10-27  Richard Biener  <rguenther@suse.de>

	PR middle-end/66313
	* fold-const.c (fold_plusminus_mult_expr): If the factored
	factor may be zero use a wrapping type for the inner operation.

	* c-c++-common/ubsan/pr66313.c: New testcase.

Index: gcc/fold-const.c
===================================================================
*** gcc/fold-const.c	(revision 229404)
--- gcc/fold-const.c	(working copy)
*************** fold_plusminus_mult_expr (location_t loc
*** 6976,6989 ****
  	}
      }
  
!   if (same)
      return fold_build2_loc (loc, MULT_EXPR, type,
  			fold_build2_loc (loc, code, type,
  				     fold_convert_loc (loc, type, alt0),
  				     fold_convert_loc (loc, type, alt1)),
  			fold_convert_loc (loc, type, same));
  
!   return NULL_TREE;
  }
  
  /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
--- 6989,7016 ----
  	}
      }
  
!   if (!same)
!     return NULL_TREE;
! 
!   if (! INTEGRAL_TYPE_P (type)
!       || TYPE_OVERFLOW_WRAPS (type)
!       || TREE_CODE (same) == INTEGER_CST)
      return fold_build2_loc (loc, MULT_EXPR, type,
  			fold_build2_loc (loc, code, type,
  				     fold_convert_loc (loc, type, alt0),
  				     fold_convert_loc (loc, type, alt1)),
  			fold_convert_loc (loc, type, same));
  
!   /* Same may be zero and thus the operation 'code' may overflow.  Perform
!      the addition in an unsigned type.  */
!   tree utype = unsigned_type_for (type);
!   return fold_build2_loc (loc, MULT_EXPR, type,
! 			  fold_convert_loc
! 			    (loc, type,
! 			     fold_build2_loc (loc, code, utype,
! 					  fold_convert_loc (loc, utype, alt0),
! 					  fold_convert_loc (loc, utype, alt1))),
! 			  fold_convert_loc (loc, type, same));
  }
  
  /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
Index: gcc/testsuite/c-c++-common/ubsan/pr66313.c
===================================================================
*** gcc/testsuite/c-c++-common/ubsan/pr66313.c	(revision 0)
--- gcc/testsuite/c-c++-common/ubsan/pr66313.c	(working copy)
***************
*** 0 ****
--- 1,12 ----
+ /* { dg-do run } */
+ /* { dg-options "-fsanitize=undefined -fsanitize-undefined-trap-on-error" } */
+ 
+ int __attribute__((noinline,noclone))
+ f(int a, int b, int c)
+ {
+   return a * b + a * c;
+ }
+ int main()
+ {
+   return f(0, __INT_MAX__, __INT_MAX__);
+ }
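
For reproducing outside the testsuite harness, something like the
following should trap at runtime with an unpatched compiler (command
line only for illustration, any UBSan-enabled configuration will do):

  gcc -O2 -fsanitize=undefined -fsanitize-undefined-trap-on-error pr66313.c
  ./a.out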


Thread overview: 13+ messages
2017-05-31 14:09 [PATCH] Fix PR66313 Richard Biener
2017-06-13  9:57 ` Richard Sandiford
2017-06-13 10:25   ` Richard Biener
2017-06-13 11:23     ` Richard Sandiford
2017-06-13 11:28       ` Bin.Cheng
2017-06-13 11:49         ` Richard Biener
2017-06-13 11:48       ` Richard Biener
2017-06-13 11:56         ` Bin.Cheng
2017-06-13 12:03           ` Richard Biener
  -- strict thread matches above, loose matches on Subject: below --
2015-10-27 14:28 Richard Biener
2015-10-27 17:38 ` Joseph Myers
2015-10-29  9:49   ` Richard Biener
2015-10-29 18:36     ` Joseph Myers
