public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Add value range info for affine combination to improve store motion (PR83403)
@ 2020-04-28  6:17 Xionghu Luo
  2020-04-28  7:01 ` Richard Biener
  0 siblings, 1 reply; 9+ messages in thread
From: Xionghu Luo @ 2020-04-28  6:17 UTC (permalink / raw)
  To: gcc-patches
  Cc: segher, dje.gcc, wschmidt, guojiufu, linkw, rguenther, ook,
	amker.cheng, Xionghu Luo

From: Xionghu Luo <luoxhu@linux.ibm.com>

Get and propagate value range info to convert expressions with convert
operation on PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow.  i.e.:

(long unsigned int)((unsigned  int)n * 10 + 1)
=>
(long unsigned int)((unsigned  int) n * (long unsigned int)10 + (long unsigned int)1)

With this patch for affine combination, load/store motion could detect
more address refs independency and promote some memory expressions to
registers within loop.

PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.

Bootstrap and regression tested pass on Power8-LE.  Any comments?
Thanks.

gcc/ChangeLog

	2020-04-28  Xiong Hu Luo  <luoxhu@linux.ibm.com>

	PR tree-optimization/83403
	* tree-affine.c (aff_combination_convert): New parameter
	value_range.
	(expr_to_aff_combination): Use range info to check CONVERT
	expr on PLUS_EXPR/MINUS_EXPR/MULT_EXPR.
	(tree_to_aff_combination): Likewise.
	(aff_combination_expand): Get and propagate range info.
	* tree-affine.h: Include value-range.h.
	(aff_combination_convert): New parameter value_range.
	(tree_to_aff_combination): Likewise.
	(aff_combination_expand): Likewise.
---
 gcc/tree-affine.c | 66 +++++++++++++++++++++++++++--------------------
 gcc/tree-affine.h |  8 +++---
 2 files changed, 43 insertions(+), 31 deletions(-)

diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
index 0eb8db1b086..63f8acd4c73 100644
--- a/gcc/tree-affine.c
+++ b/gcc/tree-affine.c
@@ -220,7 +220,7 @@ aff_combination_add (aff_tree *comb1, aff_tree *comb2)
 /* Converts affine combination COMB to TYPE.  */
 
 void
-aff_combination_convert (aff_tree *comb, tree type)
+aff_combination_convert (aff_tree *comb, tree type, value_range *vr)
 {
   unsigned i, j;
   tree comb_type = comb->type;
@@ -228,7 +228,7 @@ aff_combination_convert (aff_tree *comb, tree type)
   if  (TYPE_PRECISION (type) > TYPE_PRECISION (comb_type))
     {
       tree val = fold_convert (type, aff_combination_to_tree (comb));
-      tree_to_aff_combination (val, type, comb);
+      tree_to_aff_combination (val, type, comb, vr);
       return;
     }
 
@@ -263,8 +263,8 @@ aff_combination_convert (aff_tree *comb, tree type)
    true when that was successful and returns the combination in COMB.  */
 
 static bool
-expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
-			 tree op0, tree op1 = NULL_TREE)
+expr_to_aff_combination (aff_tree *comb, tree_code code, tree type, tree op0,
+			 tree op1 = NULL_TREE, value_range *vr = NULL)
 {
   aff_tree tmp;
   poly_int64 bitpos, bitsize, bytepos;
@@ -279,8 +279,8 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
 
     case PLUS_EXPR:
     case MINUS_EXPR:
-      tree_to_aff_combination (op0, type, comb);
-      tree_to_aff_combination (op1, type, &tmp);
+      tree_to_aff_combination (op0, type, comb, vr);
+      tree_to_aff_combination (op1, type, &tmp, vr);
       if (code == MINUS_EXPR)
 	aff_combination_scale (&tmp, -1);
       aff_combination_add (comb, &tmp);
@@ -289,7 +289,7 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
     case MULT_EXPR:
       if (TREE_CODE (op1) != INTEGER_CST)
 	break;
-      tree_to_aff_combination (op0, type, comb);
+      tree_to_aff_combination (op0, type, comb, vr);
       aff_combination_scale (comb, wi::to_widest (op1));
       return true;
 
@@ -340,27 +340,33 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
 		op1 = fold_convert (otype, op1);
 		return expr_to_aff_combination (comb, icode, otype, op0, op1);
 	      }
-	    wide_int minv, maxv;
+
 	    /* If inner type has wrapping overflow behavior, fold conversion
-	       for below case:
-		 (T1)(X - CST) -> (T1)X - (T1)CST
-	       if X - CST doesn't overflow by range information.  Also handle
-	       (T1)(X + CST) as (T1)(X - (-CST)).  */
-	    if (TYPE_UNSIGNED (itype)
+	       for below cases:
+		 (T1)(X *+- CST) -> (T1)X *+- (T1)CST
+	       when X *+- CST doesn't overflow by range information.  */
+	    if (vr && vr->kind () == VR_RANGE && TYPE_UNSIGNED (itype)
 		&& TYPE_OVERFLOW_WRAPS (itype)
-		&& TREE_CODE (op0) == SSA_NAME
-		&& TREE_CODE (op1) == INTEGER_CST
-		&& icode != MULT_EXPR
-		&& get_range_info (op0, &minv, &maxv) == VR_RANGE)
+		&& TREE_CODE (op1) == INTEGER_CST)
 	      {
-		if (icode == PLUS_EXPR)
-		  op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
-		if (wi::geu_p (minv, wi::to_wide (op1)))
+		wide_int minv, maxv;
+		minv = wi::to_wide (vr->min ());
+		maxv = wi::to_wide (vr->max ());
+		if ((icode == MULT_EXPR || icode == PLUS_EXPR)
+		    && wi::leu_p (maxv, wi::to_wide (TYPE_MAX_VALUE (itype))))
+		  {
+		    op0 = fold_convert (otype, op0);
+		    op1 = fold_convert (otype, op1);
+		    return expr_to_aff_combination (comb, icode, otype, op0,
+						    op1, vr);
+		  }
+		else if (icode == MINUS_EXPR
+			 && wi::geu_p (minv, wi::to_wide (op1)))
 		  {
 		    op0 = fold_convert (otype, op0);
 		    op1 = fold_convert (otype, op1);
 		    return expr_to_aff_combination (comb, MINUS_EXPR, otype,
-						    op0, op1);
+						    op0, op1, vr);
 		  }
 	      }
 	  }
@@ -376,7 +382,7 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
 /* Splits EXPR into an affine combination of parts.  */
 
 void
-tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
+tree_to_aff_combination (tree expr, tree type, aff_tree *comb, value_range *vr)
 {
   aff_tree tmp;
   enum tree_code code;
@@ -408,10 +414,10 @@ tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
     CASE_CONVERT:
       /* ???  TREE_TYPE (expr) should be equal to type here, but IVOPTS
 	 calls this with not showing an outer widening cast.  */
-      if (expr_to_aff_combination (comb, code,
-				   TREE_TYPE (expr), TREE_OPERAND (expr, 0)))
+      if (expr_to_aff_combination (comb, code, TREE_TYPE (expr),
+				   TREE_OPERAND (expr, 0), NULL_TREE, vr))
 	{
-	  aff_combination_convert (comb, type);
+	  aff_combination_convert (comb, type, vr);
 	  return;
 	}
       break;
@@ -709,7 +715,8 @@ public:
 
 void
 aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
-			hash_map<tree, name_expansion *> **cache)
+			hash_map<tree, name_expansion *> **cache,
+			value_range *vr)
 {
   unsigned i;
   aff_tree to_add, current, curre;
@@ -800,7 +807,10 @@ aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
 	  if (!*cache)
 	    *cache = new hash_map<tree, name_expansion *>;
 	  (*cache)->put (name, exp);
-	  aff_combination_expand (&current, cache);
+	  value_range vr = NULL;
+	  if (!POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (def))))
+	    get_range_info (gimple_assign_rhs1 (def), vr);
+	  aff_combination_expand (&current, cache, &vr);
 	  exp->expansion = current;
 	  exp->in_progress = 0;
 	}
@@ -812,7 +822,7 @@ aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
 	  current = exp->expansion;
 	}
       if (!useless_type_conversion_p (comb->type, current.type))
-	aff_combination_convert (&current, comb->type);
+	aff_combination_convert (&current, comb->type, vr);
 
       /* Accumulate the new terms to TO_ADD, so that we do not modify
 	 COMB while traversing it; include the term -coef * E, to remove
diff --git a/gcc/tree-affine.h b/gcc/tree-affine.h
index 4ec4924879d..311e5e7c23a 100644
--- a/gcc/tree-affine.h
+++ b/gcc/tree-affine.h
@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef GCC_TREE_AFFINE_H
 #define GCC_TREE_AFFINE_H
 
+#include "value-range.h"
 
 #define MAX_AFF_ELTS 8
 
@@ -73,13 +74,14 @@ void aff_combination_mult (aff_tree *, aff_tree *, aff_tree *);
 void aff_combination_add (aff_tree *, aff_tree *);
 void aff_combination_add_elt (aff_tree *, tree, const widest_int &);
 void aff_combination_remove_elt (aff_tree *, unsigned);
-void aff_combination_convert (aff_tree *, tree);
-void tree_to_aff_combination (tree, tree, aff_tree *);
+void aff_combination_convert (aff_tree *, tree, value_range *vr = NULL);
+void tree_to_aff_combination (tree, tree, aff_tree *, value_range *vr = NULL);
 tree aff_combination_to_tree (aff_tree *);
 void unshare_aff_combination (aff_tree *);
 bool aff_combination_constant_multiple_p (aff_tree *, aff_tree *,
 					  poly_widest_int *);
-void aff_combination_expand (aff_tree *, hash_map<tree, name_expansion *> **);
+void aff_combination_expand (aff_tree *, hash_map<tree, name_expansion *> **,
+			     value_range *vr = NULL);
 void tree_to_aff_combination_expand (tree, tree, aff_tree *,
 				     hash_map<tree, name_expansion *> **);
 tree get_inner_reference_aff (tree, aff_tree *, poly_widest_int *);
-- 
2.21.0.777.g83232e3864


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] Add value range info for affine combination to improve store motion (PR83403)
  2020-04-28  6:17 [PATCH] Add value range info for affine combination to improve store motion (PR83403) Xionghu Luo
@ 2020-04-28  7:01 ` Richard Biener
  2020-04-28 10:04   ` luoxhu
  0 siblings, 1 reply; 9+ messages in thread
From: Richard Biener @ 2020-04-28  7:01 UTC (permalink / raw)
  To: Xionghu Luo
  Cc: gcc-patches, segher, dje.gcc, wschmidt, guojiufu, linkw, ook,
	amker.cheng, Xionghu Luo

On Tue, 28 Apr 2020, Xionghu Luo wrote:

> From: Xionghu Luo <luoxhu@linux.ibm.com>
> 
> Get and propagate value range info to convert expressions with convert
> operation on PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow.  i.e.:
> 
> (long unsigned int)((unsigned  int)n * 10 + 1)
> =>
> (long unsigned int)((unsigned  int) n * (long unsigned int)10 + (long unsigned int)1)
> 
> With this patch for affine combination, load/store motion could detect
> more address refs independency and promote some memory expressions to
> registers within loop.
> 
> PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
> to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.
> 
> Bootstrap and regression tested pass on Power8-LE.  Any comments?
> Thanks.

So the core of the patch is adding handling of MULT_EXPR and the rest
is a no-op?  It's not clear from the patch what passing down a
value-range does and your description doesn't say anything about this
either.

Richard.

> gcc/ChangeLog
> 
> 	2020-04-28  Xiong Hu Luo  <luoxhu@linux.ibm.com>
> 
> 	PR tree-optimization/83403
> 	* tree-affine.c (aff_combination_convert): New parameter
> 	value_range.
> 	(expr_to_aff_combination): Use range info to check CONVERT
> 	expr on PLUS_EXPR/MINUS_EXPR/MULT_EXPR.
> 	(tree_to_aff_combination): Likewise.
> 	(aff_combination_expand): Get and propagate range info.
> 	* tree-affine.h: Include value-range.h.
> 	(aff_combination_convert): New parameter value_range.
> 	(tree_to_aff_combination): Likewise.
> 	(aff_combination_expand): Likewise.
> ---
>  gcc/tree-affine.c | 66 +++++++++++++++++++++++++++--------------------
>  gcc/tree-affine.h |  8 +++---
>  2 files changed, 43 insertions(+), 31 deletions(-)
> 
> diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
> index 0eb8db1b086..63f8acd4c73 100644
> --- a/gcc/tree-affine.c
> +++ b/gcc/tree-affine.c
> @@ -220,7 +220,7 @@ aff_combination_add (aff_tree *comb1, aff_tree *comb2)
>  /* Converts affine combination COMB to TYPE.  */
>  
>  void
> -aff_combination_convert (aff_tree *comb, tree type)
> +aff_combination_convert (aff_tree *comb, tree type, value_range *vr)
>  {
>    unsigned i, j;
>    tree comb_type = comb->type;
> @@ -228,7 +228,7 @@ aff_combination_convert (aff_tree *comb, tree type)
>    if  (TYPE_PRECISION (type) > TYPE_PRECISION (comb_type))
>      {
>        tree val = fold_convert (type, aff_combination_to_tree (comb));
> -      tree_to_aff_combination (val, type, comb);
> +      tree_to_aff_combination (val, type, comb, vr);
>        return;
>      }
>  
> @@ -263,8 +263,8 @@ aff_combination_convert (aff_tree *comb, tree type)
>     true when that was successful and returns the combination in COMB.  */
>  
>  static bool
> -expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
> -			 tree op0, tree op1 = NULL_TREE)
> +expr_to_aff_combination (aff_tree *comb, tree_code code, tree type, tree op0,
> +			 tree op1 = NULL_TREE, value_range *vr = NULL)
>  {
>    aff_tree tmp;
>    poly_int64 bitpos, bitsize, bytepos;
> @@ -279,8 +279,8 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>  
>      case PLUS_EXPR:
>      case MINUS_EXPR:
> -      tree_to_aff_combination (op0, type, comb);
> -      tree_to_aff_combination (op1, type, &tmp);
> +      tree_to_aff_combination (op0, type, comb, vr);
> +      tree_to_aff_combination (op1, type, &tmp, vr);
>        if (code == MINUS_EXPR)
>  	aff_combination_scale (&tmp, -1);
>        aff_combination_add (comb, &tmp);
> @@ -289,7 +289,7 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>      case MULT_EXPR:
>        if (TREE_CODE (op1) != INTEGER_CST)
>  	break;
> -      tree_to_aff_combination (op0, type, comb);
> +      tree_to_aff_combination (op0, type, comb, vr);
>        aff_combination_scale (comb, wi::to_widest (op1));
>        return true;
>  
> @@ -340,27 +340,33 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>  		op1 = fold_convert (otype, op1);
>  		return expr_to_aff_combination (comb, icode, otype, op0, op1);
>  	      }
> -	    wide_int minv, maxv;
> +
>  	    /* If inner type has wrapping overflow behavior, fold conversion
> -	       for below case:
> -		 (T1)(X - CST) -> (T1)X - (T1)CST
> -	       if X - CST doesn't overflow by range information.  Also handle
> -	       (T1)(X + CST) as (T1)(X - (-CST)).  */
> -	    if (TYPE_UNSIGNED (itype)
> +	       for below cases:
> +		 (T1)(X *+- CST) -> (T1)X *+- (T1)CST
> +	       when X *+- CST doesn't overflow by range information.  */
> +	    if (vr && vr->kind () == VR_RANGE && TYPE_UNSIGNED (itype)
>  		&& TYPE_OVERFLOW_WRAPS (itype)
> -		&& TREE_CODE (op0) == SSA_NAME
> -		&& TREE_CODE (op1) == INTEGER_CST
> -		&& icode != MULT_EXPR
> -		&& get_range_info (op0, &minv, &maxv) == VR_RANGE)
> +		&& TREE_CODE (op1) == INTEGER_CST)
>  	      {
> -		if (icode == PLUS_EXPR)
> -		  op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
> -		if (wi::geu_p (minv, wi::to_wide (op1)))
> +		wide_int minv, maxv;
> +		minv = wi::to_wide (vr->min ());
> +		maxv = wi::to_wide (vr->max ());
> +		if ((icode == MULT_EXPR || icode == PLUS_EXPR)
> +		    && wi::leu_p (maxv, wi::to_wide (TYPE_MAX_VALUE (itype))))
> +		  {
> +		    op0 = fold_convert (otype, op0);
> +		    op1 = fold_convert (otype, op1);
> +		    return expr_to_aff_combination (comb, icode, otype, op0,
> +						    op1, vr);
> +		  }
> +		else if (icode == MINUS_EXPR
> +			 && wi::geu_p (minv, wi::to_wide (op1)))
>  		  {
>  		    op0 = fold_convert (otype, op0);
>  		    op1 = fold_convert (otype, op1);
>  		    return expr_to_aff_combination (comb, MINUS_EXPR, otype,
> -						    op0, op1);
> +						    op0, op1, vr);
>  		  }
>  	      }
>  	  }
> @@ -376,7 +382,7 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>  /* Splits EXPR into an affine combination of parts.  */
>  
>  void
> -tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
> +tree_to_aff_combination (tree expr, tree type, aff_tree *comb, value_range *vr)
>  {
>    aff_tree tmp;
>    enum tree_code code;
> @@ -408,10 +414,10 @@ tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
>      CASE_CONVERT:
>        /* ???  TREE_TYPE (expr) should be equal to type here, but IVOPTS
>  	 calls this with not showing an outer widening cast.  */
> -      if (expr_to_aff_combination (comb, code,
> -				   TREE_TYPE (expr), TREE_OPERAND (expr, 0)))
> +      if (expr_to_aff_combination (comb, code, TREE_TYPE (expr),
> +				   TREE_OPERAND (expr, 0), NULL_TREE, vr))
>  	{
> -	  aff_combination_convert (comb, type);
> +	  aff_combination_convert (comb, type, vr);
>  	  return;
>  	}
>        break;
> @@ -709,7 +715,8 @@ public:
>  
>  void
>  aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
> -			hash_map<tree, name_expansion *> **cache)
> +			hash_map<tree, name_expansion *> **cache,
> +			value_range *vr)
>  {
>    unsigned i;
>    aff_tree to_add, current, curre;
> @@ -800,7 +807,10 @@ aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
>  	  if (!*cache)
>  	    *cache = new hash_map<tree, name_expansion *>;
>  	  (*cache)->put (name, exp);
> -	  aff_combination_expand (&current, cache);
> +	  value_range vr = NULL;
> +	  if (!POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (def))))
> +	    get_range_info (gimple_assign_rhs1 (def), vr);
> +	  aff_combination_expand (&current, cache, &vr);
>  	  exp->expansion = current;
>  	  exp->in_progress = 0;
>  	}
> @@ -812,7 +822,7 @@ aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
>  	  current = exp->expansion;
>  	}
>        if (!useless_type_conversion_p (comb->type, current.type))
> -	aff_combination_convert (&current, comb->type);
> +	aff_combination_convert (&current, comb->type, vr);
>  
>        /* Accumulate the new terms to TO_ADD, so that we do not modify
>  	 COMB while traversing it; include the term -coef * E, to remove
> diff --git a/gcc/tree-affine.h b/gcc/tree-affine.h
> index 4ec4924879d..311e5e7c23a 100644
> --- a/gcc/tree-affine.h
> +++ b/gcc/tree-affine.h
> @@ -23,6 +23,7 @@ along with GCC; see the file COPYING3.  If not see
>  #ifndef GCC_TREE_AFFINE_H
>  #define GCC_TREE_AFFINE_H
>  
> +#include "value-range.h"
>  
>  #define MAX_AFF_ELTS 8
>  
> @@ -73,13 +74,14 @@ void aff_combination_mult (aff_tree *, aff_tree *, aff_tree *);
>  void aff_combination_add (aff_tree *, aff_tree *);
>  void aff_combination_add_elt (aff_tree *, tree, const widest_int &);
>  void aff_combination_remove_elt (aff_tree *, unsigned);
> -void aff_combination_convert (aff_tree *, tree);
> -void tree_to_aff_combination (tree, tree, aff_tree *);
> +void aff_combination_convert (aff_tree *, tree, value_range *vr = NULL);
> +void tree_to_aff_combination (tree, tree, aff_tree *, value_range *vr = NULL);
>  tree aff_combination_to_tree (aff_tree *);
>  void unshare_aff_combination (aff_tree *);
>  bool aff_combination_constant_multiple_p (aff_tree *, aff_tree *,
>  					  poly_widest_int *);
> -void aff_combination_expand (aff_tree *, hash_map<tree, name_expansion *> **);
> +void aff_combination_expand (aff_tree *, hash_map<tree, name_expansion *> **,
> +			     value_range *vr = NULL);
>  void tree_to_aff_combination_expand (tree, tree, aff_tree *,
>  				     hash_map<tree, name_expansion *> **);
>  tree get_inner_reference_aff (tree, aff_tree *, poly_widest_int *);
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] Add value range info for affine combination to improve store motion (PR83403)
  2020-04-28  7:01 ` Richard Biener
@ 2020-04-28 10:04   ` luoxhu
  2020-04-28 10:30     ` Richard Biener
  0 siblings, 1 reply; 9+ messages in thread
From: luoxhu @ 2020-04-28 10:04 UTC (permalink / raw)
  To: Richard Biener, Xionghu Luo
  Cc: gcc-patches, segher, dje.gcc, wschmidt, guojiufu, linkw, ook,
	amker.cheng



On 2020/4/28 15:01, Richard Biener wrote:
> On Tue, 28 Apr 2020, Xionghu Luo wrote:
> 
>> From: Xionghu Luo <luoxhu@linux.ibm.com>
>>
>> Get and propagate value range info to convert expressions with convert
>> operation on PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow.  i.e.:
>>
>> (long unsigned int)((unsigned  int)n * 10 + 1)
>> =>
>> (long unsigned int)((unsigned  int) n * (long unsigned int)10 + (long unsigned int)1)
>>
>> With this patch for affine combination, load/store motion could detect
>> more address refs independency and promote some memory expressions to
>> registers within loop.
>>
>> PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
>> to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.
>>
>> Bootstrap and regression tested pass on Power8-LE.  Any comments?
>> Thanks.
> 
> So the core of the patch is adding handling of MULT_EXPR and the rest
> is a no-op?  It's not clear from the patch what passing down a
> value-range does and your description doesn't say anything about this
> either.

This patch handles MULT_EXPR first, and also handles (long unsigned int)(n_93 * 10 + 1) as
"n_93*10" is not a ssa_name by using the value_range passed down, minv&maxv [1,81] is the 
range info of "n_93 * 10 +1", so wi::leu_p (maxv, wi::to_wide (TYPE_MAX_VALUE (itype)) in
expr_to_aff_combination could ensure that no wrapping overflow of this converting.  Not sure
if it's better described now...  And some debug message as follows:

131t.lim2:
# n_93 = PHI <0(2), n_36(7)>
_118 = n_93 * 10;
_120 = (long unsigned int) _118;
_121 = _120 * 8;
_122 = &C[0][0] + _121;
*_122 = 0.0;
_128 = _118 + 1;
_129 = (long unsigned int) _128;
_130 = _129 * 8;
_131 = &C[0][0] + _130;
*_131 = 0.0;
_137 = _118 + 2;
_138 = (long unsigned int) _137;
_139 = _138 * 8;
_140 = &C[0][0] + _139;
*_140 = 0.0;


Breakpoint 28, expr_to_aff_combination (comb=0x7fffffffc068, code=NOP_EXPR, type=<integer_type 0x7ffff53c07
e0 long unsigned int>, op0=<plus_expr 0x7ffff56c0988>, op1=<tree 0x0>, vr=0x7fffffffc490) at ../../gcc-mast
er/gcc/tree-affine.c:350
92: itype = <integer_type 0x7ffff53c0690 unsigned int>
93: otype = <integer_type 0x7ffff53c07e0 long unsigned int>
94: icode = PLUS_EXPR
95: code = NOP_EXPR
(gdb) ptree op0
 <mult_expr 0x7ffff56c0960
    type <integer_type 0x7ffff53c0690 unsigned int sizes-gimplified public unsigned SI
        size <integer_cst 0x7ffff52f1200 constant 32>
        unit-size <integer_cst 0x7ffff52f1218 constant 4>
        align:32 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff53c0690 precision:32 min <
integer_cst 0x7ffff52f1230 0> max <integer_cst 0x7ffff52f11e8 4294967295> context <translation_unit_decl 0x
7ffff5552850 pr83403.c>
        pointer_to_this <pointer_type 0x7ffff53c4c20>>

    arg:0 <ssa_name 0x7ffff5394a88 type <integer_type 0x7ffff53c0690 unsigned int>
        visited var <var_decl 0x7ffff72e2520 n>
        def_stmt n_93 = PHI <0(2), n_36(7)>
        version:93
        ptr-info 0x7ffff530b680>
    arg:1 <integer_cst 0x7ffff52feda8 type <integer_type 0x7ffff53c0690 unsigned int> constant 10>>
(gdb) ptree op1
 <integer_cst 0x7ffff52fed48 type <integer_type 0x7ffff53c0690 unsigned int> constant 1>
(gdb) p minv
$580 = {
  <wide_int_storage> = {
    val = {1, 0, 140737310886240},
    len = 1,
    precision = 32
  },
  members of generic_wide_int<wide_int_storage>:
  static is_sign_extended = true
}
(gdb) p maxv
$581 = {
  <wide_int_storage> = {
    val = {81, 65, 40},
    len = 1,
    precision = 32
  },
  members of generic_wide_int<wide_int_storage>:
  static is_sign_extended = true
}


Thanks,
Xionghu

> 
> Richard.
> 
>> gcc/ChangeLog
>>
>> 	2020-04-28  Xiong Hu Luo  <luoxhu@linux.ibm.com>
>>
>> 	PR tree-optimization/83403
>> 	* tree-affine.c (aff_combination_convert): New parameter
>> 	value_range.
>> 	(expr_to_aff_combination): Use range info to check CONVERT
>> 	expr on PLUS_EXPR/MINUS_EXPR/MULT_EXPR.
>> 	(tree_to_aff_combination): Likewise.
>> 	(aff_combination_expand): Get and propagate range info.
>> 	* tree-affine.h: Include value-range.h.
>> 	(aff_combination_convert): New parameter value_range.
>> 	(tree_to_aff_combination): Likewise.
>> 	(aff_combination_expand): Likewise.
>> ---
>>   gcc/tree-affine.c | 66 +++++++++++++++++++++++++++--------------------
>>   gcc/tree-affine.h |  8 +++---
>>   2 files changed, 43 insertions(+), 31 deletions(-)
>>
>> diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
>> index 0eb8db1b086..63f8acd4c73 100644
>> --- a/gcc/tree-affine.c
>> +++ b/gcc/tree-affine.c
>> @@ -220,7 +220,7 @@ aff_combination_add (aff_tree *comb1, aff_tree *comb2)
>>   /* Converts affine combination COMB to TYPE.  */
>>   
>>   void
>> -aff_combination_convert (aff_tree *comb, tree type)
>> +aff_combination_convert (aff_tree *comb, tree type, value_range *vr)
>>   {
>>     unsigned i, j;
>>     tree comb_type = comb->type;
>> @@ -228,7 +228,7 @@ aff_combination_convert (aff_tree *comb, tree type)
>>     if  (TYPE_PRECISION (type) > TYPE_PRECISION (comb_type))
>>       {
>>         tree val = fold_convert (type, aff_combination_to_tree (comb));
>> -      tree_to_aff_combination (val, type, comb);
>> +      tree_to_aff_combination (val, type, comb, vr);
>>         return;
>>       }
>>   
>> @@ -263,8 +263,8 @@ aff_combination_convert (aff_tree *comb, tree type)
>>      true when that was successful and returns the combination in COMB.  */
>>   
>>   static bool
>> -expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>> -			 tree op0, tree op1 = NULL_TREE)
>> +expr_to_aff_combination (aff_tree *comb, tree_code code, tree type, tree op0,
>> +			 tree op1 = NULL_TREE, value_range *vr = NULL)
>>   {
>>     aff_tree tmp;
>>     poly_int64 bitpos, bitsize, bytepos;
>> @@ -279,8 +279,8 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>>   
>>       case PLUS_EXPR:
>>       case MINUS_EXPR:
>> -      tree_to_aff_combination (op0, type, comb);
>> -      tree_to_aff_combination (op1, type, &tmp);
>> +      tree_to_aff_combination (op0, type, comb, vr);
>> +      tree_to_aff_combination (op1, type, &tmp, vr);
>>         if (code == MINUS_EXPR)
>>   	aff_combination_scale (&tmp, -1);
>>         aff_combination_add (comb, &tmp);
>> @@ -289,7 +289,7 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>>       case MULT_EXPR:
>>         if (TREE_CODE (op1) != INTEGER_CST)
>>   	break;
>> -      tree_to_aff_combination (op0, type, comb);
>> +      tree_to_aff_combination (op0, type, comb, vr);
>>         aff_combination_scale (comb, wi::to_widest (op1));
>>         return true;
>>   
>> @@ -340,27 +340,33 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>>   		op1 = fold_convert (otype, op1);
>>   		return expr_to_aff_combination (comb, icode, otype, op0, op1);
>>   	      }
>> -	    wide_int minv, maxv;
>> +
>>   	    /* If inner type has wrapping overflow behavior, fold conversion
>> -	       for below case:
>> -		 (T1)(X - CST) -> (T1)X - (T1)CST
>> -	       if X - CST doesn't overflow by range information.  Also handle
>> -	       (T1)(X + CST) as (T1)(X - (-CST)).  */
>> -	    if (TYPE_UNSIGNED (itype)
>> +	       for below cases:
>> +		 (T1)(X *+- CST) -> (T1)X *+- (T1)CST
>> +	       when X *+- CST doesn't overflow by range information.  */
>> +	    if (vr && vr->kind () == VR_RANGE && TYPE_UNSIGNED (itype)
>>   		&& TYPE_OVERFLOW_WRAPS (itype)
>> -		&& TREE_CODE (op0) == SSA_NAME
>> -		&& TREE_CODE (op1) == INTEGER_CST
>> -		&& icode != MULT_EXPR
>> -		&& get_range_info (op0, &minv, &maxv) == VR_RANGE)
>> +		&& TREE_CODE (op1) == INTEGER_CST)
>>   	      {
>> -		if (icode == PLUS_EXPR)
>> -		  op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
>> -		if (wi::geu_p (minv, wi::to_wide (op1)))
>> +		wide_int minv, maxv;
>> +		minv = wi::to_wide (vr->min ());
>> +		maxv = wi::to_wide (vr->max ());
>> +		if ((icode == MULT_EXPR || icode == PLUS_EXPR)
>> +		    && wi::leu_p (maxv, wi::to_wide (TYPE_MAX_VALUE (itype))))
>> +		  {
>> +		    op0 = fold_convert (otype, op0);
>> +		    op1 = fold_convert (otype, op1);
>> +		    return expr_to_aff_combination (comb, icode, otype, op0,
>> +						    op1, vr);
>> +		  }
>> +		else if (icode == MINUS_EXPR
>> +			 && wi::geu_p (minv, wi::to_wide (op1)))
>>   		  {
>>   		    op0 = fold_convert (otype, op0);
>>   		    op1 = fold_convert (otype, op1);
>>   		    return expr_to_aff_combination (comb, MINUS_EXPR, otype,
>> -						    op0, op1);
>> +						    op0, op1, vr);
>>   		  }
>>   	      }
>>   	  }
>> @@ -376,7 +382,7 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>>   /* Splits EXPR into an affine combination of parts.  */
>>   
>>   void
>> -tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
>> +tree_to_aff_combination (tree expr, tree type, aff_tree *comb, value_range *vr)
>>   {
>>     aff_tree tmp;
>>     enum tree_code code;
>> @@ -408,10 +414,10 @@ tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
>>       CASE_CONVERT:
>>         /* ???  TREE_TYPE (expr) should be equal to type here, but IVOPTS
>>   	 calls this with not showing an outer widening cast.  */
>> -      if (expr_to_aff_combination (comb, code,
>> -				   TREE_TYPE (expr), TREE_OPERAND (expr, 0)))
>> +      if (expr_to_aff_combination (comb, code, TREE_TYPE (expr),
>> +				   TREE_OPERAND (expr, 0), NULL_TREE, vr))
>>   	{
>> -	  aff_combination_convert (comb, type);
>> +	  aff_combination_convert (comb, type, vr);
>>   	  return;
>>   	}
>>         break;
>> @@ -709,7 +715,8 @@ public:
>>   
>>   void
>>   aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
>> -			hash_map<tree, name_expansion *> **cache)
>> +			hash_map<tree, name_expansion *> **cache,
>> +			value_range *vr)
>>   {
>>     unsigned i;
>>     aff_tree to_add, current, curre;
>> @@ -800,7 +807,10 @@ aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
>>   	  if (!*cache)
>>   	    *cache = new hash_map<tree, name_expansion *>;
>>   	  (*cache)->put (name, exp);
>> -	  aff_combination_expand (&current, cache);
>> +	  value_range vr = NULL;
>> +	  if (!POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (def))))
>> +	    get_range_info (gimple_assign_rhs1 (def), vr);
>> +	  aff_combination_expand (&current, cache, &vr);
>>   	  exp->expansion = current;
>>   	  exp->in_progress = 0;
>>   	}
>> @@ -812,7 +822,7 @@ aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
>>   	  current = exp->expansion;
>>   	}
>>         if (!useless_type_conversion_p (comb->type, current.type))
>> -	aff_combination_convert (&current, comb->type);
>> +	aff_combination_convert (&current, comb->type, vr);
>>   
>>         /* Accumulate the new terms to TO_ADD, so that we do not modify
>>   	 COMB while traversing it; include the term -coef * E, to remove
>> diff --git a/gcc/tree-affine.h b/gcc/tree-affine.h
>> index 4ec4924879d..311e5e7c23a 100644
>> --- a/gcc/tree-affine.h
>> +++ b/gcc/tree-affine.h
>> @@ -23,6 +23,7 @@ along with GCC; see the file COPYING3.  If not see
>>   #ifndef GCC_TREE_AFFINE_H
>>   #define GCC_TREE_AFFINE_H
>>   
>> +#include "value-range.h"
>>   
>>   #define MAX_AFF_ELTS 8
>>   
>> @@ -73,13 +74,14 @@ void aff_combination_mult (aff_tree *, aff_tree *, aff_tree *);
>>   void aff_combination_add (aff_tree *, aff_tree *);
>>   void aff_combination_add_elt (aff_tree *, tree, const widest_int &);
>>   void aff_combination_remove_elt (aff_tree *, unsigned);
>> -void aff_combination_convert (aff_tree *, tree);
>> -void tree_to_aff_combination (tree, tree, aff_tree *);
>> +void aff_combination_convert (aff_tree *, tree, value_range *vr = NULL);
>> +void tree_to_aff_combination (tree, tree, aff_tree *, value_range *vr = NULL);
>>   tree aff_combination_to_tree (aff_tree *);
>>   void unshare_aff_combination (aff_tree *);
>>   bool aff_combination_constant_multiple_p (aff_tree *, aff_tree *,
>>   					  poly_widest_int *);
>> -void aff_combination_expand (aff_tree *, hash_map<tree, name_expansion *> **);
>> +void aff_combination_expand (aff_tree *, hash_map<tree, name_expansion *> **,
>> +			     value_range *vr = NULL);
>>   void tree_to_aff_combination_expand (tree, tree, aff_tree *,
>>   				     hash_map<tree, name_expansion *> **);
>>   tree get_inner_reference_aff (tree, aff_tree *, poly_widest_int *);
>>
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] Add value range info for affine combination to improve store motion (PR83403)
  2020-04-28 10:04   ` luoxhu
@ 2020-04-28 10:30     ` Richard Biener
  2020-04-29  7:46       ` luoxhu
  0 siblings, 1 reply; 9+ messages in thread
From: Richard Biener @ 2020-04-28 10:30 UTC (permalink / raw)
  To: luoxhu
  Cc: Xionghu Luo, gcc-patches, segher, dje.gcc, wschmidt, guojiufu,
	linkw, ook, amker.cheng

On Tue, 28 Apr 2020, luoxhu wrote:

> 
> 
> On 2020/4/28 15:01, Richard Biener wrote:
> > On Tue, 28 Apr 2020, Xionghu Luo wrote:
> > 
> >> From: Xionghu Luo <luoxhu@linux.ibm.com>
> >>
> >> Get and propagate value range info to convert expressions with convert
> >> operation on PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow.  i.e.:
> >>
> >> (long unsigned int)((unsigned  int)n * 10 + 1)
> >> =>
> >> (long unsigned int)((unsigned  int) n * (long unsigned int)10 + (long unsigned int)1)
> >>
> >> With this patch for affine combination, load/store motion could detect
> >> more address refs independency and promote some memory expressions to
> >> registers within loop.
> >>
> >> PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
> >> to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.
> >>
> >> Bootstrap and regression tested pass on Power8-LE.  Any comments?
> >> Thanks.
> > 
> > So the core of the patch is adding handling of MULT_EXPR and the rest
> > is a no-op?  It's not clear from the patch what passing down a
> > value-range does and your description doesn't say anything about this
> > either.
> 
> This patch handles MULT_EXPR first, and also handles (long unsigned int)(n_93 * 10 + 1) as
> "n_93*10" is not a ssa_name by using the value_range passed down, minv&maxv [1,81] is the 
> range info of "n_93 * 10 +1", so wi::leu_p (maxv, wi::to_wide (TYPE_MAX_VALUE (itype)) in
> expr_to_aff_combination could ensure that no wrapping overflow of this converting.  Not sure
> if it's better described now...  And some debug message as follows:
> 
> 131t.lim2:
> # n_93 = PHI <0(2), n_36(7)>
> _118 = n_93 * 10;
> _120 = (long unsigned int) _118;
> _121 = _120 * 8;
> _122 = &C[0][0] + _121;
> *_122 = 0.0;
> _128 = _118 + 1;
> _129 = (long unsigned int) _128;
> _130 = _129 * 8;
> _131 = &C[0][0] + _130;
> *_131 = 0.0;
> _137 = _118 + 2;
> _138 = (long unsigned int) _137;
> _139 = _138 * 8;
> _140 = &C[0][0] + _139;
> *_140 = 0.0;
> 
> 
> Breakpoint 28, expr_to_aff_combination (comb=0x7fffffffc068, code=NOP_EXPR, type=<integer_type 0x7ffff53c07
> e0 long unsigned int>, op0=<plus_expr 0x7ffff56c0988>, op1=<tree 0x0>, vr=0x7fffffffc490) at ../../gcc-mast
> er/gcc/tree-affine.c:350
> 92: itype = <integer_type 0x7ffff53c0690 unsigned int>
> 93: otype = <integer_type 0x7ffff53c07e0 long unsigned int>
> 94: icode = PLUS_EXPR
> 95: code = NOP_EXPR
> (gdb) ptree op0
>  <mult_expr 0x7ffff56c0960
>     type <integer_type 0x7ffff53c0690 unsigned int sizes-gimplified public unsigned SI
>         size <integer_cst 0x7ffff52f1200 constant 32>
>         unit-size <integer_cst 0x7ffff52f1218 constant 4>
>         align:32 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff53c0690 precision:32 min <
> integer_cst 0x7ffff52f1230 0> max <integer_cst 0x7ffff52f11e8 4294967295> context <translation_unit_decl 0x
> 7ffff5552850 pr83403.c>
>         pointer_to_this <pointer_type 0x7ffff53c4c20>>
> 
>     arg:0 <ssa_name 0x7ffff5394a88 type <integer_type 0x7ffff53c0690 unsigned int>
>         visited var <var_decl 0x7ffff72e2520 n>
>         def_stmt n_93 = PHI <0(2), n_36(7)>
>         version:93
>         ptr-info 0x7ffff530b680>
>     arg:1 <integer_cst 0x7ffff52feda8 type <integer_type 0x7ffff53c0690 unsigned int> constant 10>>
> (gdb) ptree op1
>  <integer_cst 0x7ffff52fed48 type <integer_type 0x7ffff53c0690 unsigned int> constant 1>
> (gdb) p minv
> $580 = {
>   <wide_int_storage> = {
>     val = {1, 0, 140737310886240},
>     len = 1,
>     precision = 32
>   },
>   members of generic_wide_int<wide_int_storage>:
>   static is_sign_extended = true
> }
> (gdb) p maxv
> $581 = {
>   <wide_int_storage> = {
>     val = {81, 65, 40},
>     len = 1,
>     precision = 32
>   },
>   members of generic_wide_int<wide_int_storage>:
>   static is_sign_extended = true
> }

OK, I guess instead of get_range_info expr_to_aff_combination could
simply use determine_value_range (op0, &minv, &maxv) == VR_RANGE
(the && TREE_CODE (op0) == SSA_NAME check can then be removed)?

Thanks,
Richard.

> 
> Thanks,
> Xionghu
> 
> > 
> > Richard.
> > 
> >> gcc/ChangeLog
> >>
> >> 	2020-04-28  Xiong Hu Luo  <luoxhu@linux.ibm.com>
> >>
> >> 	PR tree-optimization/83403
> >> 	* tree-affine.c (aff_combination_convert): New parameter
> >> 	value_range.
> >> 	(expr_to_aff_combination): Use range info to check CONVERT
> >> 	expr on PLUS_EXPR/MINUS_EXPR/MULT_EXPR.
> >> 	(tree_to_aff_combination): Likewise.
> >> 	(aff_combination_expand): Get and propagate range info.
> >> 	* tree-affine.h: Include value-range.h.
> >> 	(aff_combination_convert): New parameter value_range.
> >> 	(tree_to_aff_combination): Likewise.
> >> 	(aff_combination_expand): Likewise.
> >> ---
> >>   gcc/tree-affine.c | 66 +++++++++++++++++++++++++++--------------------
> >>   gcc/tree-affine.h |  8 +++---
> >>   2 files changed, 43 insertions(+), 31 deletions(-)
> >>
> >> diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
> >> index 0eb8db1b086..63f8acd4c73 100644
> >> --- a/gcc/tree-affine.c
> >> +++ b/gcc/tree-affine.c
> >> @@ -220,7 +220,7 @@ aff_combination_add (aff_tree *comb1, aff_tree *comb2)
> >>   /* Converts affine combination COMB to TYPE.  */
> >>   
> >>   void
> >> -aff_combination_convert (aff_tree *comb, tree type)
> >> +aff_combination_convert (aff_tree *comb, tree type, value_range *vr)
> >>   {
> >>     unsigned i, j;
> >>     tree comb_type = comb->type;
> >> @@ -228,7 +228,7 @@ aff_combination_convert (aff_tree *comb, tree type)
> >>     if  (TYPE_PRECISION (type) > TYPE_PRECISION (comb_type))
> >>       {
> >>         tree val = fold_convert (type, aff_combination_to_tree (comb));
> >> -      tree_to_aff_combination (val, type, comb);
> >> +      tree_to_aff_combination (val, type, comb, vr);
> >>         return;
> >>       }
> >>   
> >> @@ -263,8 +263,8 @@ aff_combination_convert (aff_tree *comb, tree type)
> >>      true when that was successful and returns the combination in COMB.  */
> >>   
> >>   static bool
> >> -expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
> >> -			 tree op0, tree op1 = NULL_TREE)
> >> +expr_to_aff_combination (aff_tree *comb, tree_code code, tree type, tree op0,
> >> +			 tree op1 = NULL_TREE, value_range *vr = NULL)
> >>   {
> >>     aff_tree tmp;
> >>     poly_int64 bitpos, bitsize, bytepos;
> >> @@ -279,8 +279,8 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
> >>   
> >>       case PLUS_EXPR:
> >>       case MINUS_EXPR:
> >> -      tree_to_aff_combination (op0, type, comb);
> >> -      tree_to_aff_combination (op1, type, &tmp);
> >> +      tree_to_aff_combination (op0, type, comb, vr);
> >> +      tree_to_aff_combination (op1, type, &tmp, vr);
> >>         if (code == MINUS_EXPR)
> >>   	aff_combination_scale (&tmp, -1);
> >>         aff_combination_add (comb, &tmp);
> >> @@ -289,7 +289,7 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
> >>       case MULT_EXPR:
> >>         if (TREE_CODE (op1) != INTEGER_CST)
> >>   	break;
> >> -      tree_to_aff_combination (op0, type, comb);
> >> +      tree_to_aff_combination (op0, type, comb, vr);
> >>         aff_combination_scale (comb, wi::to_widest (op1));
> >>         return true;
> >>   
> >> @@ -340,27 +340,33 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
> >>   		op1 = fold_convert (otype, op1);
> >>   		return expr_to_aff_combination (comb, icode, otype, op0, op1);
> >>   	      }
> >> -	    wide_int minv, maxv;
> >> +
> >>   	    /* If inner type has wrapping overflow behavior, fold conversion
> >> -	       for below case:
> >> -		 (T1)(X - CST) -> (T1)X - (T1)CST
> >> -	       if X - CST doesn't overflow by range information.  Also handle
> >> -	       (T1)(X + CST) as (T1)(X - (-CST)).  */
> >> -	    if (TYPE_UNSIGNED (itype)
> >> +	       for below cases:
> >> +		 (T1)(X *+- CST) -> (T1)X *+- (T1)CST
> >> +	       when X *+- CST doesn't overflow by range information.  */
> >> +	    if (vr && vr->kind () == VR_RANGE && TYPE_UNSIGNED (itype)
> >>   		&& TYPE_OVERFLOW_WRAPS (itype)
> >> -		&& TREE_CODE (op0) == SSA_NAME
> >> -		&& TREE_CODE (op1) == INTEGER_CST
> >> -		&& icode != MULT_EXPR
> >> -		&& get_range_info (op0, &minv, &maxv) == VR_RANGE)
> >> +		&& TREE_CODE (op1) == INTEGER_CST)
> >>   	      {
> >> -		if (icode == PLUS_EXPR)
> >> -		  op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
> >> -		if (wi::geu_p (minv, wi::to_wide (op1)))
> >> +		wide_int minv, maxv;
> >> +		minv = wi::to_wide (vr->min ());
> >> +		maxv = wi::to_wide (vr->max ());
> >> +		if ((icode == MULT_EXPR || icode == PLUS_EXPR)
> >> +		    && wi::leu_p (maxv, wi::to_wide (TYPE_MAX_VALUE (itype))))
> >> +		  {
> >> +		    op0 = fold_convert (otype, op0);
> >> +		    op1 = fold_convert (otype, op1);
> >> +		    return expr_to_aff_combination (comb, icode, otype, op0,
> >> +						    op1, vr);
> >> +		  }
> >> +		else if (icode == MINUS_EXPR
> >> +			 && wi::geu_p (minv, wi::to_wide (op1)))
> >>   		  {
> >>   		    op0 = fold_convert (otype, op0);
> >>   		    op1 = fold_convert (otype, op1);
> >>   		    return expr_to_aff_combination (comb, MINUS_EXPR, otype,
> >> -						    op0, op1);
> >> +						    op0, op1, vr);
> >>   		  }
> >>   	      }
> >>   	  }
> >> @@ -376,7 +382,7 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
> >>   /* Splits EXPR into an affine combination of parts.  */
> >>   
> >>   void
> >> -tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
> >> +tree_to_aff_combination (tree expr, tree type, aff_tree *comb, value_range *vr)
> >>   {
> >>     aff_tree tmp;
> >>     enum tree_code code;
> >> @@ -408,10 +414,10 @@ tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
> >>       CASE_CONVERT:
> >>         /* ???  TREE_TYPE (expr) should be equal to type here, but IVOPTS
> >>   	 calls this with not showing an outer widening cast.  */
> >> -      if (expr_to_aff_combination (comb, code,
> >> -				   TREE_TYPE (expr), TREE_OPERAND (expr, 0)))
> >> +      if (expr_to_aff_combination (comb, code, TREE_TYPE (expr),
> >> +				   TREE_OPERAND (expr, 0), NULL_TREE, vr))
> >>   	{
> >> -	  aff_combination_convert (comb, type);
> >> +	  aff_combination_convert (comb, type, vr);
> >>   	  return;
> >>   	}
> >>         break;
> >> @@ -709,7 +715,8 @@ public:
> >>   
> >>   void
> >>   aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
> >> -			hash_map<tree, name_expansion *> **cache)
> >> +			hash_map<tree, name_expansion *> **cache,
> >> +			value_range *vr)
> >>   {
> >>     unsigned i;
> >>     aff_tree to_add, current, curre;
> >> @@ -800,7 +807,10 @@ aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
> >>   	  if (!*cache)
> >>   	    *cache = new hash_map<tree, name_expansion *>;
> >>   	  (*cache)->put (name, exp);
> >> -	  aff_combination_expand (&current, cache);
> >> +	  value_range vr = NULL;
> >> +	  if (!POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (def))))
> >> +	    get_range_info (gimple_assign_rhs1 (def), vr);
> >> +	  aff_combination_expand (&current, cache, &vr);
> >>   	  exp->expansion = current;
> >>   	  exp->in_progress = 0;
> >>   	}
> >> @@ -812,7 +822,7 @@ aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
> >>   	  current = exp->expansion;
> >>   	}
> >>         if (!useless_type_conversion_p (comb->type, current.type))
> >> -	aff_combination_convert (&current, comb->type);
> >> +	aff_combination_convert (&current, comb->type, vr);
> >>   
> >>         /* Accumulate the new terms to TO_ADD, so that we do not modify
> >>   	 COMB while traversing it; include the term -coef * E, to remove
> >> diff --git a/gcc/tree-affine.h b/gcc/tree-affine.h
> >> index 4ec4924879d..311e5e7c23a 100644
> >> --- a/gcc/tree-affine.h
> >> +++ b/gcc/tree-affine.h
> >> @@ -23,6 +23,7 @@ along with GCC; see the file COPYING3.  If not see
> >>   #ifndef GCC_TREE_AFFINE_H
> >>   #define GCC_TREE_AFFINE_H
> >>   
> >> +#include "value-range.h"
> >>   
> >>   #define MAX_AFF_ELTS 8
> >>   
> >> @@ -73,13 +74,14 @@ void aff_combination_mult (aff_tree *, aff_tree *, aff_tree *);
> >>   void aff_combination_add (aff_tree *, aff_tree *);
> >>   void aff_combination_add_elt (aff_tree *, tree, const widest_int &);
> >>   void aff_combination_remove_elt (aff_tree *, unsigned);
> >> -void aff_combination_convert (aff_tree *, tree);
> >> -void tree_to_aff_combination (tree, tree, aff_tree *);
> >> +void aff_combination_convert (aff_tree *, tree, value_range *vr = NULL);
> >> +void tree_to_aff_combination (tree, tree, aff_tree *, value_range *vr = NULL);
> >>   tree aff_combination_to_tree (aff_tree *);
> >>   void unshare_aff_combination (aff_tree *);
> >>   bool aff_combination_constant_multiple_p (aff_tree *, aff_tree *,
> >>   					  poly_widest_int *);
> >> -void aff_combination_expand (aff_tree *, hash_map<tree, name_expansion *> **);
> >> +void aff_combination_expand (aff_tree *, hash_map<tree, name_expansion *> **,
> >> +			     value_range *vr = NULL);
> >>   void tree_to_aff_combination_expand (tree, tree, aff_tree *,
> >>   				     hash_map<tree, name_expansion *> **);
> >>   tree get_inner_reference_aff (tree, aff_tree *, poly_widest_int *);
> >>
> > 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] Add value range info for affine combination to improve store motion (PR83403)
  2020-04-28 10:30     ` Richard Biener
@ 2020-04-29  7:46       ` luoxhu
  2020-04-30  7:53         ` [PATCH v2] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403) luoxhu
  0 siblings, 1 reply; 9+ messages in thread
From: luoxhu @ 2020-04-29  7:46 UTC (permalink / raw)
  To: Richard Biener
  Cc: Xionghu Luo, gcc-patches, segher, dje.gcc, wschmidt, guojiufu,
	linkw, ook, amker.cheng



On 2020/4/28 18:30, Richard Biener wrote:

> 
> OK, I guess instead of get_range_info expr_to_aff_combination could
> simply use determine_value_range (op0, &minv, &maxv) == VR_RANGE
> (the && TREE_CODE (op0) == SSA_NAME check can then be removed)?
> 

Tried with determine_value_range, it works and is much more straight forward,
Thanks for your suggestion, update the patch as below with some tests, 
regression testing.


[PATCH] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403)

Use determine_value_range to get value range info for fold convert expressions
with internal operation PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow on
wrapping overflow inner type.  i.e.:

(long unsigned int)((unsigned  int)n * 10 + 1)
=>
(long unsigned  int)n * (long unsigned int)10 + (long unsigned int)1

With this patch for affine combination, load/store motion could detect
more address refs independency and promote some memory expressions to
registers within loop.

PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.

gcc/ChangeLog

	2020-04-29  Xiong Hu Luo  <luoxhu@linux.ibm.com>

	PR tree-optimization/83403
	* tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with
	determine_value_range, Add fold conversion of MULT_EXPR, fix the
	previous PLUS_EXPR.

gcc/testsuite/ChangeLog

	2020-04-29  Xiong Hu Luo  <luoxhu@linux.ibm.com>

	PR tree-optimization/83403
	* gcc.dg/tree-ssa/pr83403-1.c: New test.
	* gcc.dg/tree-ssa/pr83403-2.c: New test.
	* gcc.dg/tree-ssa/pr83403.h: New header.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c |  8 ++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c |  8 ++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h   | 30 +++++++++++++++++++++++
 gcc/tree-affine.c                         | 22 +++++++++--------
 4 files changed, 58 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
new file mode 100644
index 00000000000..748375b03af
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE unsigned int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
new file mode 100644
index 00000000000..ca2e6bbd61c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
new file mode 100644
index 00000000000..0da8a835b5f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
@@ -0,0 +1,30 @@
+__attribute__ ((noinline)) void
+calculate (const double *__restrict__ A, const double *__restrict__ B,
+	   double *__restrict__ C)
+{
+  TYPE m = 0;
+  TYPE n = 0;
+  TYPE k = 0;
+
+  A = (const double *) __builtin_assume_aligned (A, 16);
+  B = (const double *) __builtin_assume_aligned (B, 16);
+  C = (double *) __builtin_assume_aligned (C, 16);
+
+  for (n = 0; n < 9; n++)
+    {
+      for (m = 0; m < 10; m++)
+	{
+	  C[(n * 10) + m] = 0.0;
+	}
+
+      for (k = 0; k < 17; k++)
+	{
+#pragma simd
+	  for (m = 0; m < 10; m++)
+	    {
+	      C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
+	    }
+	}
+    }
+}
+
diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
index 0eb8db1b086..86bb64fe245 100644
--- a/gcc/tree-affine.c
+++ b/gcc/tree-affine.c
@@ -343,23 +343,25 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
 	    wide_int minv, maxv;
 	    /* If inner type has wrapping overflow behavior, fold conversion
 	       for below case:
-		 (T1)(X - CST) -> (T1)X - (T1)CST
-	       if X - CST doesn't overflow by range information.  Also handle
-	       (T1)(X + CST) as (T1)(X - (-CST)).  */
+		 (T1)(X *+- CST) -> (T1)X *+- (T1)CST
+	       if X *+- CST doesn't overflow by range information.  */
 	    if (TYPE_UNSIGNED (itype)
 		&& TYPE_OVERFLOW_WRAPS (itype)
-		&& TREE_CODE (op0) == SSA_NAME
 		&& TREE_CODE (op1) == INTEGER_CST
-		&& icode != MULT_EXPR
-		&& get_range_info (op0, &minv, &maxv) == VR_RANGE)
+		&& determine_value_range (op0, &minv, &maxv) == VR_RANGE)
 	      {
-		if (icode == PLUS_EXPR)
-		  op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
-		if (wi::geu_p (minv, wi::to_wide (op1)))
+		if ((icode == PLUS_EXPR
+		     && wi::leu_p (maxv + wi::to_wide (op1),
+				   wi::to_wide (TYPE_MAX_VALUE (itype))))
+		    || (icode == MULT_EXPR
+			&& wi::leu_p (maxv * wi::to_wide (op1),
+				      wi::to_wide (TYPE_MAX_VALUE (itype))))
+		    || (icode == MINUS_EXPR
+			&& wi::geu_p (minv, wi::to_wide (op1))))
 		  {
 		    op0 = fold_convert (otype, op0);
 		    op1 = fold_convert (otype, op1);
-		    return expr_to_aff_combination (comb, MINUS_EXPR, otype,
+		    return expr_to_aff_combination (comb, icode, otype,
 						    op0, op1);
 		  }
 	      }
-- 
2.21.0.777.g83232e3864


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v2] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403)
  2020-04-29  7:46       ` luoxhu
@ 2020-04-30  7:53         ` luoxhu
  2020-05-06 12:09           ` Richard Biener
  0 siblings, 1 reply; 9+ messages in thread
From: luoxhu @ 2020-04-30  7:53 UTC (permalink / raw)
  To: Richard Biener
  Cc: Xionghu Luo, gcc-patches, segher, dje.gcc, wschmidt, guojiufu,
	linkw, ook, amker.cheng

Update the patch with overflow check.  Bootstrap and regression tested PASS on Power8-LE.


Use determine_value_range to get value range info for fold convert expressions
with internal operation PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow on
wrapping overflow inner type.  i.e.:

(long unsigned int)((unsigned  int)n * 10 + 1)
=>
(long unsigned int)n * (long unsigned int)10 + (long unsigned int)1

With this patch for affine combination, load/store motion could detect
more address refs independency and promote some memory expressions to
registers within loop.

PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.

gcc/ChangeLog

	2020-04-30  Xiong Hu Luo  <luoxhu@linux.ibm.com>

	PR tree-optimization/83403
	* tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with
	determine_value_range, Add fold conversion of MULT_EXPR, fix the
	previous PLUS_EXPR.

gcc/testsuite/ChangeLog

	2020-04-30  Xiong Hu Luo  <luoxhu@linux.ibm.com>

	PR tree-optimization/83403
	* gcc.dg/tree-ssa/pr83403-1.c: New test.
	* gcc.dg/tree-ssa/pr83403-2.c: New test.
	* gcc.dg/tree-ssa/pr83403.h: New header.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c |  8 ++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c |  8 ++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h   | 30 +++++++++++++++++++++++
 gcc/tree-affine.c                         | 24 ++++++++++--------
 4 files changed, 60 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
new file mode 100644
index 00000000000..748375b03af
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE unsigned int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
new file mode 100644
index 00000000000..ca2e6bbd61c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
new file mode 100644
index 00000000000..0da8a835b5f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
@@ -0,0 +1,30 @@
+__attribute__ ((noinline)) void
+calculate (const double *__restrict__ A, const double *__restrict__ B,
+	   double *__restrict__ C)
+{
+  TYPE m = 0;
+  TYPE n = 0;
+  TYPE k = 0;
+
+  A = (const double *) __builtin_assume_aligned (A, 16);
+  B = (const double *) __builtin_assume_aligned (B, 16);
+  C = (double *) __builtin_assume_aligned (C, 16);
+
+  for (n = 0; n < 9; n++)
+    {
+      for (m = 0; m < 10; m++)
+	{
+	  C[(n * 10) + m] = 0.0;
+	}
+
+      for (k = 0; k < 17; k++)
+	{
+#pragma simd
+	  for (m = 0; m < 10; m++)
+	    {
+	      C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
+	    }
+	}
+    }
+}
+
diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
index 0eb8db1b086..5620e6bf28f 100644
--- a/gcc/tree-affine.c
+++ b/gcc/tree-affine.c
@@ -343,24 +343,28 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
 	    wide_int minv, maxv;
 	    /* If inner type has wrapping overflow behavior, fold conversion
 	       for below case:
-		 (T1)(X - CST) -> (T1)X - (T1)CST
-	       if X - CST doesn't overflow by range information.  Also handle
-	       (T1)(X + CST) as (T1)(X - (-CST)).  */
+		 (T1)(X *+- CST) -> (T1)X *+- (T1)CST
+	       if X *+- CST doesn't overflow by range information.  */
 	    if (TYPE_UNSIGNED (itype)
 		&& TYPE_OVERFLOW_WRAPS (itype)
-		&& TREE_CODE (op0) == SSA_NAME
 		&& TREE_CODE (op1) == INTEGER_CST
-		&& icode != MULT_EXPR
-		&& get_range_info (op0, &minv, &maxv) == VR_RANGE)
+		&& determine_value_range (op0, &minv, &maxv) == VR_RANGE)
 	      {
+		wi::overflow_type overflow = wi::OVF_NONE;
+		signop sign = UNSIGNED;
 		if (icode == PLUS_EXPR)
-		  op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
-		if (wi::geu_p (minv, wi::to_wide (op1)))
+		  wi::add (maxv, wi::to_wide (op1), sign, &overflow);
+		else if (icode == MULT_EXPR)
+		  wi::mul (maxv, wi::to_wide (op1), sign, &overflow);
+		else
+		  wi::sub (minv, wi::to_wide (op1), sign, &overflow);
+
+		if (overflow == wi::OVF_NONE)
 		  {
 		    op0 = fold_convert (otype, op0);
 		    op1 = fold_convert (otype, op1);
-		    return expr_to_aff_combination (comb, MINUS_EXPR, otype,
-						    op0, op1);
+		    return expr_to_aff_combination (comb, icode, otype, op0,
+						    op1);
 		  }1
 	      }
 	  }
-- 
2.21.0.777.g83232e3864



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403)
  2020-04-30  7:53         ` [PATCH v2] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403) luoxhu
@ 2020-05-06 12:09           ` Richard Biener
  2020-05-11  9:04             ` luoxhu
  0 siblings, 1 reply; 9+ messages in thread
From: Richard Biener @ 2020-05-06 12:09 UTC (permalink / raw)
  To: luoxhu
  Cc: Xionghu Luo, gcc-patches, segher, dje.gcc, wschmidt, guojiufu,
	linkw, ook, amker.cheng

On Thu, 30 Apr 2020, luoxhu wrote:

> Update the patch with overflow check.  Bootstrap and regression tested PASS on Power8-LE.
> 
> 
> Use determine_value_range to get value range info for fold convert expressions
> with internal operation PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow on
> wrapping overflow inner type.  i.e.:
> 
> (long unsigned int)((unsigned  int)n * 10 + 1)
> =>
> (long unsigned int)n * (long unsigned int)10 + (long unsigned int)1
> 
> With this patch for affine combination, load/store motion could detect
> more address refs independency and promote some memory expressions to
> registers within loop.
> 
> PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
> to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.

This is OK for trunk if bootstrapped / tested properl.

Thanks,
Richard.

> gcc/ChangeLog
> 
> 	2020-04-30  Xiong Hu Luo  <luoxhu@linux.ibm.com>
> 
> 	PR tree-optimization/83403
> 	* tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with
> 	determine_value_range, Add fold conversion of MULT_EXPR, fix the
> 	previous PLUS_EXPR.
> 
> gcc/testsuite/ChangeLog
> 
> 	2020-04-30  Xiong Hu Luo  <luoxhu@linux.ibm.com>
> 
> 	PR tree-optimization/83403
> 	* gcc.dg/tree-ssa/pr83403-1.c: New test.
> 	* gcc.dg/tree-ssa/pr83403-2.c: New test.
> 	* gcc.dg/tree-ssa/pr83403.h: New header.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c |  8 ++++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c |  8 ++++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr83403.h   | 30 +++++++++++++++++++++++
>  gcc/tree-affine.c                         | 24 ++++++++++--------
>  4 files changed, 60 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
> new file mode 100644
> index 00000000000..748375b03af
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
> +
> +#define TYPE unsigned int
> +
> +#include "pr83403.h"
> +
> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
> new file mode 100644
> index 00000000000..ca2e6bbd61c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
> +
> +#define TYPE int
> +
> +#include "pr83403.h"
> +
> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
> new file mode 100644
> index 00000000000..0da8a835b5f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
> @@ -0,0 +1,30 @@
> +__attribute__ ((noinline)) void
> +calculate (const double *__restrict__ A, const double *__restrict__ B,
> +	   double *__restrict__ C)
> +{
> +  TYPE m = 0;
> +  TYPE n = 0;
> +  TYPE k = 0;
> +
> +  A = (const double *) __builtin_assume_aligned (A, 16);
> +  B = (const double *) __builtin_assume_aligned (B, 16);
> +  C = (double *) __builtin_assume_aligned (C, 16);
> +
> +  for (n = 0; n < 9; n++)
> +    {
> +      for (m = 0; m < 10; m++)
> +	{
> +	  C[(n * 10) + m] = 0.0;
> +	}
> +
> +      for (k = 0; k < 17; k++)
> +	{
> +#pragma simd
> +	  for (m = 0; m < 10; m++)
> +	    {
> +	      C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
> +	    }
> +	}
> +    }
> +}
> +
> diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
> index 0eb8db1b086..5620e6bf28f 100644
> --- a/gcc/tree-affine.c
> +++ b/gcc/tree-affine.c
> @@ -343,24 +343,28 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>  	    wide_int minv, maxv;
>  	    /* If inner type has wrapping overflow behavior, fold conversion
>  	       for below case:
> -		 (T1)(X - CST) -> (T1)X - (T1)CST
> -	       if X - CST doesn't overflow by range information.  Also handle
> -	       (T1)(X + CST) as (T1)(X - (-CST)).  */
> +		 (T1)(X *+- CST) -> (T1)X *+- (T1)CST
> +	       if X *+- CST doesn't overflow by range information.  */
>  	    if (TYPE_UNSIGNED (itype)
>  		&& TYPE_OVERFLOW_WRAPS (itype)
> -		&& TREE_CODE (op0) == SSA_NAME
>  		&& TREE_CODE (op1) == INTEGER_CST
> -		&& icode != MULT_EXPR
> -		&& get_range_info (op0, &minv, &maxv) == VR_RANGE)
> +		&& determine_value_range (op0, &minv, &maxv) == VR_RANGE)
>  	      {
> +		wi::overflow_type overflow = wi::OVF_NONE;
> +		signop sign = UNSIGNED;
>  		if (icode == PLUS_EXPR)
> -		  op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
> -		if (wi::geu_p (minv, wi::to_wide (op1)))
> +		  wi::add (maxv, wi::to_wide (op1), sign, &overflow);
> +		else if (icode == MULT_EXPR)
> +		  wi::mul (maxv, wi::to_wide (op1), sign, &overflow);
> +		else
> +		  wi::sub (minv, wi::to_wide (op1), sign, &overflow);
> +
> +		if (overflow == wi::OVF_NONE)
>  		  {
>  		    op0 = fold_convert (otype, op0);
>  		    op1 = fold_convert (otype, op1);
> -		    return expr_to_aff_combination (comb, MINUS_EXPR, otype,
> -						    op0, op1);
> +		    return expr_to_aff_combination (comb, icode, otype, op0,
> +						    op1);
>  		  }1
>  	      }
>  	  }
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403)
  2020-05-06 12:09           ` Richard Biener
@ 2020-05-11  9:04             ` luoxhu
  2020-05-11  9:12               ` Richard Biener
  0 siblings, 1 reply; 9+ messages in thread
From: luoxhu @ 2020-05-11  9:04 UTC (permalink / raw)
  To: Richard Biener
  Cc: luoxhu, gcc-patches, segher, dje.gcc, wschmidt, guojiufu, linkw,
	ook, amker.cheng

在 2020-05-06 20:09,Richard Biener 写道:
> On Thu, 30 Apr 2020, luoxhu wrote:
> 
>> Update the patch with overflow check.  Bootstrap and regression tested 
>> PASS on Power8-LE.
>> 
>> 
>> Use determine_value_range to get value range info for fold convert 
>> expressions
>> with internal operation PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not 
>> overflow on
>> wrapping overflow inner type.  i.e.:
>> 
>> (long unsigned int)((unsigned  int)n * 10 + 1)
>> =>
>> (long unsigned int)n * (long unsigned int)10 + (long unsigned int)1
>> 
>> With this patch for affine combination, load/store motion could detect
>> more address refs independency and promote some memory expressions to
>> registers within loop.
>> 
>> PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
>> to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.
> 
> This is OK for trunk if bootstrapped / tested properl.


Bootstrap and regression tested pass on power8LE, committed to
r11-259-g0447929f11e6a3e1b076841712b90a8b6bc7d33a, is it necessary
to backport it to gcc-10&gcc-9?


Thanks,
Xionghu


> 
> Thanks,
> Richard.
> 
>> gcc/ChangeLog
>> 
>> 	2020-04-30  Xiong Hu Luo  <luoxhu@linux.ibm.com>
>> 
>> 	PR tree-optimization/83403
>> 	* tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with
>> 	determine_value_range, Add fold conversion of MULT_EXPR, fix the
>> 	previous PLUS_EXPR.
>> 
>> gcc/testsuite/ChangeLog
>> 
>> 	2020-04-30  Xiong Hu Luo  <luoxhu@linux.ibm.com>
>> 
>> 	PR tree-optimization/83403
>> 	* gcc.dg/tree-ssa/pr83403-1.c: New test.
>> 	* gcc.dg/tree-ssa/pr83403-2.c: New test.
>> 	* gcc.dg/tree-ssa/pr83403.h: New header.
>> ---
>>  gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c |  8 ++++++
>>  gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c |  8 ++++++
>>  gcc/testsuite/gcc.dg/tree-ssa/pr83403.h   | 30 
>> +++++++++++++++++++++++
>>  gcc/tree-affine.c                         | 24 ++++++++++--------
>>  4 files changed, 60 insertions(+), 10 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
>> 
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c 
>> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
>> new file mode 100644
>> index 00000000000..748375b03af
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
>> @@ -0,0 +1,8 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
>> +
>> +#define TYPE unsigned int
>> +
>> +#include "pr83403.h"
>> +
>> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 
>> "lim2" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c 
>> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
>> new file mode 100644
>> index 00000000000..ca2e6bbd61c
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
>> @@ -0,0 +1,8 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
>> +
>> +#define TYPE int
>> +
>> +#include "pr83403.h"
>> +
>> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 
>> "lim2" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h 
>> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
>> new file mode 100644
>> index 00000000000..0da8a835b5f
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
>> @@ -0,0 +1,30 @@
>> +__attribute__ ((noinline)) void
>> +calculate (const double *__restrict__ A, const double *__restrict__ 
>> B,
>> +	   double *__restrict__ C)
>> +{
>> +  TYPE m = 0;
>> +  TYPE n = 0;
>> +  TYPE k = 0;
>> +
>> +  A = (const double *) __builtin_assume_aligned (A, 16);
>> +  B = (const double *) __builtin_assume_aligned (B, 16);
>> +  C = (double *) __builtin_assume_aligned (C, 16);
>> +
>> +  for (n = 0; n < 9; n++)
>> +    {
>> +      for (m = 0; m < 10; m++)
>> +	{
>> +	  C[(n * 10) + m] = 0.0;
>> +	}
>> +
>> +      for (k = 0; k < 17; k++)
>> +	{
>> +#pragma simd
>> +	  for (m = 0; m < 10; m++)
>> +	    {
>> +	      C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
>> +	    }
>> +	}
>> +    }
>> +}
>> +
>> diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
>> index 0eb8db1b086..5620e6bf28f 100644
>> --- a/gcc/tree-affine.c
>> +++ b/gcc/tree-affine.c
>> @@ -343,24 +343,28 @@ expr_to_aff_combination (aff_tree *comb, 
>> tree_code code, tree type,
>>  	    wide_int minv, maxv;
>>  	    /* If inner type has wrapping overflow behavior, fold conversion
>>  	       for below case:
>> -		 (T1)(X - CST) -> (T1)X - (T1)CST
>> -	       if X - CST doesn't overflow by range information.  Also 
>> handle
>> -	       (T1)(X + CST) as (T1)(X - (-CST)).  */
>> +		 (T1)(X *+- CST) -> (T1)X *+- (T1)CST
>> +	       if X *+- CST doesn't overflow by range information.  */
>>  	    if (TYPE_UNSIGNED (itype)
>>  		&& TYPE_OVERFLOW_WRAPS (itype)
>> -		&& TREE_CODE (op0) == SSA_NAME
>>  		&& TREE_CODE (op1) == INTEGER_CST
>> -		&& icode != MULT_EXPR
>> -		&& get_range_info (op0, &minv, &maxv) == VR_RANGE)
>> +		&& determine_value_range (op0, &minv, &maxv) == VR_RANGE)
>>  	      {
>> +		wi::overflow_type overflow = wi::OVF_NONE;
>> +		signop sign = UNSIGNED;
>>  		if (icode == PLUS_EXPR)
>> -		  op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
>> -		if (wi::geu_p (minv, wi::to_wide (op1)))
>> +		  wi::add (maxv, wi::to_wide (op1), sign, &overflow);
>> +		else if (icode == MULT_EXPR)
>> +		  wi::mul (maxv, wi::to_wide (op1), sign, &overflow);
>> +		else
>> +		  wi::sub (minv, wi::to_wide (op1), sign, &overflow);
>> +
>> +		if (overflow == wi::OVF_NONE)
>>  		  {
>>  		    op0 = fold_convert (otype, op0);
>>  		    op1 = fold_convert (otype, op1);
>> -		    return expr_to_aff_combination (comb, MINUS_EXPR, otype,
>> -						    op0, op1);
>> +		    return expr_to_aff_combination (comb, icode, otype, op0,
>> +						    op1);
>>  		  }1
>>  	      }
>>  	  }
>> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403)
  2020-05-11  9:04             ` luoxhu
@ 2020-05-11  9:12               ` Richard Biener
  0 siblings, 0 replies; 9+ messages in thread
From: Richard Biener @ 2020-05-11  9:12 UTC (permalink / raw)
  To: luoxhu
  Cc: luoxhu, gcc-patches, segher, dje.gcc, wschmidt, guojiufu, linkw,
	ook, amker.cheng

On Mon, 11 May 2020, luoxhu wrote:

> 在 2020-05-06 20:09,Richard Biener 写道:
> > On Thu, 30 Apr 2020, luoxhu wrote:
> > 
> >> Update the patch with overflow check.  Bootstrap and regression tested PASS
> >> on Power8-LE.
> >> 
> >> 
> >> Use determine_value_range to get value range info for fold convert
> >> expressions
> >> with internal operation PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow on
> >> wrapping overflow inner type.  i.e.:
> >> 
> >> (long unsigned int)((unsigned  int)n * 10 + 1)
> >> =>
> >> (long unsigned int)n * (long unsigned int)10 + (long unsigned int)1
> >> 
> >> With this patch for affine combination, load/store motion could detect
> >> more address refs independency and promote some memory expressions to
> >> registers within loop.
> >> 
> >> PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
> >> to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.
> > 
> > This is OK for trunk if bootstrapped / tested properl.
> 
> 
> Bootstrap and regression tested pass on power8LE, committed to
> r11-259-g0447929f11e6a3e1b076841712b90a8b6bc7d33a, is it necessary
> to backport it to gcc-10&gcc-9?

For sure not.

Richard.

> 
> Thanks,
> Xionghu
> 
> 
> > 
> > Thanks,
> > Richard.
> > 
> >> gcc/ChangeLog
> >> 
> >>  2020-04-30  Xiong Hu Luo  <luoxhu@linux.ibm.com>
> >> 
> >>  PR tree-optimization/83403
> >>  * tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with
> >>  determine_value_range, Add fold conversion of MULT_EXPR, fix the
> >>  previous PLUS_EXPR.
> >> 
> >> gcc/testsuite/ChangeLog
> >> 
> >>  2020-04-30  Xiong Hu Luo  <luoxhu@linux.ibm.com>
> >> 
> >>  PR tree-optimization/83403
> >>  * gcc.dg/tree-ssa/pr83403-1.c: New test.
> >>  * gcc.dg/tree-ssa/pr83403-2.c: New test.
> >>  * gcc.dg/tree-ssa/pr83403.h: New header.
> >> ---
> >>  gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c |  8 ++++++
> >>  gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c |  8 ++++++
> >>  gcc/testsuite/gcc.dg/tree-ssa/pr83403.h   | 30 
> >> +++++++++++++++++++++++
> >>  gcc/tree-affine.c                         | 24 ++++++++++--------
> >>  4 files changed, 60 insertions(+), 10 deletions(-)
> >>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
> >>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
> >>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
> >> 
> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
> >> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
> >> new file mode 100644
> >> index 00000000000..748375b03af
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
> >> @@ -0,0 +1,8 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
> >> +
> >> +#define TYPE unsigned int
> >> +
> >> +#include "pr83403.h"
> >> +
> >> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2"
> >> } } */
> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
> >> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
> >> new file mode 100644
> >> index 00000000000..ca2e6bbd61c
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
> >> @@ -0,0 +1,8 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
> >> +
> >> +#define TYPE int
> >> +
> >> +#include "pr83403.h"
> >> +
> >> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2"
> >> } } */
> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
> >> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
> >> new file mode 100644
> >> index 00000000000..0da8a835b5f
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
> >> @@ -0,0 +1,30 @@
> >> +__attribute__ ((noinline)) void
> >> +calculate (const double *__restrict__ A, const double *__restrict__ B,
> >> +	   double *__restrict__ C)
> >> +{
> >> +  TYPE m = 0;
> >> +  TYPE n = 0;
> >> +  TYPE k = 0;
> >> +
> >> +  A = (const double *) __builtin_assume_aligned (A, 16);
> >> +  B = (const double *) __builtin_assume_aligned (B, 16);
> >> +  C = (double *) __builtin_assume_aligned (C, 16);
> >> +
> >> +  for (n = 0; n < 9; n++)
> >> +    {
> >> +      for (m = 0; m < 10; m++)
> >> +	{
> >> +	  C[(n * 10) + m] = 0.0;
> >> +	}
> >> +
> >> +      for (k = 0; k < 17; k++)
> >> +	{
> >> +#pragma simd
> >> +	  for (m = 0; m < 10; m++)
> >> +	    {
> >> +	      C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
> >> +	    }
> >> +	}
> >> +    }
> >> +}
> >> +
> >> diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
> >> index 0eb8db1b086..5620e6bf28f 100644
> >> --- a/gcc/tree-affine.c
> >> +++ b/gcc/tree-affine.c
> >> @@ -343,24 +343,28 @@ expr_to_aff_combination (aff_tree *comb, tree_code
> >> code, tree type,
> >>       wide_int minv, maxv;
> >>       /* If inner type has wrapping overflow behavior, fold conversion
> >> 	       for below case:
> >> -		 (T1)(X - CST) -> (T1)X - (T1)CST
> >> -	       if X - CST doesn't overflow by range information.  Also handle
> >> -	       (T1)(X + CST) as (T1)(X - (-CST)).  */
> >> +		 (T1)(X *+- CST) -> (T1)X *+- (T1)CST
> >> +	       if X *+- CST doesn't overflow by range information.  */
> >>       if (TYPE_UNSIGNED (itype)
> >> 		&& TYPE_OVERFLOW_WRAPS (itype)
> >> -		&& TREE_CODE (op0) == SSA_NAME
> >> 		&& TREE_CODE (op1) == INTEGER_CST
> >> -		&& icode != MULT_EXPR
> >> -		&& get_range_info (op0, &minv, &maxv) == VR_RANGE)
> >> +		&& determine_value_range (op0, &minv, &maxv) == VR_RANGE)
> >> 	      {
> >> +		wi::overflow_type overflow = wi::OVF_NONE;
> >> +		signop sign = UNSIGNED;
> >> 		if (icode == PLUS_EXPR)
> >> -		  op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
> >> -		if (wi::geu_p (minv, wi::to_wide (op1)))
> >> +		  wi::add (maxv, wi::to_wide (op1), sign, &overflow);
> >> +		else if (icode == MULT_EXPR)
> >> +		  wi::mul (maxv, wi::to_wide (op1), sign, &overflow);
> >> +		else
> >> +		  wi::sub (minv, wi::to_wide (op1), sign, &overflow);
> >> +
> >> +		if (overflow == wi::OVF_NONE)
> >>      {
> >>        op0 = fold_convert (otype, op0);
> >>        op1 = fold_convert (otype, op1);
> >> -		    return expr_to_aff_combination (comb, MINUS_EXPR, otype,
> >> -						    op0, op1);
> >> +		    return expr_to_aff_combination (comb, icode, otype, op0,
> >> +						    op1);
> >>         	  }1
> >>         }
> >>     }
> >> 
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-05-11  9:12 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-28  6:17 [PATCH] Add value range info for affine combination to improve store motion (PR83403) Xionghu Luo
2020-04-28  7:01 ` Richard Biener
2020-04-28 10:04   ` luoxhu
2020-04-28 10:30     ` Richard Biener
2020-04-29  7:46       ` luoxhu
2020-04-30  7:53         ` [PATCH v2] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403) luoxhu
2020-05-06 12:09           ` Richard Biener
2020-05-11  9:04             ` luoxhu
2020-05-11  9:12               ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).