* [PATCH] Add value range info for affine combination to improve store motion (PR83403)
@ 2020-04-28 6:17 Xionghu Luo
2020-04-28 7:01 ` Richard Biener
0 siblings, 1 reply; 9+ messages in thread
From: Xionghu Luo @ 2020-04-28 6:17 UTC (permalink / raw)
To: gcc-patches
Cc: segher, dje.gcc, wschmidt, guojiufu, linkw, rguenther, ook,
amker.cheng, Xionghu Luo
From: Xionghu Luo <luoxhu@linux.ibm.com>
Get and propagate value range info to convert expressions with convert
operation on PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow. i.e.:
(long unsigned int)((unsigned int)n * 10 + 1)
=>
(long unsigned int)((unsigned int) n * (long unsigned int)10 + (long unsigned int)1)
With this patch for affine combination, load/store motion could detect
more address refs independency and promote some memory expressions to
registers within loop.
PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.
Bootstrap and regression tested pass on Power8-LE. Any comments?
Thanks.
gcc/ChangeLog
2020-04-28 Xiong Hu Luo <luoxhu@linux.ibm.com>
PR tree-optimization/83403
* tree-affine.c (aff_combination_convert): New parameter
value_range.
(expr_to_aff_combination): Use range info to check CONVERT
expr on PLUS_EXPR/MINUS_EXPR/MULT_EXPR.
(tree_to_aff_combination): Likewise.
(aff_combination_expand): Get and propagate range info.
* tree-affine.h: Include value-range.h.
(aff_combination_convert): New parameter value_range.
(tree_to_aff_combination): Likewise.
(aff_combination_expand): Likewise.
---
gcc/tree-affine.c | 66 +++++++++++++++++++++++++++--------------------
gcc/tree-affine.h | 8 +++---
2 files changed, 43 insertions(+), 31 deletions(-)
diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
index 0eb8db1b086..63f8acd4c73 100644
--- a/gcc/tree-affine.c
+++ b/gcc/tree-affine.c
@@ -220,7 +220,7 @@ aff_combination_add (aff_tree *comb1, aff_tree *comb2)
/* Converts affine combination COMB to TYPE. */
void
-aff_combination_convert (aff_tree *comb, tree type)
+aff_combination_convert (aff_tree *comb, tree type, value_range *vr)
{
unsigned i, j;
tree comb_type = comb->type;
@@ -228,7 +228,7 @@ aff_combination_convert (aff_tree *comb, tree type)
if (TYPE_PRECISION (type) > TYPE_PRECISION (comb_type))
{
tree val = fold_convert (type, aff_combination_to_tree (comb));
- tree_to_aff_combination (val, type, comb);
+ tree_to_aff_combination (val, type, comb, vr);
return;
}
@@ -263,8 +263,8 @@ aff_combination_convert (aff_tree *comb, tree type)
true when that was successful and returns the combination in COMB. */
static bool
-expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
- tree op0, tree op1 = NULL_TREE)
+expr_to_aff_combination (aff_tree *comb, tree_code code, tree type, tree op0,
+ tree op1 = NULL_TREE, value_range *vr = NULL)
{
aff_tree tmp;
poly_int64 bitpos, bitsize, bytepos;
@@ -279,8 +279,8 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
case PLUS_EXPR:
case MINUS_EXPR:
- tree_to_aff_combination (op0, type, comb);
- tree_to_aff_combination (op1, type, &tmp);
+ tree_to_aff_combination (op0, type, comb, vr);
+ tree_to_aff_combination (op1, type, &tmp, vr);
if (code == MINUS_EXPR)
aff_combination_scale (&tmp, -1);
aff_combination_add (comb, &tmp);
@@ -289,7 +289,7 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
case MULT_EXPR:
if (TREE_CODE (op1) != INTEGER_CST)
break;
- tree_to_aff_combination (op0, type, comb);
+ tree_to_aff_combination (op0, type, comb, vr);
aff_combination_scale (comb, wi::to_widest (op1));
return true;
@@ -340,27 +340,33 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
op1 = fold_convert (otype, op1);
return expr_to_aff_combination (comb, icode, otype, op0, op1);
}
- wide_int minv, maxv;
+
/* If inner type has wrapping overflow behavior, fold conversion
- for below case:
- (T1)(X - CST) -> (T1)X - (T1)CST
- if X - CST doesn't overflow by range information. Also handle
- (T1)(X + CST) as (T1)(X - (-CST)). */
- if (TYPE_UNSIGNED (itype)
+ for below cases:
+ (T1)(X *+- CST) -> (T1)X *+- (T1)CST
+ when X *+- CST doesn't overflow by range information. */
+ if (vr && vr->kind () == VR_RANGE && TYPE_UNSIGNED (itype)
&& TYPE_OVERFLOW_WRAPS (itype)
- && TREE_CODE (op0) == SSA_NAME
- && TREE_CODE (op1) == INTEGER_CST
- && icode != MULT_EXPR
- && get_range_info (op0, &minv, &maxv) == VR_RANGE)
+ && TREE_CODE (op1) == INTEGER_CST)
{
- if (icode == PLUS_EXPR)
- op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
- if (wi::geu_p (minv, wi::to_wide (op1)))
+ wide_int minv, maxv;
+ minv = wi::to_wide (vr->min ());
+ maxv = wi::to_wide (vr->max ());
+ if ((icode == MULT_EXPR || icode == PLUS_EXPR)
+ && wi::leu_p (maxv, wi::to_wide (TYPE_MAX_VALUE (itype))))
+ {
+ op0 = fold_convert (otype, op0);
+ op1 = fold_convert (otype, op1);
+ return expr_to_aff_combination (comb, icode, otype, op0,
+ op1, vr);
+ }
+ else if (icode == MINUS_EXPR
+ && wi::geu_p (minv, wi::to_wide (op1)))
{
op0 = fold_convert (otype, op0);
op1 = fold_convert (otype, op1);
return expr_to_aff_combination (comb, MINUS_EXPR, otype,
- op0, op1);
+ op0, op1, vr);
}
}
}
@@ -376,7 +382,7 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
/* Splits EXPR into an affine combination of parts. */
void
-tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
+tree_to_aff_combination (tree expr, tree type, aff_tree *comb, value_range *vr)
{
aff_tree tmp;
enum tree_code code;
@@ -408,10 +414,10 @@ tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
CASE_CONVERT:
/* ??? TREE_TYPE (expr) should be equal to type here, but IVOPTS
calls this with not showing an outer widening cast. */
- if (expr_to_aff_combination (comb, code,
- TREE_TYPE (expr), TREE_OPERAND (expr, 0)))
+ if (expr_to_aff_combination (comb, code, TREE_TYPE (expr),
+ TREE_OPERAND (expr, 0), NULL_TREE, vr))
{
- aff_combination_convert (comb, type);
+ aff_combination_convert (comb, type, vr);
return;
}
break;
@@ -709,7 +715,8 @@ public:
void
aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
- hash_map<tree, name_expansion *> **cache)
+ hash_map<tree, name_expansion *> **cache,
+ value_range *vr)
{
unsigned i;
aff_tree to_add, current, curre;
@@ -800,7 +807,10 @@ aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
if (!*cache)
*cache = new hash_map<tree, name_expansion *>;
(*cache)->put (name, exp);
- aff_combination_expand (¤t, cache);
+ value_range vr = NULL;
+ if (!POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (def))))
+ get_range_info (gimple_assign_rhs1 (def), vr);
+ aff_combination_expand (¤t, cache, &vr);
exp->expansion = current;
exp->in_progress = 0;
}
@@ -812,7 +822,7 @@ aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
current = exp->expansion;
}
if (!useless_type_conversion_p (comb->type, current.type))
- aff_combination_convert (¤t, comb->type);
+ aff_combination_convert (¤t, comb->type, vr);
/* Accumulate the new terms to TO_ADD, so that we do not modify
COMB while traversing it; include the term -coef * E, to remove
diff --git a/gcc/tree-affine.h b/gcc/tree-affine.h
index 4ec4924879d..311e5e7c23a 100644
--- a/gcc/tree-affine.h
+++ b/gcc/tree-affine.h
@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see
#ifndef GCC_TREE_AFFINE_H
#define GCC_TREE_AFFINE_H
+#include "value-range.h"
#define MAX_AFF_ELTS 8
@@ -73,13 +74,14 @@ void aff_combination_mult (aff_tree *, aff_tree *, aff_tree *);
void aff_combination_add (aff_tree *, aff_tree *);
void aff_combination_add_elt (aff_tree *, tree, const widest_int &);
void aff_combination_remove_elt (aff_tree *, unsigned);
-void aff_combination_convert (aff_tree *, tree);
-void tree_to_aff_combination (tree, tree, aff_tree *);
+void aff_combination_convert (aff_tree *, tree, value_range *vr = NULL);
+void tree_to_aff_combination (tree, tree, aff_tree *, value_range *vr = NULL);
tree aff_combination_to_tree (aff_tree *);
void unshare_aff_combination (aff_tree *);
bool aff_combination_constant_multiple_p (aff_tree *, aff_tree *,
poly_widest_int *);
-void aff_combination_expand (aff_tree *, hash_map<tree, name_expansion *> **);
+void aff_combination_expand (aff_tree *, hash_map<tree, name_expansion *> **,
+ value_range *vr = NULL);
void tree_to_aff_combination_expand (tree, tree, aff_tree *,
hash_map<tree, name_expansion *> **);
tree get_inner_reference_aff (tree, aff_tree *, poly_widest_int *);
--
2.21.0.777.g83232e3864
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Add value range info for affine combination to improve store motion (PR83403)
2020-04-28 6:17 [PATCH] Add value range info for affine combination to improve store motion (PR83403) Xionghu Luo
@ 2020-04-28 7:01 ` Richard Biener
2020-04-28 10:04 ` luoxhu
0 siblings, 1 reply; 9+ messages in thread
From: Richard Biener @ 2020-04-28 7:01 UTC (permalink / raw)
To: Xionghu Luo
Cc: gcc-patches, segher, dje.gcc, wschmidt, guojiufu, linkw, ook,
amker.cheng, Xionghu Luo
On Tue, 28 Apr 2020, Xionghu Luo wrote:
> From: Xionghu Luo <luoxhu@linux.ibm.com>
>
> Get and propagate value range info to convert expressions with convert
> operation on PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow. i.e.:
>
> (long unsigned int)((unsigned int)n * 10 + 1)
> =>
> (long unsigned int)((unsigned int) n * (long unsigned int)10 + (long unsigned int)1)
>
> With this patch for affine combination, load/store motion could detect
> more address refs independency and promote some memory expressions to
> registers within loop.
>
> PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
> to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.
>
> Bootstrap and regression tested pass on Power8-LE. Any comments?
> Thanks.
So the core of the patch is adding handling of MULT_EXPR and the rest
is a no-op? It's not clear from the patch what passing down a
value-range does and your description doesn't say anything about this
either.
Richard.
> gcc/ChangeLog
>
> 2020-04-28 Xiong Hu Luo <luoxhu@linux.ibm.com>
>
> PR tree-optimization/83403
> * tree-affine.c (aff_combination_convert): New parameter
> value_range.
> (expr_to_aff_combination): Use range info to check CONVERT
> expr on PLUS_EXPR/MINUS_EXPR/MULT_EXPR.
> (tree_to_aff_combination): Likewise.
> (aff_combination_expand): Get and propagate range info.
> * tree-affine.h: Include value-range.h.
> (aff_combination_convert): New parameter value_range.
> (tree_to_aff_combination): Likewise.
> (aff_combination_expand): Likewise.
> ---
> gcc/tree-affine.c | 66 +++++++++++++++++++++++++++--------------------
> gcc/tree-affine.h | 8 +++---
> 2 files changed, 43 insertions(+), 31 deletions(-)
>
> diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
> index 0eb8db1b086..63f8acd4c73 100644
> --- a/gcc/tree-affine.c
> +++ b/gcc/tree-affine.c
> @@ -220,7 +220,7 @@ aff_combination_add (aff_tree *comb1, aff_tree *comb2)
> /* Converts affine combination COMB to TYPE. */
>
> void
> -aff_combination_convert (aff_tree *comb, tree type)
> +aff_combination_convert (aff_tree *comb, tree type, value_range *vr)
> {
> unsigned i, j;
> tree comb_type = comb->type;
> @@ -228,7 +228,7 @@ aff_combination_convert (aff_tree *comb, tree type)
> if (TYPE_PRECISION (type) > TYPE_PRECISION (comb_type))
> {
> tree val = fold_convert (type, aff_combination_to_tree (comb));
> - tree_to_aff_combination (val, type, comb);
> + tree_to_aff_combination (val, type, comb, vr);
> return;
> }
>
> @@ -263,8 +263,8 @@ aff_combination_convert (aff_tree *comb, tree type)
> true when that was successful and returns the combination in COMB. */
>
> static bool
> -expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
> - tree op0, tree op1 = NULL_TREE)
> +expr_to_aff_combination (aff_tree *comb, tree_code code, tree type, tree op0,
> + tree op1 = NULL_TREE, value_range *vr = NULL)
> {
> aff_tree tmp;
> poly_int64 bitpos, bitsize, bytepos;
> @@ -279,8 +279,8 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>
> case PLUS_EXPR:
> case MINUS_EXPR:
> - tree_to_aff_combination (op0, type, comb);
> - tree_to_aff_combination (op1, type, &tmp);
> + tree_to_aff_combination (op0, type, comb, vr);
> + tree_to_aff_combination (op1, type, &tmp, vr);
> if (code == MINUS_EXPR)
> aff_combination_scale (&tmp, -1);
> aff_combination_add (comb, &tmp);
> @@ -289,7 +289,7 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
> case MULT_EXPR:
> if (TREE_CODE (op1) != INTEGER_CST)
> break;
> - tree_to_aff_combination (op0, type, comb);
> + tree_to_aff_combination (op0, type, comb, vr);
> aff_combination_scale (comb, wi::to_widest (op1));
> return true;
>
> @@ -340,27 +340,33 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
> op1 = fold_convert (otype, op1);
> return expr_to_aff_combination (comb, icode, otype, op0, op1);
> }
> - wide_int minv, maxv;
> +
> /* If inner type has wrapping overflow behavior, fold conversion
> - for below case:
> - (T1)(X - CST) -> (T1)X - (T1)CST
> - if X - CST doesn't overflow by range information. Also handle
> - (T1)(X + CST) as (T1)(X - (-CST)). */
> - if (TYPE_UNSIGNED (itype)
> + for below cases:
> + (T1)(X *+- CST) -> (T1)X *+- (T1)CST
> + when X *+- CST doesn't overflow by range information. */
> + if (vr && vr->kind () == VR_RANGE && TYPE_UNSIGNED (itype)
> && TYPE_OVERFLOW_WRAPS (itype)
> - && TREE_CODE (op0) == SSA_NAME
> - && TREE_CODE (op1) == INTEGER_CST
> - && icode != MULT_EXPR
> - && get_range_info (op0, &minv, &maxv) == VR_RANGE)
> + && TREE_CODE (op1) == INTEGER_CST)
> {
> - if (icode == PLUS_EXPR)
> - op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
> - if (wi::geu_p (minv, wi::to_wide (op1)))
> + wide_int minv, maxv;
> + minv = wi::to_wide (vr->min ());
> + maxv = wi::to_wide (vr->max ());
> + if ((icode == MULT_EXPR || icode == PLUS_EXPR)
> + && wi::leu_p (maxv, wi::to_wide (TYPE_MAX_VALUE (itype))))
> + {
> + op0 = fold_convert (otype, op0);
> + op1 = fold_convert (otype, op1);
> + return expr_to_aff_combination (comb, icode, otype, op0,
> + op1, vr);
> + }
> + else if (icode == MINUS_EXPR
> + && wi::geu_p (minv, wi::to_wide (op1)))
> {
> op0 = fold_convert (otype, op0);
> op1 = fold_convert (otype, op1);
> return expr_to_aff_combination (comb, MINUS_EXPR, otype,
> - op0, op1);
> + op0, op1, vr);
> }
> }
> }
> @@ -376,7 +382,7 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
> /* Splits EXPR into an affine combination of parts. */
>
> void
> -tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
> +tree_to_aff_combination (tree expr, tree type, aff_tree *comb, value_range *vr)
> {
> aff_tree tmp;
> enum tree_code code;
> @@ -408,10 +414,10 @@ tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
> CASE_CONVERT:
> /* ??? TREE_TYPE (expr) should be equal to type here, but IVOPTS
> calls this with not showing an outer widening cast. */
> - if (expr_to_aff_combination (comb, code,
> - TREE_TYPE (expr), TREE_OPERAND (expr, 0)))
> + if (expr_to_aff_combination (comb, code, TREE_TYPE (expr),
> + TREE_OPERAND (expr, 0), NULL_TREE, vr))
> {
> - aff_combination_convert (comb, type);
> + aff_combination_convert (comb, type, vr);
> return;
> }
> break;
> @@ -709,7 +715,8 @@ public:
>
> void
> aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
> - hash_map<tree, name_expansion *> **cache)
> + hash_map<tree, name_expansion *> **cache,
> + value_range *vr)
> {
> unsigned i;
> aff_tree to_add, current, curre;
> @@ -800,7 +807,10 @@ aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
> if (!*cache)
> *cache = new hash_map<tree, name_expansion *>;
> (*cache)->put (name, exp);
> - aff_combination_expand (¤t, cache);
> + value_range vr = NULL;
> + if (!POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (def))))
> + get_range_info (gimple_assign_rhs1 (def), vr);
> + aff_combination_expand (¤t, cache, &vr);
> exp->expansion = current;
> exp->in_progress = 0;
> }
> @@ -812,7 +822,7 @@ aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
> current = exp->expansion;
> }
> if (!useless_type_conversion_p (comb->type, current.type))
> - aff_combination_convert (¤t, comb->type);
> + aff_combination_convert (¤t, comb->type, vr);
>
> /* Accumulate the new terms to TO_ADD, so that we do not modify
> COMB while traversing it; include the term -coef * E, to remove
> diff --git a/gcc/tree-affine.h b/gcc/tree-affine.h
> index 4ec4924879d..311e5e7c23a 100644
> --- a/gcc/tree-affine.h
> +++ b/gcc/tree-affine.h
> @@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see
> #ifndef GCC_TREE_AFFINE_H
> #define GCC_TREE_AFFINE_H
>
> +#include "value-range.h"
>
> #define MAX_AFF_ELTS 8
>
> @@ -73,13 +74,14 @@ void aff_combination_mult (aff_tree *, aff_tree *, aff_tree *);
> void aff_combination_add (aff_tree *, aff_tree *);
> void aff_combination_add_elt (aff_tree *, tree, const widest_int &);
> void aff_combination_remove_elt (aff_tree *, unsigned);
> -void aff_combination_convert (aff_tree *, tree);
> -void tree_to_aff_combination (tree, tree, aff_tree *);
> +void aff_combination_convert (aff_tree *, tree, value_range *vr = NULL);
> +void tree_to_aff_combination (tree, tree, aff_tree *, value_range *vr = NULL);
> tree aff_combination_to_tree (aff_tree *);
> void unshare_aff_combination (aff_tree *);
> bool aff_combination_constant_multiple_p (aff_tree *, aff_tree *,
> poly_widest_int *);
> -void aff_combination_expand (aff_tree *, hash_map<tree, name_expansion *> **);
> +void aff_combination_expand (aff_tree *, hash_map<tree, name_expansion *> **,
> + value_range *vr = NULL);
> void tree_to_aff_combination_expand (tree, tree, aff_tree *,
> hash_map<tree, name_expansion *> **);
> tree get_inner_reference_aff (tree, aff_tree *, poly_widest_int *);
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Add value range info for affine combination to improve store motion (PR83403)
2020-04-28 7:01 ` Richard Biener
@ 2020-04-28 10:04 ` luoxhu
2020-04-28 10:30 ` Richard Biener
0 siblings, 1 reply; 9+ messages in thread
From: luoxhu @ 2020-04-28 10:04 UTC (permalink / raw)
To: Richard Biener, Xionghu Luo
Cc: gcc-patches, segher, dje.gcc, wschmidt, guojiufu, linkw, ook,
amker.cheng
On 2020/4/28 15:01, Richard Biener wrote:
> On Tue, 28 Apr 2020, Xionghu Luo wrote:
>
>> From: Xionghu Luo <luoxhu@linux.ibm.com>
>>
>> Get and propagate value range info to convert expressions with convert
>> operation on PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow. i.e.:
>>
>> (long unsigned int)((unsigned int)n * 10 + 1)
>> =>
>> (long unsigned int)((unsigned int) n * (long unsigned int)10 + (long unsigned int)1)
>>
>> With this patch for affine combination, load/store motion could detect
>> more address refs independency and promote some memory expressions to
>> registers within loop.
>>
>> PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
>> to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.
>>
>> Bootstrap and regression tested pass on Power8-LE. Any comments?
>> Thanks.
>
> So the core of the patch is adding handling of MULT_EXPR and the rest
> is a no-op? It's not clear from the patch what passing down a
> value-range does and your description doesn't say anything about this
> either.
This patch handles MULT_EXPR first, and also handles (long unsigned int)(n_93 * 10 + 1) as
"n_93*10" is not a ssa_name by using the value_range passed down, minv&maxv [1,81] is the
range info of "n_93 * 10 +1", so wi::leu_p (maxv, wi::to_wide (TYPE_MAX_VALUE (itype)) in
expr_to_aff_combination could ensure that no wrapping overflow of this converting. Not sure
if it's better described now... And some debug message as follows:
131t.lim2:
# n_93 = PHI <0(2), n_36(7)>
_118 = n_93 * 10;
_120 = (long unsigned int) _118;
_121 = _120 * 8;
_122 = &C[0][0] + _121;
*_122 = 0.0;
_128 = _118 + 1;
_129 = (long unsigned int) _128;
_130 = _129 * 8;
_131 = &C[0][0] + _130;
*_131 = 0.0;
_137 = _118 + 2;
_138 = (long unsigned int) _137;
_139 = _138 * 8;
_140 = &C[0][0] + _139;
*_140 = 0.0;
Breakpoint 28, expr_to_aff_combination (comb=0x7fffffffc068, code=NOP_EXPR, type=<integer_type 0x7ffff53c07
e0 long unsigned int>, op0=<plus_expr 0x7ffff56c0988>, op1=<tree 0x0>, vr=0x7fffffffc490) at ../../gcc-mast
er/gcc/tree-affine.c:350
92: itype = <integer_type 0x7ffff53c0690 unsigned int>
93: otype = <integer_type 0x7ffff53c07e0 long unsigned int>
94: icode = PLUS_EXPR
95: code = NOP_EXPR
(gdb) ptree op0
<mult_expr 0x7ffff56c0960
type <integer_type 0x7ffff53c0690 unsigned int sizes-gimplified public unsigned SI
size <integer_cst 0x7ffff52f1200 constant 32>
unit-size <integer_cst 0x7ffff52f1218 constant 4>
align:32 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff53c0690 precision:32 min <
integer_cst 0x7ffff52f1230 0> max <integer_cst 0x7ffff52f11e8 4294967295> context <translation_unit_decl 0x
7ffff5552850 pr83403.c>
pointer_to_this <pointer_type 0x7ffff53c4c20>>
arg:0 <ssa_name 0x7ffff5394a88 type <integer_type 0x7ffff53c0690 unsigned int>
visited var <var_decl 0x7ffff72e2520 n>
def_stmt n_93 = PHI <0(2), n_36(7)>
version:93
ptr-info 0x7ffff530b680>
arg:1 <integer_cst 0x7ffff52feda8 type <integer_type 0x7ffff53c0690 unsigned int> constant 10>>
(gdb) ptree op1
<integer_cst 0x7ffff52fed48 type <integer_type 0x7ffff53c0690 unsigned int> constant 1>
(gdb) p minv
$580 = {
<wide_int_storage> = {
val = {1, 0, 140737310886240},
len = 1,
precision = 32
},
members of generic_wide_int<wide_int_storage>:
static is_sign_extended = true
}
(gdb) p maxv
$581 = {
<wide_int_storage> = {
val = {81, 65, 40},
len = 1,
precision = 32
},
members of generic_wide_int<wide_int_storage>:
static is_sign_extended = true
}
Thanks,
Xionghu
>
> Richard.
>
>> gcc/ChangeLog
>>
>> 2020-04-28 Xiong Hu Luo <luoxhu@linux.ibm.com>
>>
>> PR tree-optimization/83403
>> * tree-affine.c (aff_combination_convert): New parameter
>> value_range.
>> (expr_to_aff_combination): Use range info to check CONVERT
>> expr on PLUS_EXPR/MINUS_EXPR/MULT_EXPR.
>> (tree_to_aff_combination): Likewise.
>> (aff_combination_expand): Get and propagate range info.
>> * tree-affine.h: Include value-range.h.
>> (aff_combination_convert): New parameter value_range.
>> (tree_to_aff_combination): Likewise.
>> (aff_combination_expand): Likewise.
>> ---
>> gcc/tree-affine.c | 66 +++++++++++++++++++++++++++--------------------
>> gcc/tree-affine.h | 8 +++---
>> 2 files changed, 43 insertions(+), 31 deletions(-)
>>
>> diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
>> index 0eb8db1b086..63f8acd4c73 100644
>> --- a/gcc/tree-affine.c
>> +++ b/gcc/tree-affine.c
>> @@ -220,7 +220,7 @@ aff_combination_add (aff_tree *comb1, aff_tree *comb2)
>> /* Converts affine combination COMB to TYPE. */
>>
>> void
>> -aff_combination_convert (aff_tree *comb, tree type)
>> +aff_combination_convert (aff_tree *comb, tree type, value_range *vr)
>> {
>> unsigned i, j;
>> tree comb_type = comb->type;
>> @@ -228,7 +228,7 @@ aff_combination_convert (aff_tree *comb, tree type)
>> if (TYPE_PRECISION (type) > TYPE_PRECISION (comb_type))
>> {
>> tree val = fold_convert (type, aff_combination_to_tree (comb));
>> - tree_to_aff_combination (val, type, comb);
>> + tree_to_aff_combination (val, type, comb, vr);
>> return;
>> }
>>
>> @@ -263,8 +263,8 @@ aff_combination_convert (aff_tree *comb, tree type)
>> true when that was successful and returns the combination in COMB. */
>>
>> static bool
>> -expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>> - tree op0, tree op1 = NULL_TREE)
>> +expr_to_aff_combination (aff_tree *comb, tree_code code, tree type, tree op0,
>> + tree op1 = NULL_TREE, value_range *vr = NULL)
>> {
>> aff_tree tmp;
>> poly_int64 bitpos, bitsize, bytepos;
>> @@ -279,8 +279,8 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>>
>> case PLUS_EXPR:
>> case MINUS_EXPR:
>> - tree_to_aff_combination (op0, type, comb);
>> - tree_to_aff_combination (op1, type, &tmp);
>> + tree_to_aff_combination (op0, type, comb, vr);
>> + tree_to_aff_combination (op1, type, &tmp, vr);
>> if (code == MINUS_EXPR)
>> aff_combination_scale (&tmp, -1);
>> aff_combination_add (comb, &tmp);
>> @@ -289,7 +289,7 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>> case MULT_EXPR:
>> if (TREE_CODE (op1) != INTEGER_CST)
>> break;
>> - tree_to_aff_combination (op0, type, comb);
>> + tree_to_aff_combination (op0, type, comb, vr);
>> aff_combination_scale (comb, wi::to_widest (op1));
>> return true;
>>
>> @@ -340,27 +340,33 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>> op1 = fold_convert (otype, op1);
>> return expr_to_aff_combination (comb, icode, otype, op0, op1);
>> }
>> - wide_int minv, maxv;
>> +
>> /* If inner type has wrapping overflow behavior, fold conversion
>> - for below case:
>> - (T1)(X - CST) -> (T1)X - (T1)CST
>> - if X - CST doesn't overflow by range information. Also handle
>> - (T1)(X + CST) as (T1)(X - (-CST)). */
>> - if (TYPE_UNSIGNED (itype)
>> + for below cases:
>> + (T1)(X *+- CST) -> (T1)X *+- (T1)CST
>> + when X *+- CST doesn't overflow by range information. */
>> + if (vr && vr->kind () == VR_RANGE && TYPE_UNSIGNED (itype)
>> && TYPE_OVERFLOW_WRAPS (itype)
>> - && TREE_CODE (op0) == SSA_NAME
>> - && TREE_CODE (op1) == INTEGER_CST
>> - && icode != MULT_EXPR
>> - && get_range_info (op0, &minv, &maxv) == VR_RANGE)
>> + && TREE_CODE (op1) == INTEGER_CST)
>> {
>> - if (icode == PLUS_EXPR)
>> - op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
>> - if (wi::geu_p (minv, wi::to_wide (op1)))
>> + wide_int minv, maxv;
>> + minv = wi::to_wide (vr->min ());
>> + maxv = wi::to_wide (vr->max ());
>> + if ((icode == MULT_EXPR || icode == PLUS_EXPR)
>> + && wi::leu_p (maxv, wi::to_wide (TYPE_MAX_VALUE (itype))))
>> + {
>> + op0 = fold_convert (otype, op0);
>> + op1 = fold_convert (otype, op1);
>> + return expr_to_aff_combination (comb, icode, otype, op0,
>> + op1, vr);
>> + }
>> + else if (icode == MINUS_EXPR
>> + && wi::geu_p (minv, wi::to_wide (op1)))
>> {
>> op0 = fold_convert (otype, op0);
>> op1 = fold_convert (otype, op1);
>> return expr_to_aff_combination (comb, MINUS_EXPR, otype,
>> - op0, op1);
>> + op0, op1, vr);
>> }
>> }
>> }
>> @@ -376,7 +382,7 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
>> /* Splits EXPR into an affine combination of parts. */
>>
>> void
>> -tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
>> +tree_to_aff_combination (tree expr, tree type, aff_tree *comb, value_range *vr)
>> {
>> aff_tree tmp;
>> enum tree_code code;
>> @@ -408,10 +414,10 @@ tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
>> CASE_CONVERT:
>> /* ??? TREE_TYPE (expr) should be equal to type here, but IVOPTS
>> calls this with not showing an outer widening cast. */
>> - if (expr_to_aff_combination (comb, code,
>> - TREE_TYPE (expr), TREE_OPERAND (expr, 0)))
>> + if (expr_to_aff_combination (comb, code, TREE_TYPE (expr),
>> + TREE_OPERAND (expr, 0), NULL_TREE, vr))
>> {
>> - aff_combination_convert (comb, type);
>> + aff_combination_convert (comb, type, vr);
>> return;
>> }
>> break;
>> @@ -709,7 +715,8 @@ public:
>>
>> void
>> aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
>> - hash_map<tree, name_expansion *> **cache)
>> + hash_map<tree, name_expansion *> **cache,
>> + value_range *vr)
>> {
>> unsigned i;
>> aff_tree to_add, current, curre;
>> @@ -800,7 +807,10 @@ aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
>> if (!*cache)
>> *cache = new hash_map<tree, name_expansion *>;
>> (*cache)->put (name, exp);
>> - aff_combination_expand (¤t, cache);
>> + value_range vr = NULL;
>> + if (!POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (def))))
>> + get_range_info (gimple_assign_rhs1 (def), vr);
>> + aff_combination_expand (¤t, cache, &vr);
>> exp->expansion = current;
>> exp->in_progress = 0;
>> }
>> @@ -812,7 +822,7 @@ aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
>> current = exp->expansion;
>> }
>> if (!useless_type_conversion_p (comb->type, current.type))
>> - aff_combination_convert (¤t, comb->type);
>> + aff_combination_convert (¤t, comb->type, vr);
>>
>> /* Accumulate the new terms to TO_ADD, so that we do not modify
>> COMB while traversing it; include the term -coef * E, to remove
>> diff --git a/gcc/tree-affine.h b/gcc/tree-affine.h
>> index 4ec4924879d..311e5e7c23a 100644
>> --- a/gcc/tree-affine.h
>> +++ b/gcc/tree-affine.h
>> @@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see
>> #ifndef GCC_TREE_AFFINE_H
>> #define GCC_TREE_AFFINE_H
>>
>> +#include "value-range.h"
>>
>> #define MAX_AFF_ELTS 8
>>
>> @@ -73,13 +74,14 @@ void aff_combination_mult (aff_tree *, aff_tree *, aff_tree *);
>> void aff_combination_add (aff_tree *, aff_tree *);
>> void aff_combination_add_elt (aff_tree *, tree, const widest_int &);
>> void aff_combination_remove_elt (aff_tree *, unsigned);
>> -void aff_combination_convert (aff_tree *, tree);
>> -void tree_to_aff_combination (tree, tree, aff_tree *);
>> +void aff_combination_convert (aff_tree *, tree, value_range *vr = NULL);
>> +void tree_to_aff_combination (tree, tree, aff_tree *, value_range *vr = NULL);
>> tree aff_combination_to_tree (aff_tree *);
>> void unshare_aff_combination (aff_tree *);
>> bool aff_combination_constant_multiple_p (aff_tree *, aff_tree *,
>> poly_widest_int *);
>> -void aff_combination_expand (aff_tree *, hash_map<tree, name_expansion *> **);
>> +void aff_combination_expand (aff_tree *, hash_map<tree, name_expansion *> **,
>> + value_range *vr = NULL);
>> void tree_to_aff_combination_expand (tree, tree, aff_tree *,
>> hash_map<tree, name_expansion *> **);
>> tree get_inner_reference_aff (tree, aff_tree *, poly_widest_int *);
>>
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Add value range info for affine combination to improve store motion (PR83403)
2020-04-28 10:04 ` luoxhu
@ 2020-04-28 10:30 ` Richard Biener
2020-04-29 7:46 ` luoxhu
0 siblings, 1 reply; 9+ messages in thread
From: Richard Biener @ 2020-04-28 10:30 UTC (permalink / raw)
To: luoxhu
Cc: Xionghu Luo, gcc-patches, segher, dje.gcc, wschmidt, guojiufu,
linkw, ook, amker.cheng
On Tue, 28 Apr 2020, luoxhu wrote:
>
>
> On 2020/4/28 15:01, Richard Biener wrote:
> > On Tue, 28 Apr 2020, Xionghu Luo wrote:
> >
> >> From: Xionghu Luo <luoxhu@linux.ibm.com>
> >>
> >> Get and propagate value range info to convert expressions with convert
> >> operation on PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow. i.e.:
> >>
> >> (long unsigned int)((unsigned int)n * 10 + 1)
> >> =>
> >> (long unsigned int)((unsigned int) n * (long unsigned int)10 + (long unsigned int)1)
> >>
> >> With this patch for affine combination, load/store motion could detect
> >> more address refs independency and promote some memory expressions to
> >> registers within loop.
> >>
> >> PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
> >> to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.
> >>
> >> Bootstrap and regression tested pass on Power8-LE. Any comments?
> >> Thanks.
> >
> > So the core of the patch is adding handling of MULT_EXPR and the rest
> > is a no-op? It's not clear from the patch what passing down a
> > value-range does and your description doesn't say anything about this
> > either.
>
> This patch handles MULT_EXPR first, and also handles (long unsigned int)(n_93 * 10 + 1) as
> "n_93*10" is not a ssa_name by using the value_range passed down, minv&maxv [1,81] is the
> range info of "n_93 * 10 +1", so wi::leu_p (maxv, wi::to_wide (TYPE_MAX_VALUE (itype)) in
> expr_to_aff_combination could ensure that no wrapping overflow of this converting. Not sure
> if it's better described now... And some debug message as follows:
>
> 131t.lim2:
> # n_93 = PHI <0(2), n_36(7)>
> _118 = n_93 * 10;
> _120 = (long unsigned int) _118;
> _121 = _120 * 8;
> _122 = &C[0][0] + _121;
> *_122 = 0.0;
> _128 = _118 + 1;
> _129 = (long unsigned int) _128;
> _130 = _129 * 8;
> _131 = &C[0][0] + _130;
> *_131 = 0.0;
> _137 = _118 + 2;
> _138 = (long unsigned int) _137;
> _139 = _138 * 8;
> _140 = &C[0][0] + _139;
> *_140 = 0.0;
>
>
> Breakpoint 28, expr_to_aff_combination (comb=0x7fffffffc068, code=NOP_EXPR, type=<integer_type 0x7ffff53c07
> e0 long unsigned int>, op0=<plus_expr 0x7ffff56c0988>, op1=<tree 0x0>, vr=0x7fffffffc490) at ../../gcc-mast
> er/gcc/tree-affine.c:350
> 92: itype = <integer_type 0x7ffff53c0690 unsigned int>
> 93: otype = <integer_type 0x7ffff53c07e0 long unsigned int>
> 94: icode = PLUS_EXPR
> 95: code = NOP_EXPR
> (gdb) ptree op0
> <mult_expr 0x7ffff56c0960
> type <integer_type 0x7ffff53c0690 unsigned int sizes-gimplified public unsigned SI
> size <integer_cst 0x7ffff52f1200 constant 32>
> unit-size <integer_cst 0x7ffff52f1218 constant 4>
> align:32 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff53c0690 precision:32 min <
> integer_cst 0x7ffff52f1230 0> max <integer_cst 0x7ffff52f11e8 4294967295> context <translation_unit_decl 0x
> 7ffff5552850 pr83403.c>
> pointer_to_this <pointer_type 0x7ffff53c4c20>>
>
> arg:0 <ssa_name 0x7ffff5394a88 type <integer_type 0x7ffff53c0690 unsigned int>
> visited var <var_decl 0x7ffff72e2520 n>
> def_stmt n_93 = PHI <0(2), n_36(7)>
> version:93
> ptr-info 0x7ffff530b680>
> arg:1 <integer_cst 0x7ffff52feda8 type <integer_type 0x7ffff53c0690 unsigned int> constant 10>>
> (gdb) ptree op1
> <integer_cst 0x7ffff52fed48 type <integer_type 0x7ffff53c0690 unsigned int> constant 1>
> (gdb) p minv
> $580 = {
> <wide_int_storage> = {
> val = {1, 0, 140737310886240},
> len = 1,
> precision = 32
> },
> members of generic_wide_int<wide_int_storage>:
> static is_sign_extended = true
> }
> (gdb) p maxv
> $581 = {
> <wide_int_storage> = {
> val = {81, 65, 40},
> len = 1,
> precision = 32
> },
> members of generic_wide_int<wide_int_storage>:
> static is_sign_extended = true
> }
OK, I guess instead of get_range_info expr_to_aff_combination could
simply use determine_value_range (op0, &minv, &maxv) == VR_RANGE
(the && TREE_CODE (op0) == SSA_NAME check can then be removed)?
Thanks,
Richard.
>
> Thanks,
> Xionghu
>
> >
> > Richard.
> >
> >> gcc/ChangeLog
> >>
> >> 2020-04-28 Xiong Hu Luo <luoxhu@linux.ibm.com>
> >>
> >> PR tree-optimization/83403
> >> * tree-affine.c (aff_combination_convert): New parameter
> >> value_range.
> >> (expr_to_aff_combination): Use range info to check CONVERT
> >> expr on PLUS_EXPR/MINUS_EXPR/MULT_EXPR.
> >> (tree_to_aff_combination): Likewise.
> >> (aff_combination_expand): Get and propagate range info.
> >> * tree-affine.h: Include value-range.h.
> >> (aff_combination_convert): New parameter value_range.
> >> (tree_to_aff_combination): Likewise.
> >> (aff_combination_expand): Likewise.
> >> ---
> >> gcc/tree-affine.c | 66 +++++++++++++++++++++++++++--------------------
> >> gcc/tree-affine.h | 8 +++---
> >> 2 files changed, 43 insertions(+), 31 deletions(-)
> >>
> >> diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
> >> index 0eb8db1b086..63f8acd4c73 100644
> >> --- a/gcc/tree-affine.c
> >> +++ b/gcc/tree-affine.c
> >> @@ -220,7 +220,7 @@ aff_combination_add (aff_tree *comb1, aff_tree *comb2)
> >> /* Converts affine combination COMB to TYPE. */
> >>
> >> void
> >> -aff_combination_convert (aff_tree *comb, tree type)
> >> +aff_combination_convert (aff_tree *comb, tree type, value_range *vr)
> >> {
> >> unsigned i, j;
> >> tree comb_type = comb->type;
> >> @@ -228,7 +228,7 @@ aff_combination_convert (aff_tree *comb, tree type)
> >> if (TYPE_PRECISION (type) > TYPE_PRECISION (comb_type))
> >> {
> >> tree val = fold_convert (type, aff_combination_to_tree (comb));
> >> - tree_to_aff_combination (val, type, comb);
> >> + tree_to_aff_combination (val, type, comb, vr);
> >> return;
> >> }
> >>
> >> @@ -263,8 +263,8 @@ aff_combination_convert (aff_tree *comb, tree type)
> >> true when that was successful and returns the combination in COMB. */
> >>
> >> static bool
> >> -expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
> >> - tree op0, tree op1 = NULL_TREE)
> >> +expr_to_aff_combination (aff_tree *comb, tree_code code, tree type, tree op0,
> >> + tree op1 = NULL_TREE, value_range *vr = NULL)
> >> {
> >> aff_tree tmp;
> >> poly_int64 bitpos, bitsize, bytepos;
> >> @@ -279,8 +279,8 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
> >>
> >> case PLUS_EXPR:
> >> case MINUS_EXPR:
> >> - tree_to_aff_combination (op0, type, comb);
> >> - tree_to_aff_combination (op1, type, &tmp);
> >> + tree_to_aff_combination (op0, type, comb, vr);
> >> + tree_to_aff_combination (op1, type, &tmp, vr);
> >> if (code == MINUS_EXPR)
> >> aff_combination_scale (&tmp, -1);
> >> aff_combination_add (comb, &tmp);
> >> @@ -289,7 +289,7 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
> >> case MULT_EXPR:
> >> if (TREE_CODE (op1) != INTEGER_CST)
> >> break;
> >> - tree_to_aff_combination (op0, type, comb);
> >> + tree_to_aff_combination (op0, type, comb, vr);
> >> aff_combination_scale (comb, wi::to_widest (op1));
> >> return true;
> >>
> >> @@ -340,27 +340,33 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
> >> op1 = fold_convert (otype, op1);
> >> return expr_to_aff_combination (comb, icode, otype, op0, op1);
> >> }
> >> - wide_int minv, maxv;
> >> +
> >> /* If inner type has wrapping overflow behavior, fold conversion
> >> - for below case:
> >> - (T1)(X - CST) -> (T1)X - (T1)CST
> >> - if X - CST doesn't overflow by range information. Also handle
> >> - (T1)(X + CST) as (T1)(X - (-CST)). */
> >> - if (TYPE_UNSIGNED (itype)
> >> + for below cases:
> >> + (T1)(X *+- CST) -> (T1)X *+- (T1)CST
> >> + when X *+- CST doesn't overflow by range information. */
> >> + if (vr && vr->kind () == VR_RANGE && TYPE_UNSIGNED (itype)
> >> && TYPE_OVERFLOW_WRAPS (itype)
> >> - && TREE_CODE (op0) == SSA_NAME
> >> - && TREE_CODE (op1) == INTEGER_CST
> >> - && icode != MULT_EXPR
> >> - && get_range_info (op0, &minv, &maxv) == VR_RANGE)
> >> + && TREE_CODE (op1) == INTEGER_CST)
> >> {
> >> - if (icode == PLUS_EXPR)
> >> - op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
> >> - if (wi::geu_p (minv, wi::to_wide (op1)))
> >> + wide_int minv, maxv;
> >> + minv = wi::to_wide (vr->min ());
> >> + maxv = wi::to_wide (vr->max ());
> >> + if ((icode == MULT_EXPR || icode == PLUS_EXPR)
> >> + && wi::leu_p (maxv, wi::to_wide (TYPE_MAX_VALUE (itype))))
> >> + {
> >> + op0 = fold_convert (otype, op0);
> >> + op1 = fold_convert (otype, op1);
> >> + return expr_to_aff_combination (comb, icode, otype, op0,
> >> + op1, vr);
> >> + }
> >> + else if (icode == MINUS_EXPR
> >> + && wi::geu_p (minv, wi::to_wide (op1)))
> >> {
> >> op0 = fold_convert (otype, op0);
> >> op1 = fold_convert (otype, op1);
> >> return expr_to_aff_combination (comb, MINUS_EXPR, otype,
> >> - op0, op1);
> >> + op0, op1, vr);
> >> }
> >> }
> >> }
> >> @@ -376,7 +382,7 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
> >> /* Splits EXPR into an affine combination of parts. */
> >>
> >> void
> >> -tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
> >> +tree_to_aff_combination (tree expr, tree type, aff_tree *comb, value_range *vr)
> >> {
> >> aff_tree tmp;
> >> enum tree_code code;
> >> @@ -408,10 +414,10 @@ tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
> >> CASE_CONVERT:
> >> /* ??? TREE_TYPE (expr) should be equal to type here, but IVOPTS
> >> calls this with not showing an outer widening cast. */
> >> - if (expr_to_aff_combination (comb, code,
> >> - TREE_TYPE (expr), TREE_OPERAND (expr, 0)))
> >> + if (expr_to_aff_combination (comb, code, TREE_TYPE (expr),
> >> + TREE_OPERAND (expr, 0), NULL_TREE, vr))
> >> {
> >> - aff_combination_convert (comb, type);
> >> + aff_combination_convert (comb, type, vr);
> >> return;
> >> }
> >> break;
> >> @@ -709,7 +715,8 @@ public:
> >>
> >> void
> >> aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
> >> - hash_map<tree, name_expansion *> **cache)
> >> + hash_map<tree, name_expansion *> **cache,
> >> + value_range *vr)
> >> {
> >> unsigned i;
> >> aff_tree to_add, current, curre;
> >> @@ -800,7 +807,10 @@ aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
> >> if (!*cache)
> >> *cache = new hash_map<tree, name_expansion *>;
> >> (*cache)->put (name, exp);
> >> - aff_combination_expand (¤t, cache);
> >> + value_range vr = NULL;
> >> + if (!POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (def))))
> >> + get_range_info (gimple_assign_rhs1 (def), vr);
> >> + aff_combination_expand (¤t, cache, &vr);
> >> exp->expansion = current;
> >> exp->in_progress = 0;
> >> }
> >> @@ -812,7 +822,7 @@ aff_combination_expand (aff_tree *comb ATTRIBUTE_UNUSED,
> >> current = exp->expansion;
> >> }
> >> if (!useless_type_conversion_p (comb->type, current.type))
> >> - aff_combination_convert (¤t, comb->type);
> >> + aff_combination_convert (¤t, comb->type, vr);
> >>
> >> /* Accumulate the new terms to TO_ADD, so that we do not modify
> >> COMB while traversing it; include the term -coef * E, to remove
> >> diff --git a/gcc/tree-affine.h b/gcc/tree-affine.h
> >> index 4ec4924879d..311e5e7c23a 100644
> >> --- a/gcc/tree-affine.h
> >> +++ b/gcc/tree-affine.h
> >> @@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see
> >> #ifndef GCC_TREE_AFFINE_H
> >> #define GCC_TREE_AFFINE_H
> >>
> >> +#include "value-range.h"
> >>
> >> #define MAX_AFF_ELTS 8
> >>
> >> @@ -73,13 +74,14 @@ void aff_combination_mult (aff_tree *, aff_tree *, aff_tree *);
> >> void aff_combination_add (aff_tree *, aff_tree *);
> >> void aff_combination_add_elt (aff_tree *, tree, const widest_int &);
> >> void aff_combination_remove_elt (aff_tree *, unsigned);
> >> -void aff_combination_convert (aff_tree *, tree);
> >> -void tree_to_aff_combination (tree, tree, aff_tree *);
> >> +void aff_combination_convert (aff_tree *, tree, value_range *vr = NULL);
> >> +void tree_to_aff_combination (tree, tree, aff_tree *, value_range *vr = NULL);
> >> tree aff_combination_to_tree (aff_tree *);
> >> void unshare_aff_combination (aff_tree *);
> >> bool aff_combination_constant_multiple_p (aff_tree *, aff_tree *,
> >> poly_widest_int *);
> >> -void aff_combination_expand (aff_tree *, hash_map<tree, name_expansion *> **);
> >> +void aff_combination_expand (aff_tree *, hash_map<tree, name_expansion *> **,
> >> + value_range *vr = NULL);
> >> void tree_to_aff_combination_expand (tree, tree, aff_tree *,
> >> hash_map<tree, name_expansion *> **);
> >> tree get_inner_reference_aff (tree, aff_tree *, poly_widest_int *);
> >>
> >
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Add value range info for affine combination to improve store motion (PR83403)
2020-04-28 10:30 ` Richard Biener
@ 2020-04-29 7:46 ` luoxhu
2020-04-30 7:53 ` [PATCH v2] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403) luoxhu
0 siblings, 1 reply; 9+ messages in thread
From: luoxhu @ 2020-04-29 7:46 UTC (permalink / raw)
To: Richard Biener
Cc: Xionghu Luo, gcc-patches, segher, dje.gcc, wschmidt, guojiufu,
linkw, ook, amker.cheng
On 2020/4/28 18:30, Richard Biener wrote:
>
> OK, I guess instead of get_range_info expr_to_aff_combination could
> simply use determine_value_range (op0, &minv, &maxv) == VR_RANGE
> (the && TREE_CODE (op0) == SSA_NAME check can then be removed)?
>
Tried with determine_value_range, it works and is much more straight forward,
Thanks for your suggestion, update the patch as below with some tests,
regression testing.
[PATCH] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403)
Use determine_value_range to get value range info for fold convert expressions
with internal operation PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow on
wrapping overflow inner type. i.e.:
(long unsigned int)((unsigned int)n * 10 + 1)
=>
(long unsigned int)n * (long unsigned int)10 + (long unsigned int)1
With this patch for affine combination, load/store motion could detect
more address refs independency and promote some memory expressions to
registers within loop.
PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.
gcc/ChangeLog
2020-04-29 Xiong Hu Luo <luoxhu@linux.ibm.com>
PR tree-optimization/83403
* tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with
determine_value_range, Add fold conversion of MULT_EXPR, fix the
previous PLUS_EXPR.
gcc/testsuite/ChangeLog
2020-04-29 Xiong Hu Luo <luoxhu@linux.ibm.com>
PR tree-optimization/83403
* gcc.dg/tree-ssa/pr83403-1.c: New test.
* gcc.dg/tree-ssa/pr83403-2.c: New test.
* gcc.dg/tree-ssa/pr83403.h: New header.
---
gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c | 8 ++++++
gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c | 8 ++++++
gcc/testsuite/gcc.dg/tree-ssa/pr83403.h | 30 +++++++++++++++++++++++
gcc/tree-affine.c | 22 +++++++++--------
4 files changed, 58 insertions(+), 10 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
new file mode 100644
index 00000000000..748375b03af
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE unsigned int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
new file mode 100644
index 00000000000..ca2e6bbd61c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
new file mode 100644
index 00000000000..0da8a835b5f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
@@ -0,0 +1,30 @@
+__attribute__ ((noinline)) void
+calculate (const double *__restrict__ A, const double *__restrict__ B,
+ double *__restrict__ C)
+{
+ TYPE m = 0;
+ TYPE n = 0;
+ TYPE k = 0;
+
+ A = (const double *) __builtin_assume_aligned (A, 16);
+ B = (const double *) __builtin_assume_aligned (B, 16);
+ C = (double *) __builtin_assume_aligned (C, 16);
+
+ for (n = 0; n < 9; n++)
+ {
+ for (m = 0; m < 10; m++)
+ {
+ C[(n * 10) + m] = 0.0;
+ }
+
+ for (k = 0; k < 17; k++)
+ {
+#pragma simd
+ for (m = 0; m < 10; m++)
+ {
+ C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
+ }
+ }
+ }
+}
+
diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
index 0eb8db1b086..86bb64fe245 100644
--- a/gcc/tree-affine.c
+++ b/gcc/tree-affine.c
@@ -343,23 +343,25 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
wide_int minv, maxv;
/* If inner type has wrapping overflow behavior, fold conversion
for below case:
- (T1)(X - CST) -> (T1)X - (T1)CST
- if X - CST doesn't overflow by range information. Also handle
- (T1)(X + CST) as (T1)(X - (-CST)). */
+ (T1)(X *+- CST) -> (T1)X *+- (T1)CST
+ if X *+- CST doesn't overflow by range information. */
if (TYPE_UNSIGNED (itype)
&& TYPE_OVERFLOW_WRAPS (itype)
- && TREE_CODE (op0) == SSA_NAME
&& TREE_CODE (op1) == INTEGER_CST
- && icode != MULT_EXPR
- && get_range_info (op0, &minv, &maxv) == VR_RANGE)
+ && determine_value_range (op0, &minv, &maxv) == VR_RANGE)
{
- if (icode == PLUS_EXPR)
- op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
- if (wi::geu_p (minv, wi::to_wide (op1)))
+ if ((icode == PLUS_EXPR
+ && wi::leu_p (maxv + wi::to_wide (op1),
+ wi::to_wide (TYPE_MAX_VALUE (itype))))
+ || (icode == MULT_EXPR
+ && wi::leu_p (maxv * wi::to_wide (op1),
+ wi::to_wide (TYPE_MAX_VALUE (itype))))
+ || (icode == MINUS_EXPR
+ && wi::geu_p (minv, wi::to_wide (op1))))
{
op0 = fold_convert (otype, op0);
op1 = fold_convert (otype, op1);
- return expr_to_aff_combination (comb, MINUS_EXPR, otype,
+ return expr_to_aff_combination (comb, icode, otype,
op0, op1);
}
}
--
2.21.0.777.g83232e3864
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v2] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403)
2020-04-29 7:46 ` luoxhu
@ 2020-04-30 7:53 ` luoxhu
2020-05-06 12:09 ` Richard Biener
0 siblings, 1 reply; 9+ messages in thread
From: luoxhu @ 2020-04-30 7:53 UTC (permalink / raw)
To: Richard Biener
Cc: Xionghu Luo, gcc-patches, segher, dje.gcc, wschmidt, guojiufu,
linkw, ook, amker.cheng
Update the patch with overflow check. Bootstrap and regression tested PASS on Power8-LE.
Use determine_value_range to get value range info for fold convert expressions
with internal operation PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow on
wrapping overflow inner type. i.e.:
(long unsigned int)((unsigned int)n * 10 + 1)
=>
(long unsigned int)n * (long unsigned int)10 + (long unsigned int)1
With this patch for affine combination, load/store motion could detect
more address refs independency and promote some memory expressions to
registers within loop.
PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.
gcc/ChangeLog
2020-04-30 Xiong Hu Luo <luoxhu@linux.ibm.com>
PR tree-optimization/83403
* tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with
determine_value_range, Add fold conversion of MULT_EXPR, fix the
previous PLUS_EXPR.
gcc/testsuite/ChangeLog
2020-04-30 Xiong Hu Luo <luoxhu@linux.ibm.com>
PR tree-optimization/83403
* gcc.dg/tree-ssa/pr83403-1.c: New test.
* gcc.dg/tree-ssa/pr83403-2.c: New test.
* gcc.dg/tree-ssa/pr83403.h: New header.
---
gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c | 8 ++++++
gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c | 8 ++++++
gcc/testsuite/gcc.dg/tree-ssa/pr83403.h | 30 +++++++++++++++++++++++
gcc/tree-affine.c | 24 ++++++++++--------
4 files changed, 60 insertions(+), 10 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
new file mode 100644
index 00000000000..748375b03af
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE unsigned int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
new file mode 100644
index 00000000000..ca2e6bbd61c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
new file mode 100644
index 00000000000..0da8a835b5f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
@@ -0,0 +1,30 @@
+__attribute__ ((noinline)) void
+calculate (const double *__restrict__ A, const double *__restrict__ B,
+ double *__restrict__ C)
+{
+ TYPE m = 0;
+ TYPE n = 0;
+ TYPE k = 0;
+
+ A = (const double *) __builtin_assume_aligned (A, 16);
+ B = (const double *) __builtin_assume_aligned (B, 16);
+ C = (double *) __builtin_assume_aligned (C, 16);
+
+ for (n = 0; n < 9; n++)
+ {
+ for (m = 0; m < 10; m++)
+ {
+ C[(n * 10) + m] = 0.0;
+ }
+
+ for (k = 0; k < 17; k++)
+ {
+#pragma simd
+ for (m = 0; m < 10; m++)
+ {
+ C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
+ }
+ }
+ }
+}
+
diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
index 0eb8db1b086..5620e6bf28f 100644
--- a/gcc/tree-affine.c
+++ b/gcc/tree-affine.c
@@ -343,24 +343,28 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
wide_int minv, maxv;
/* If inner type has wrapping overflow behavior, fold conversion
for below case:
- (T1)(X - CST) -> (T1)X - (T1)CST
- if X - CST doesn't overflow by range information. Also handle
- (T1)(X + CST) as (T1)(X - (-CST)). */
+ (T1)(X *+- CST) -> (T1)X *+- (T1)CST
+ if X *+- CST doesn't overflow by range information. */
if (TYPE_UNSIGNED (itype)
&& TYPE_OVERFLOW_WRAPS (itype)
- && TREE_CODE (op0) == SSA_NAME
&& TREE_CODE (op1) == INTEGER_CST
- && icode != MULT_EXPR
- && get_range_info (op0, &minv, &maxv) == VR_RANGE)
+ && determine_value_range (op0, &minv, &maxv) == VR_RANGE)
{
+ wi::overflow_type overflow = wi::OVF_NONE;
+ signop sign = UNSIGNED;
if (icode == PLUS_EXPR)
- op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
- if (wi::geu_p (minv, wi::to_wide (op1)))
+ wi::add (maxv, wi::to_wide (op1), sign, &overflow);
+ else if (icode == MULT_EXPR)
+ wi::mul (maxv, wi::to_wide (op1), sign, &overflow);
+ else
+ wi::sub (minv, wi::to_wide (op1), sign, &overflow);
+
+ if (overflow == wi::OVF_NONE)
{
op0 = fold_convert (otype, op0);
op1 = fold_convert (otype, op1);
- return expr_to_aff_combination (comb, MINUS_EXPR, otype,
- op0, op1);
+ return expr_to_aff_combination (comb, icode, otype, op0,
+ op1);
}1
}
}
--
2.21.0.777.g83232e3864
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403)
2020-04-30 7:53 ` [PATCH v2] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403) luoxhu
@ 2020-05-06 12:09 ` Richard Biener
2020-05-11 9:04 ` luoxhu
0 siblings, 1 reply; 9+ messages in thread
From: Richard Biener @ 2020-05-06 12:09 UTC (permalink / raw)
To: luoxhu
Cc: Xionghu Luo, gcc-patches, segher, dje.gcc, wschmidt, guojiufu,
linkw, ook, amker.cheng
On Thu, 30 Apr 2020, luoxhu wrote:
> Update the patch with overflow check. Bootstrap and regression tested PASS on Power8-LE.
>
>
> Use determine_value_range to get value range info for fold convert expressions
> with internal operation PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow on
> wrapping overflow inner type. i.e.:
>
> (long unsigned int)((unsigned int)n * 10 + 1)
> =>
> (long unsigned int)n * (long unsigned int)10 + (long unsigned int)1
>
> With this patch for affine combination, load/store motion could detect
> more address refs independency and promote some memory expressions to
> registers within loop.
>
> PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
> to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.
This is OK for trunk if bootstrapped / tested properl.
Thanks,
Richard.
> gcc/ChangeLog
>
> 2020-04-30 Xiong Hu Luo <luoxhu@linux.ibm.com>
>
> PR tree-optimization/83403
> * tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with
> determine_value_range, Add fold conversion of MULT_EXPR, fix the
> previous PLUS_EXPR.
>
> gcc/testsuite/ChangeLog
>
> 2020-04-30 Xiong Hu Luo <luoxhu@linux.ibm.com>
>
> PR tree-optimization/83403
> * gcc.dg/tree-ssa/pr83403-1.c: New test.
> * gcc.dg/tree-ssa/pr83403-2.c: New test.
> * gcc.dg/tree-ssa/pr83403.h: New header.
> ---
> gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c | 8 ++++++
> gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c | 8 ++++++
> gcc/testsuite/gcc.dg/tree-ssa/pr83403.h | 30 +++++++++++++++++++++++
> gcc/tree-affine.c | 24 ++++++++++--------
> 4 files changed, 60 insertions(+), 10 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
> new file mode 100644
> index 00000000000..748375b03af
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
> +
> +#define TYPE unsigned int
> +
> +#include "pr83403.h"
> +
> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
> new file mode 100644
> index 00000000000..ca2e6bbd61c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
> +
> +#define TYPE int
> +
> +#include "pr83403.h"
> +
> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
> new file mode 100644
> index 00000000000..0da8a835b5f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
> @@ -0,0 +1,30 @@
> +__attribute__ ((noinline)) void
> +calculate (const double *__restrict__ A, const double *__restrict__ B,
> + double *__restrict__ C)
> +{
> + TYPE m = 0;
> + TYPE n = 0;
> + TYPE k = 0;
> +
> + A = (const double *) __builtin_assume_aligned (A, 16);
> + B = (const double *) __builtin_assume_aligned (B, 16);
> + C = (double *) __builtin_assume_aligned (C, 16);
> +
> + for (n = 0; n < 9; n++)
> + {
> + for (m = 0; m < 10; m++)
> + {
> + C[(n * 10) + m] = 0.0;
> + }
> +
> + for (k = 0; k < 17; k++)
> + {
> +#pragma simd
> + for (m = 0; m < 10; m++)
> + {
> + C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
> + }
> + }
> + }
> +}
> +
> diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
> index 0eb8db1b086..5620e6bf28f 100644
> --- a/gcc/tree-affine.c
> +++ b/gcc/tree-affine.c
> @@ -343,24 +343,28 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
> wide_int minv, maxv;
> /* If inner type has wrapping overflow behavior, fold conversion
> for below case:
> - (T1)(X - CST) -> (T1)X - (T1)CST
> - if X - CST doesn't overflow by range information. Also handle
> - (T1)(X + CST) as (T1)(X - (-CST)). */
> + (T1)(X *+- CST) -> (T1)X *+- (T1)CST
> + if X *+- CST doesn't overflow by range information. */
> if (TYPE_UNSIGNED (itype)
> && TYPE_OVERFLOW_WRAPS (itype)
> - && TREE_CODE (op0) == SSA_NAME
> && TREE_CODE (op1) == INTEGER_CST
> - && icode != MULT_EXPR
> - && get_range_info (op0, &minv, &maxv) == VR_RANGE)
> + && determine_value_range (op0, &minv, &maxv) == VR_RANGE)
> {
> + wi::overflow_type overflow = wi::OVF_NONE;
> + signop sign = UNSIGNED;
> if (icode == PLUS_EXPR)
> - op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
> - if (wi::geu_p (minv, wi::to_wide (op1)))
> + wi::add (maxv, wi::to_wide (op1), sign, &overflow);
> + else if (icode == MULT_EXPR)
> + wi::mul (maxv, wi::to_wide (op1), sign, &overflow);
> + else
> + wi::sub (minv, wi::to_wide (op1), sign, &overflow);
> +
> + if (overflow == wi::OVF_NONE)
> {
> op0 = fold_convert (otype, op0);
> op1 = fold_convert (otype, op1);
> - return expr_to_aff_combination (comb, MINUS_EXPR, otype,
> - op0, op1);
> + return expr_to_aff_combination (comb, icode, otype, op0,
> + op1);
> }1
> }
> }
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403)
2020-05-06 12:09 ` Richard Biener
@ 2020-05-11 9:04 ` luoxhu
2020-05-11 9:12 ` Richard Biener
0 siblings, 1 reply; 9+ messages in thread
From: luoxhu @ 2020-05-11 9:04 UTC (permalink / raw)
To: Richard Biener
Cc: luoxhu, gcc-patches, segher, dje.gcc, wschmidt, guojiufu, linkw,
ook, amker.cheng
在 2020-05-06 20:09,Richard Biener 写道:
> On Thu, 30 Apr 2020, luoxhu wrote:
>
>> Update the patch with overflow check. Bootstrap and regression tested
>> PASS on Power8-LE.
>>
>>
>> Use determine_value_range to get value range info for fold convert
>> expressions
>> with internal operation PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not
>> overflow on
>> wrapping overflow inner type. i.e.:
>>
>> (long unsigned int)((unsigned int)n * 10 + 1)
>> =>
>> (long unsigned int)n * (long unsigned int)10 + (long unsigned int)1
>>
>> With this patch for affine combination, load/store motion could detect
>> more address refs independency and promote some memory expressions to
>> registers within loop.
>>
>> PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
>> to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.
>
> This is OK for trunk if bootstrapped / tested properl.
Bootstrap and regression tested pass on power8LE, committed to
r11-259-g0447929f11e6a3e1b076841712b90a8b6bc7d33a, is it necessary
to backport it to gcc-10&gcc-9?
Thanks,
Xionghu
>
> Thanks,
> Richard.
>
>> gcc/ChangeLog
>>
>> 2020-04-30 Xiong Hu Luo <luoxhu@linux.ibm.com>
>>
>> PR tree-optimization/83403
>> * tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with
>> determine_value_range, Add fold conversion of MULT_EXPR, fix the
>> previous PLUS_EXPR.
>>
>> gcc/testsuite/ChangeLog
>>
>> 2020-04-30 Xiong Hu Luo <luoxhu@linux.ibm.com>
>>
>> PR tree-optimization/83403
>> * gcc.dg/tree-ssa/pr83403-1.c: New test.
>> * gcc.dg/tree-ssa/pr83403-2.c: New test.
>> * gcc.dg/tree-ssa/pr83403.h: New header.
>> ---
>> gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c | 8 ++++++
>> gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c | 8 ++++++
>> gcc/testsuite/gcc.dg/tree-ssa/pr83403.h | 30
>> +++++++++++++++++++++++
>> gcc/tree-affine.c | 24 ++++++++++--------
>> 4 files changed, 60 insertions(+), 10 deletions(-)
>> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
>> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
>> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
>>
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
>> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
>> new file mode 100644
>> index 00000000000..748375b03af
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
>> @@ -0,0 +1,8 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
>> +
>> +#define TYPE unsigned int
>> +
>> +#include "pr83403.h"
>> +
>> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10
>> "lim2" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
>> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
>> new file mode 100644
>> index 00000000000..ca2e6bbd61c
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
>> @@ -0,0 +1,8 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
>> +
>> +#define TYPE int
>> +
>> +#include "pr83403.h"
>> +
>> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10
>> "lim2" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
>> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
>> new file mode 100644
>> index 00000000000..0da8a835b5f
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
>> @@ -0,0 +1,30 @@
>> +__attribute__ ((noinline)) void
>> +calculate (const double *__restrict__ A, const double *__restrict__
>> B,
>> + double *__restrict__ C)
>> +{
>> + TYPE m = 0;
>> + TYPE n = 0;
>> + TYPE k = 0;
>> +
>> + A = (const double *) __builtin_assume_aligned (A, 16);
>> + B = (const double *) __builtin_assume_aligned (B, 16);
>> + C = (double *) __builtin_assume_aligned (C, 16);
>> +
>> + for (n = 0; n < 9; n++)
>> + {
>> + for (m = 0; m < 10; m++)
>> + {
>> + C[(n * 10) + m] = 0.0;
>> + }
>> +
>> + for (k = 0; k < 17; k++)
>> + {
>> +#pragma simd
>> + for (m = 0; m < 10; m++)
>> + {
>> + C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
>> + }
>> + }
>> + }
>> +}
>> +
>> diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
>> index 0eb8db1b086..5620e6bf28f 100644
>> --- a/gcc/tree-affine.c
>> +++ b/gcc/tree-affine.c
>> @@ -343,24 +343,28 @@ expr_to_aff_combination (aff_tree *comb,
>> tree_code code, tree type,
>> wide_int minv, maxv;
>> /* If inner type has wrapping overflow behavior, fold conversion
>> for below case:
>> - (T1)(X - CST) -> (T1)X - (T1)CST
>> - if X - CST doesn't overflow by range information. Also
>> handle
>> - (T1)(X + CST) as (T1)(X - (-CST)). */
>> + (T1)(X *+- CST) -> (T1)X *+- (T1)CST
>> + if X *+- CST doesn't overflow by range information. */
>> if (TYPE_UNSIGNED (itype)
>> && TYPE_OVERFLOW_WRAPS (itype)
>> - && TREE_CODE (op0) == SSA_NAME
>> && TREE_CODE (op1) == INTEGER_CST
>> - && icode != MULT_EXPR
>> - && get_range_info (op0, &minv, &maxv) == VR_RANGE)
>> + && determine_value_range (op0, &minv, &maxv) == VR_RANGE)
>> {
>> + wi::overflow_type overflow = wi::OVF_NONE;
>> + signop sign = UNSIGNED;
>> if (icode == PLUS_EXPR)
>> - op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
>> - if (wi::geu_p (minv, wi::to_wide (op1)))
>> + wi::add (maxv, wi::to_wide (op1), sign, &overflow);
>> + else if (icode == MULT_EXPR)
>> + wi::mul (maxv, wi::to_wide (op1), sign, &overflow);
>> + else
>> + wi::sub (minv, wi::to_wide (op1), sign, &overflow);
>> +
>> + if (overflow == wi::OVF_NONE)
>> {
>> op0 = fold_convert (otype, op0);
>> op1 = fold_convert (otype, op1);
>> - return expr_to_aff_combination (comb, MINUS_EXPR, otype,
>> - op0, op1);
>> + return expr_to_aff_combination (comb, icode, otype, op0,
>> + op1);
>> }1
>> }
>> }
>>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403)
2020-05-11 9:04 ` luoxhu
@ 2020-05-11 9:12 ` Richard Biener
0 siblings, 0 replies; 9+ messages in thread
From: Richard Biener @ 2020-05-11 9:12 UTC (permalink / raw)
To: luoxhu
Cc: luoxhu, gcc-patches, segher, dje.gcc, wschmidt, guojiufu, linkw,
ook, amker.cheng
On Mon, 11 May 2020, luoxhu wrote:
> 在 2020-05-06 20:09,Richard Biener 写道:
> > On Thu, 30 Apr 2020, luoxhu wrote:
> >
> >> Update the patch with overflow check. Bootstrap and regression tested PASS
> >> on Power8-LE.
> >>
> >>
> >> Use determine_value_range to get value range info for fold convert
> >> expressions
> >> with internal operation PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow on
> >> wrapping overflow inner type. i.e.:
> >>
> >> (long unsigned int)((unsigned int)n * 10 + 1)
> >> =>
> >> (long unsigned int)n * (long unsigned int)10 + (long unsigned int)1
> >>
> >> With this patch for affine combination, load/store motion could detect
> >> more address refs independency and promote some memory expressions to
> >> registers within loop.
> >>
> >> PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))"
> >> to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow.
> >
> > This is OK for trunk if bootstrapped / tested properl.
>
>
> Bootstrap and regression tested pass on power8LE, committed to
> r11-259-g0447929f11e6a3e1b076841712b90a8b6bc7d33a, is it necessary
> to backport it to gcc-10&gcc-9?
For sure not.
Richard.
>
> Thanks,
> Xionghu
>
>
> >
> > Thanks,
> > Richard.
> >
> >> gcc/ChangeLog
> >>
> >> 2020-04-30 Xiong Hu Luo <luoxhu@linux.ibm.com>
> >>
> >> PR tree-optimization/83403
> >> * tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with
> >> determine_value_range, Add fold conversion of MULT_EXPR, fix the
> >> previous PLUS_EXPR.
> >>
> >> gcc/testsuite/ChangeLog
> >>
> >> 2020-04-30 Xiong Hu Luo <luoxhu@linux.ibm.com>
> >>
> >> PR tree-optimization/83403
> >> * gcc.dg/tree-ssa/pr83403-1.c: New test.
> >> * gcc.dg/tree-ssa/pr83403-2.c: New test.
> >> * gcc.dg/tree-ssa/pr83403.h: New header.
> >> ---
> >> gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c | 8 ++++++
> >> gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c | 8 ++++++
> >> gcc/testsuite/gcc.dg/tree-ssa/pr83403.h | 30
> >> +++++++++++++++++++++++
> >> gcc/tree-affine.c | 24 ++++++++++--------
> >> 4 files changed, 60 insertions(+), 10 deletions(-)
> >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
> >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
> >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
> >>
> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
> >> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
> >> new file mode 100644
> >> index 00000000000..748375b03af
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
> >> @@ -0,0 +1,8 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
> >> +
> >> +#define TYPE unsigned int
> >> +
> >> +#include "pr83403.h"
> >> +
> >> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2"
> >> } } */
> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
> >> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
> >> new file mode 100644
> >> index 00000000000..ca2e6bbd61c
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
> >> @@ -0,0 +1,8 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
> >> +
> >> +#define TYPE int
> >> +
> >> +#include "pr83403.h"
> >> +
> >> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2"
> >> } } */
> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
> >> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
> >> new file mode 100644
> >> index 00000000000..0da8a835b5f
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
> >> @@ -0,0 +1,30 @@
> >> +__attribute__ ((noinline)) void
> >> +calculate (const double *__restrict__ A, const double *__restrict__ B,
> >> + double *__restrict__ C)
> >> +{
> >> + TYPE m = 0;
> >> + TYPE n = 0;
> >> + TYPE k = 0;
> >> +
> >> + A = (const double *) __builtin_assume_aligned (A, 16);
> >> + B = (const double *) __builtin_assume_aligned (B, 16);
> >> + C = (double *) __builtin_assume_aligned (C, 16);
> >> +
> >> + for (n = 0; n < 9; n++)
> >> + {
> >> + for (m = 0; m < 10; m++)
> >> + {
> >> + C[(n * 10) + m] = 0.0;
> >> + }
> >> +
> >> + for (k = 0; k < 17; k++)
> >> + {
> >> +#pragma simd
> >> + for (m = 0; m < 10; m++)
> >> + {
> >> + C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
> >> + }
> >> + }
> >> + }
> >> +}
> >> +
> >> diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
> >> index 0eb8db1b086..5620e6bf28f 100644
> >> --- a/gcc/tree-affine.c
> >> +++ b/gcc/tree-affine.c
> >> @@ -343,24 +343,28 @@ expr_to_aff_combination (aff_tree *comb, tree_code
> >> code, tree type,
> >> wide_int minv, maxv;
> >> /* If inner type has wrapping overflow behavior, fold conversion
> >> for below case:
> >> - (T1)(X - CST) -> (T1)X - (T1)CST
> >> - if X - CST doesn't overflow by range information. Also handle
> >> - (T1)(X + CST) as (T1)(X - (-CST)). */
> >> + (T1)(X *+- CST) -> (T1)X *+- (T1)CST
> >> + if X *+- CST doesn't overflow by range information. */
> >> if (TYPE_UNSIGNED (itype)
> >> && TYPE_OVERFLOW_WRAPS (itype)
> >> - && TREE_CODE (op0) == SSA_NAME
> >> && TREE_CODE (op1) == INTEGER_CST
> >> - && icode != MULT_EXPR
> >> - && get_range_info (op0, &minv, &maxv) == VR_RANGE)
> >> + && determine_value_range (op0, &minv, &maxv) == VR_RANGE)
> >> {
> >> + wi::overflow_type overflow = wi::OVF_NONE;
> >> + signop sign = UNSIGNED;
> >> if (icode == PLUS_EXPR)
> >> - op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
> >> - if (wi::geu_p (minv, wi::to_wide (op1)))
> >> + wi::add (maxv, wi::to_wide (op1), sign, &overflow);
> >> + else if (icode == MULT_EXPR)
> >> + wi::mul (maxv, wi::to_wide (op1), sign, &overflow);
> >> + else
> >> + wi::sub (minv, wi::to_wide (op1), sign, &overflow);
> >> +
> >> + if (overflow == wi::OVF_NONE)
> >> {
> >> op0 = fold_convert (otype, op0);
> >> op1 = fold_convert (otype, op1);
> >> - return expr_to_aff_combination (comb, MINUS_EXPR, otype,
> >> - op0, op1);
> >> + return expr_to_aff_combination (comb, icode, otype, op0,
> >> + op1);
> >> }1
> >> }
> >> }
> >>
>
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2020-05-11 9:12 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-28 6:17 [PATCH] Add value range info for affine combination to improve store motion (PR83403) Xionghu Luo
2020-04-28 7:01 ` Richard Biener
2020-04-28 10:04 ` luoxhu
2020-04-28 10:30 ` Richard Biener
2020-04-29 7:46 ` luoxhu
2020-04-30 7:53 ` [PATCH v2] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403) luoxhu
2020-05-06 12:09 ` Richard Biener
2020-05-11 9:04 ` luoxhu
2020-05-11 9:12 ` Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).