public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/autopar_devel] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403)
@ 2020-08-22 21:04 Giuliano Belinassi
  0 siblings, 0 replies; only message in thread
From: Giuliano Belinassi @ 2020-08-22 21:04 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:7f96e4628c8888f0ec3d73a0b3890acfe64a4b1d

commit 7f96e4628c8888f0ec3d73a0b3890acfe64a4b1d
Author: Xionghu Luo <luoxhu@linux.ibm.com>
Date:   Sun May 10 21:06:20 2020 -0500

    Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403)
    
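    Use determine_value_range to get value range info when folding conversions
    of expressions whose inner operation is PLUS_EXPR/MINUS_EXPR/MULT_EXPR, so
    that the conversion can be distributed over the operands when the operation
    is known not to overflow in an inner type with wrapping overflow, e.g.: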
    
    (long unsigned int)((unsigned int)n * 10 + 1)
    =>
    (long unsigned int)n * (long unsigned int)10 + (long unsigned int)1
    
    With this change to affine combinations, load/store motion can prove more
    address references independent and promote some memory expressions to
    registers within the loop.
    
    PS: Replace the previous handling of "(T1)(X + CST) as (T1)X - (T1)(-CST)"
    with "(T1)(X + CST) as (T1)X + (T1)CST" for wrapping overflow.
    
    Bootstrapped and regression tested successfully on Power8-LE.
    
    gcc/ChangeLog
    
            2020-05-11  Xiong Hu Luo  <luoxhu@linux.ibm.com>
    
            PR tree-optimization/83403
            * tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with
            determine_value_range, Add fold conversion of MULT_EXPR, fix the
            previous PLUS_EXPR.
    
    gcc/testsuite/ChangeLog
    
            2020-05-11  Xiong Hu Luo  <luoxhu@linux.ibm.com>
    
            PR tree-optimization/83403
            * gcc.dg/tree-ssa/pr83403-1.c: New test.
            * gcc.dg/tree-ssa/pr83403-2.c: New test.
            * gcc.dg/tree-ssa/pr83403.h: New header.

Diff:
---
 gcc/ChangeLog                             |  7 +++++++
 gcc/testsuite/ChangeLog                   |  7 +++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c |  8 ++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c |  8 ++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h   | 30 ++++++++++++++++++++++++++++++
 gcc/tree-affine.c                         | 24 ++++++++++++++----------
 6 files changed, 74 insertions(+), 10 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b0f04132534..d23ebffc24b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2020-05-11  Xiong Hu Luo  <luoxhu@linux.ibm.com>
+
+	PR tree-optimization/83403
+	* tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with
+	determine_value_range, Add fold conversion of MULT_EXPR, fix the
+	previous PLUS_EXPR.
+
 2020-05-10  Gerald Pfeifer  <gerald@pfeifer.com>
 
 	* config/i386/i386-c.c (ix86_target_macros): Define _ILP32 and
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 791c62aeab8..aa11ca3fd99 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2020-05-11  Xiong Hu Luo  <luoxhu@linux.ibm.com>
+
+	PR tree-optimization/83403
+	* gcc.dg/tree-ssa/pr83403-1.c: New test.
+	* gcc.dg/tree-ssa/pr83403-2.c: New test.
+	* gcc.dg/tree-ssa/pr83403.h: New header.
+
 2020-05-10  Harald Anlauf  <anlauf@gmx.de>
 
 	PR fortran/93499
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
new file mode 100644
index 00000000000..748375b03af
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE unsigned int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
new file mode 100644
index 00000000000..ca2e6bbd61c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
new file mode 100644
index 00000000000..0da8a835b5f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
@@ -0,0 +1,30 @@
+__attribute__ ((noinline)) void
+calculate (const double *__restrict__ A, const double *__restrict__ B,
+	   double *__restrict__ C)
+{
+  TYPE m = 0;
+  TYPE n = 0;
+  TYPE k = 0;
+
+  A = (const double *) __builtin_assume_aligned (A, 16);
+  B = (const double *) __builtin_assume_aligned (B, 16);
+  C = (double *) __builtin_assume_aligned (C, 16);
+
+  for (n = 0; n < 9; n++)
+    {
+      for (m = 0; m < 10; m++)
+	{
+	  C[(n * 10) + m] = 0.0;
+	}
+
+      for (k = 0; k < 17; k++)
+	{
+#pragma simd
+	  for (m = 0; m < 10; m++)
+	    {
+	      C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
+	    }
+	}
+    }
+}
+
diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
index 0eb8db1b086..5620e6bf28f 100644
--- a/gcc/tree-affine.c
+++ b/gcc/tree-affine.c
@@ -343,24 +343,28 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
 	    wide_int minv, maxv;
 	    /* If inner type has wrapping overflow behavior, fold conversion
 	       for below case:
-		 (T1)(X - CST) -> (T1)X - (T1)CST
-	       if X - CST doesn't overflow by range information.  Also handle
-	       (T1)(X + CST) as (T1)(X - (-CST)).  */
+		 (T1)(X *+- CST) -> (T1)X *+- (T1)CST
+	       if X *+- CST doesn't overflow by range information.  */
 	    if (TYPE_UNSIGNED (itype)
 		&& TYPE_OVERFLOW_WRAPS (itype)
-		&& TREE_CODE (op0) == SSA_NAME
 		&& TREE_CODE (op1) == INTEGER_CST
-		&& icode != MULT_EXPR
-		&& get_range_info (op0, &minv, &maxv) == VR_RANGE)
+		&& determine_value_range (op0, &minv, &maxv) == VR_RANGE)
 	      {
+		wi::overflow_type overflow = wi::OVF_NONE;
+		signop sign = UNSIGNED;
 		if (icode == PLUS_EXPR)
-		  op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
-		if (wi::geu_p (minv, wi::to_wide (op1)))
+		  wi::add (maxv, wi::to_wide (op1), sign, &overflow);
+		else if (icode == MULT_EXPR)
+		  wi::mul (maxv, wi::to_wide (op1), sign, &overflow);
+		else
+		  wi::sub (minv, wi::to_wide (op1), sign, &overflow);
+
+		if (overflow == wi::OVF_NONE)
 		  {
 		    op0 = fold_convert (otype, op0);
 		    op1 = fold_convert (otype, op1);
-		    return expr_to_aff_combination (comb, MINUS_EXPR, otype,
-						    op0, op1);
+		    return expr_to_aff_combination (comb, icode, otype, op0,
+						    op1);
 		  }
 	      }
 	  }


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-08-22 21:04 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-22 21:04 [gcc/devel/autopar_devel] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403) Giuliano Belinassi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).