Use max_loop_iterations in transform_to_exit_first_loop_alt 2015-06-29 Tom de Vries PR tree-optimization/66652 * tree-parloops.c (try_transform_to_exit_first_loop_alt): Use max_loop_iterations to determine if nit + 1 overflows. * testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c (f): Rewrite using restrict pointers. (main): Add arguments to calls to f. * testsuite/libgomp.c/parloops-exit-first-loop-alt.c: Same. * gcc.dg/parloops-exit-first-loop-alt-pr66652.c: New test. * gcc.dg/parloops-exit-first-loop-alt-3.c (f): Rewrite using restrict pointers. * gcc.dg/parloops-exit-first-loop-alt.c: Same. --- .../gcc.dg/parloops-exit-first-loop-alt-3.c | 2 +- .../gcc.dg/parloops-exit-first-loop-alt-pr66652.c | 31 ++++++++++++++++++++++ .../gcc.dg/parloops-exit-first-loop-alt.c | 19 +++++-------- gcc/tree-parloops.c | 31 ++++++++++++++++++++++ .../libgomp.c/parloops-exit-first-loop-alt-3.c | 6 ++--- .../libgomp.c/parloops-exit-first-loop-alt.c | 7 ++--- 6 files changed, 77 insertions(+), 19 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-pr66652.c diff --git a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-3.c b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-3.c index b0fde37..fec53a1 100644 --- a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-3.c +++ b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-3.c @@ -7,7 +7,7 @@ unsigned int *a; unsigned int -f (unsigned int n) +f (unsigned int n, unsigned int *__restrict__ a) { int i; unsigned int sum = 1; diff --git a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-pr66652.c b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-pr66652.c new file mode 100644 index 0000000..2ea097d --- /dev/null +++ b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt-pr66652.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target pthread } */ +/* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops" } */ + +#include +#include +#include + +unsigned int +f (unsigned int n, unsigned int sum) +{ + unsigned int i; + + i = UINT_MAX; + do + { + sum += i % 13; + i++; + } + while (i < n - 1); + + return sum; +} + +/* Four times % 13: + - once in f._loopfn.0 + - once in the parallel + - once in the low iteration count loop + - once for a peeled off last iteration following the parallel. + In other words, we want try_transform_to_exit_first_loop_alt to fail. */ +/* { dg-final { scan-tree-dump-times "(?n)% 13" 4 "parloops" } } */ diff --git a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt.c b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt.c index b36f01b..e088fa1 100644 --- a/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt.c +++ b/gcc/testsuite/gcc.dg/parloops-exit-first-loop-alt.c @@ -4,14 +4,9 @@ /* Variable bound, vector addition. */ -#define N 1000 - -unsigned int a[N]; -unsigned int b[N]; -unsigned int c[N]; - void -f (unsigned int n) +f (unsigned int n, unsigned int *__restrict__ a, unsigned int *__restrict__ b, + unsigned int *__restrict__ c) { int i; @@ -19,9 +14,9 @@ f (unsigned int n) c[i] = a[i] + b[i]; } -/* Three times three array accesses: - - three in f._loopfn.0 - - three in the parallel - - three in the low iteration count loop +/* Three times a store: + - one in f._loopfn.0 + - one in the parallel + - one in the low iteration count loop Crucially, none for a peeled off last iteration following the parallel. */ -/* { dg-final { scan-tree-dump-times "(?n)\\\[i" 9 "parloops" } } */ +/* { dg-final { scan-tree-dump-times "(?n)^ \\*_\[0-9\]*" 3 "parloops" } } */ diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c index cb0ba54..32d059a 100644 --- a/gcc/tree-parloops.c +++ b/gcc/tree-parloops.c @@ -1795,8 +1795,39 @@ try_transform_to_exit_first_loop_alt (struct loop *loop, gcc_assert (TREE_CODE (nit) == SSA_NAME); + /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an + iv with base 0 and step 1 that is incremented in the latch, like this: + + : + # iv_1 = PHI <0 (preheader), iv_2 (latch)> + ... + if (iv_1 < nit) + goto ; + else + goto ; + + : + iv_2 = ivtmp_1 + 1; + goto ; + + The range of iv_1 is [0, nit]. The latch edge is taken for + iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit. So the + number of latch executions is equal to nit. + + The function max_loop_iterations gives us the maximum number of latch + executions, so it gives us the maximum value of nit. */ + widest_int nit_max; + if (!max_loop_iterations (loop, &nit_max)) + return false; + + /* Check if nit + 1 overflows. */ + widest_int type_max = wi::to_widest (TYPE_MAXVAL (nit_type)); + if (!wi::lts_p (nit_max, type_max)) + return false; + gimple def = SSA_NAME_DEF_STMT (nit); + /* Try to find nit + 1, in the form of n in an assignment nit = n - 1. */ if (def && is_gimple_assign (def) && gimple_assign_rhs_code (def) == PLUS_EXPR) diff --git a/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c b/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c index bf06e0e..78365e8 100644 --- a/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c +++ b/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c @@ -10,7 +10,7 @@ unsigned int *a; unsigned int __attribute__((noclone,noinline)) -f (unsigned int n) +f (unsigned int n, unsigned int *__restrict__ a) { int i; unsigned int sum = 1; @@ -32,12 +32,12 @@ main (void) array[i] = i % 7; a = &array[0]; - res = f (N); + res = f (N, a); if (res != 11995) abort (); /* Test low iteration count case. */ - res = f (10); + res = f (10, a); if (res != 25) abort (); diff --git a/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt.c b/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt.c index 35c47d7..a49454b 100644 --- a/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt.c +++ b/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt.c @@ -13,7 +13,8 @@ unsigned int b[N]; unsigned int c[N]; void __attribute__((noclone,noinline)) -f (unsigned int n) +f (unsigned int n, unsigned int *__restrict__ a, unsigned int *__restrict__ b, + unsigned int *__restrict__ c) { int i; @@ -44,7 +45,7 @@ main (void) init (); - f (N); + f (N, a, b, c); for (i = 0; i < N; i++) { @@ -58,7 +59,7 @@ main (void) init (); - f (10); + f (10, a, b, c); for (i = 0; i < N; i++) { -- 1.9.1