Hi! On Sat, 15 Nov 2014 13:14:52 +0100, Tom de Vries wrote: > I'm submitting a patch series with initial support for the oacc kernels directive. Committed to gomp-4_0-branch in r222287: commit abaf92b2db3c0799edac63cfb846af2dbde47423 Author: tschwinge Date: Tue Apr 21 20:27:40 2015 +0000 Handle global loop counters in c/c++ oacc kernels gcc/ * passes.def: Add pass_fre after pass_ch_oacc_kernels. gcc/testsuite/ * c-c++-common/goacc/kernels-counter-vars-function-scope.c: New test. * c-c++-common/goacc/kernels-one-counter-var.c: New test. * g++.dg/ipa/devirt-37.C: Update for new pass_fre. * g++.dg/ipa/devirt-40.C: Likewise. * g++.dg/tree-ssa/pr61034.C: Likewise. * gcc.dg/ipa/ipa-pta-13.c: Likewise. * gcc.dg/ipa/ipa-pta-3.c: Likewise. * gcc.dg/ipa/ipa-pta-4.c: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@222287 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.gomp | 2 + gcc/passes.def | 1 + gcc/testsuite/ChangeLog.gomp | 9 ++++ .../goacc/kernels-counter-vars-function-scope.c | 55 ++++++++++++++++++++ .../c-c++-common/goacc/kernels-one-counter-var.c | 54 +++++++++++++++++++ gcc/testsuite/g++.dg/ipa/devirt-37.C | 12 ++--- gcc/testsuite/g++.dg/ipa/devirt-40.C | 6 +-- gcc/testsuite/g++.dg/tree-ssa/pr61034.C | 10 ++-- gcc/testsuite/gcc.dg/ipa/ipa-pta-13.c | 6 +-- gcc/testsuite/gcc.dg/ipa/ipa-pta-3.c | 6 +-- gcc/testsuite/gcc.dg/ipa/ipa-pta-4.c | 6 +-- 11 files changed, 144 insertions(+), 23 deletions(-) diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp index f14c3718..b1933ba 100644 --- gcc/ChangeLog.gomp +++ gcc/ChangeLog.gomp @@ -1,5 +1,7 @@ 2015-04-21 Tom de Vries + * passes.def: Add pass_fre after pass_ch_oacc_kernels. + * passes.def: Add pass_scev_cprop to pass_oacc_kernels. * tree-ssa-loop.c (pass_scev_cprop::clone): New function. diff --git gcc/passes.def gcc/passes.def index 3e85808..04cbba0 100644 --- gcc/passes.def +++ gcc/passes.def @@ -91,6 +91,7 @@ along with GCC; see the file COPYING3. 
If not see NEXT_PASS (pass_oacc_kernels); PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels) NEXT_PASS (pass_ch_oacc_kernels); + NEXT_PASS (pass_fre); NEXT_PASS (pass_tree_loop_init); NEXT_PASS (pass_lim); NEXT_PASS (pass_copy_prop); diff --git gcc/testsuite/ChangeLog.gomp gcc/testsuite/ChangeLog.gomp index eed22e2..ed80f5b 100644 --- gcc/testsuite/ChangeLog.gomp +++ gcc/testsuite/ChangeLog.gomp @@ -1,6 +1,15 @@ 2015-04-21 Tom de Vries Thomas Schwinge + * c-c++-common/goacc/kernels-counter-vars-function-scope.c: New test. + * c-c++-common/goacc/kernels-one-counter-var.c: New test. + * g++.dg/ipa/devirt-37.C: Update for new pass_fre. + * g++.dg/ipa/devirt-40.C: Likewise. + * g++.dg/tree-ssa/pr61034.C: Likewise. + * gcc.dg/ipa/ipa-pta-13.c: Likewise. + * gcc.dg/ipa/ipa-pta-3.c: Likewise. + * gcc.dg/ipa/ipa-pta-4.c: Likewise. + * gcc.dg/pr41488.c: Update for new pass_scev_cprop. * gcc.dg/tree-ssa/loop-17.c: Likewise. * gcc.dg/tree-ssa/loop-39.c: Likewise. diff --git gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c new file mode 100644 index 0000000..06cdb29 --- /dev/null +++ gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c @@ -0,0 +1,55 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ +/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + COUNTERTYPE i; + COUNTERTYPE ii; + + a = (unsigned int *)malloc (N * sizeof (unsigned int)); + b = (unsigned int *)malloc (N * sizeof (unsigned int)); + c = (unsigned int *)malloc (N * sizeof (unsigned int)); + + for (i = 0; i < N; i++) + a[i] = i * 2; + + for (i = 0; i < N; i++) + b[i] = i * 4; + +#pragma acc 
kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) + { + for (ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + + for (i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} + +/* Check that only one loop is analyzed, and that it can be parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ + +/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c new file mode 100644 index 0000000..2699437 --- /dev/null +++ gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c @@ -0,0 +1,54 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ +/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + COUNTERTYPE i; + + a = (unsigned int *)malloc (N * sizeof (unsigned int)); + b = (unsigned int *)malloc (N * sizeof (unsigned int)); + c = (unsigned int *)malloc (N * sizeof (unsigned int)); + + for (i = 0; i < N; i++) + a[i] = i * 2; + + for (i = 0; i < N; i++) + b[i] = i * 4; + +#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) + { + for (i = 0; i < N; i++) + c[i] = a[i] + b[i]; + } + + for (i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} + +/* Check that 
only one loop is analyzed, and that it can be parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ + +/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git gcc/testsuite/g++.dg/ipa/devirt-37.C gcc/testsuite/g++.dg/ipa/devirt-37.C index 7e1acdc..eb2c7f2 100644 --- gcc/testsuite/g++.dg/ipa/devirt-37.C +++ gcc/testsuite/g++.dg/ipa/devirt-37.C @@ -1,4 +1,4 @@ -/* { dg-options "-fpermissive -O2 -fno-indirect-inlining -fno-devirtualize-speculatively -fdump-tree-fre2-details -fno-early-inlining" } */ +/* { dg-options "-fpermissive -O2 -fno-indirect-inlining -fno-devirtualize-speculatively -fdump-tree-fre3-details -fno-early-inlining" } */ #include <stdlib.h> struct A {virtual void test() {abort ();}}; struct B:A @@ -30,8 +30,8 @@ t() /* After inlining the call within constructor needs to be checked to not go into a basetype. We should see the vtbl store and we should notice extcall as possibly clobbering the type but ignore it because b is in static storage. */ -/* { dg-final { scan-tree-dump "No dynamic type change found." "fre2" } } */ -/* { dg-final { scan-tree-dump "Checking vtbl store:" "fre2" } } */ -/* { dg-final { scan-tree-dump "Function call may change dynamic type:extcall" "fre2" } } */ -/* { dg-final { scan-tree-dump "converting indirect call to function virtual void" "fre2" } } */ -/* { dg-final { cleanup-tree-dump "fre2" } } */ +/* { dg-final { scan-tree-dump "No dynamic type change found." 
"fre3" } } */ +/* { dg-final { scan-tree-dump "Checking vtbl store:" "fre3" } } */ +/* { dg-final { scan-tree-dump "Function call may change dynamic type:extcall" "fre3" } } */ +/* { dg-final { scan-tree-dump "converting indirect call to function virtual void" "fre3" } } */ +/* { dg-final { cleanup-tree-dump "fre3" } } */ diff --git gcc/testsuite/g++.dg/ipa/devirt-40.C gcc/testsuite/g++.dg/ipa/devirt-40.C index 79cb129..7e4ae7c 100644 --- gcc/testsuite/g++.dg/ipa/devirt-40.C +++ gcc/testsuite/g++.dg/ipa/devirt-40.C @@ -1,4 +1,4 @@ -/* { dg-options "-O2 -fdump-tree-fre2-details" } */ +/* { dg-options "-O2 -fdump-tree-fre3-details" } */ typedef enum { } UErrorCode; @@ -19,5 +19,5 @@ A::m_fn1 (UnicodeString &, int &p2, UErrorCode &) const UnicodeString a[2]; } -/* { dg-final { scan-tree-dump-not "\\n OBJ_TYPE_REF" "fre2" } } */ -/* { dg-final { cleanup-tree-dump "fre2" } } */ +/* { dg-final { scan-tree-dump-not "\\n OBJ_TYPE_REF" "fre3" } } */ +/* { dg-final { cleanup-tree-dump "fre3" } } */ diff --git gcc/testsuite/g++.dg/tree-ssa/pr61034.C gcc/testsuite/g++.dg/tree-ssa/pr61034.C index 9ec3995..78417a1 100644 --- gcc/testsuite/g++.dg/tree-ssa/pr61034.C +++ gcc/testsuite/g++.dg/tree-ssa/pr61034.C @@ -1,5 +1,5 @@ // { dg-do compile } -// { dg-options "-O3 -fdump-tree-fre2" } +// { dg-options "-O3 -fdump-tree-fre3" } #define assume(x) if(!(x))__builtin_unreachable() @@ -41,7 +41,7 @@ bool f(I a, I b, I c, I d) { // a bunch of conditional free()s and unreachable()s. // This works only if everything is inlined into 'f'. 
-// { dg-final { scan-tree-dump-times ";; Function" 1 "fre2" } } -// { dg-final { scan-tree-dump-times "free" 19 "fre2" } } -// { dg-final { scan-tree-dump-times "unreachable" 11 "fre2" } } -// { dg-final { cleanup-tree-dump "fre2" } } +// { dg-final { scan-tree-dump-times ";; Function" 1 "fre3" } } +// { dg-final { scan-tree-dump-times "free" 19 "fre3" } } +// { dg-final { scan-tree-dump-times "unreachable" 11 "fre3" } } +// { dg-final { cleanup-tree-dump "fre3" } } diff --git gcc/testsuite/gcc.dg/ipa/ipa-pta-13.c gcc/testsuite/gcc.dg/ipa/ipa-pta-13.c index f7f95f4..8d73900 100644 --- gcc/testsuite/gcc.dg/ipa/ipa-pta-13.c +++ gcc/testsuite/gcc.dg/ipa/ipa-pta-13.c @@ -1,5 +1,5 @@ /* { dg-do link } */ -/* { dg-options "-O2 -fipa-pta -fdump-ipa-pta-details -fdump-tree-fre2 -fno-ipa-icf" } */ +/* { dg-options "-O2 -fipa-pta -fdump-ipa-pta-details -fdump-tree-fre3 -fno-ipa-icf" } */ static int x, y; @@ -54,9 +54,9 @@ int main() local_address_taken (&y); /* As we are computing flow- and context-insensitive we may not CSE the load of x here. 
*/ - /* { dg-final { scan-tree-dump " = x;" "fre2" } } */ + /* { dg-final { scan-tree-dump " = x;" "fre3" } } */ return x; } /* { dg-final { cleanup-ipa-dump "pta" } } */ -/* { dg-final { cleanup-tree-dump "fre2" } } */ +/* { dg-final { cleanup-tree-dump "fre3" } } */ diff --git gcc/testsuite/gcc.dg/ipa/ipa-pta-3.c gcc/testsuite/gcc.dg/ipa/ipa-pta-3.c index 4790080..2398a21 100644 --- gcc/testsuite/gcc.dg/ipa/ipa-pta-3.c +++ gcc/testsuite/gcc.dg/ipa/ipa-pta-3.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -fipa-pta -fdump-ipa-pta-details -fdump-tree-fre2-details" } */ +/* { dg-options "-O2 -fipa-pta -fdump-ipa-pta-details -fdump-tree-fre3-details" } */ static int __attribute__((noinline,noclone)) foo (int *p, int *q) @@ -23,6 +23,6 @@ int main() /* { dg-final { scan-ipa-dump "foo.arg0 = &a" "pta" } } */ /* { dg-final { scan-ipa-dump "foo.arg1 = &b" "pta" } } */ -/* { dg-final { scan-tree-dump "Replaced \\\*p_2\\\(D\\\) with 1" "fre2" } } */ -/* { dg-final { cleanup-tree-dump "fre2" } } */ +/* { dg-final { scan-tree-dump "Replaced \\\*p_2\\\(D\\\) with 1" "fre3" } } */ +/* { dg-final { cleanup-tree-dump "fre3" } } */ /* { dg-final { cleanup-ipa-dump "pta" } } */ diff --git gcc/testsuite/gcc.dg/ipa/ipa-pta-4.c gcc/testsuite/gcc.dg/ipa/ipa-pta-4.c index bf6fa28..b72489f 100644 --- gcc/testsuite/gcc.dg/ipa/ipa-pta-4.c +++ gcc/testsuite/gcc.dg/ipa/ipa-pta-4.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -fipa-pta -fdump-ipa-pta-details -fdump-tree-fre2-details" } */ +/* { dg-options "-O2 -fipa-pta -fdump-ipa-pta-details -fdump-tree-fre3-details" } */ int a, b; @@ -28,6 +28,6 @@ int main() /* { dg-final { scan-ipa-dump "foo.arg0 = &a" "pta" } } */ /* { dg-final { scan-ipa-dump "foo.arg1 = &b" "pta" } } */ -/* { dg-final { scan-tree-dump "Replaced \\\*p_2\\\(D\\\) with 1" "fre2" } } */ -/* { dg-final { cleanup-tree-dump "fre2" } } */ +/* { dg-final { scan-tree-dump "Replaced \\\*p_2\\\(D\\\) with 1" "fre3" } } */ +/* { dg-final { 
cleanup-tree-dump "fre3" } } */ /* { dg-final { cleanup-ipa-dump "pta" } } */ Grüße, Thomas