Handle nested loops in kernels regions 2015-07-12 Tom de Vries * omp-low.c (build_receiver_ref): Mark *.omp_data_i as non-trapping. * tree-parloops.c (gen_parallel_loop): Add LOOPS_NEED_FIXUP to loop state. (parallelize_loops): Allow nested loops. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c: New test. * c-c++-common/goacc/kernels-loop-nest.c: New test. --- gcc/omp-low.c | 1 + .../c-c++-common/goacc/kernels-loop-nest.c | 42 ++++++++++++++++++++++ gcc/tree-parloops.c | 5 +-- .../libgomp.oacc-c-c++-common/kernels-loop-nest.c | 26 ++++++++++++++ 4 files changed, 70 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 11ac909..a938ce0 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -1147,6 +1147,7 @@ build_receiver_ref (tree var, bool by_ref, omp_context *ctx) field = x; x = build_simple_mem_ref (ctx->receiver_decl); + TREE_THIS_NOTRAP (x) = 1; x = omp_build_component_ref (x, field); if (by_ref) x = build_simple_mem_ref (x); diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c new file mode 100644 index 0000000..3e06c9f --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c @@ -0,0 +1,42 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ +/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +/* Based on autopar/outer-1.c. */ + +#include + +#define N 1000 + +int +main (void) +{ + int x[N][N]; + +#pragma acc kernels copyout (x) + { + for (int ii = 0; ii < N; ii++) + for (int jj = 0; jj < N; jj++) + x[ii][jj] = ii + jj + 3; + } + + for (int i = 0; i < N; i++) + for (int j = 0; j < N; j++) + if (x[i][j] != i + j + 3) + abort (); + + return 0; +} + +/* Check that only one loop is analyzed, and that it can be parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ + +/* { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 1 "parloops_oacc_kernels" } } */ + +/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c index 04708c0..492ffcb 100644 --- a/gcc/tree-parloops.c +++ b/gcc/tree-parloops.c @@ -2442,6 +2442,7 @@ gen_parallel_loop (struct loop *loop, /* Cancel the loop (it is simpler to do it here rather than to teach the expander to do it). */ cancel_loop_tree (loop); + loops_state_set (LOOPS_NEED_FIXUP); /* Free loop bound estimations that could contain references to removed statements. */ @@ -2761,10 +2762,6 @@ parallelize_loops (bool oacc_kernels_p) if (!loop->in_oacc_kernels_region) continue; - /* TODO: Allow nested loops. */ - if (loop->inner) - continue; - if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Trying loop %d with header bb %d in oacc kernels region\n", diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c new file mode 100644 index 0000000..21d2599 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-nest.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ + +#include + +#define N 1000 + +int +main (void) +{ + int x[N][N]; + +#pragma acc kernels copyout (x) + { + for (int ii = 0; ii < N; ii++) + for (int jj = 0; jj < N; jj++) + x[ii][jj] = ii + jj + 3; + } + + for (int i = 0; i < N; i++) + for (int j = 0; j < N; j++) + if (x[i][j] != i + j + 3) + abort (); + + return 0; +} -- 1.9.1