Ignore acc loop directive in kernels region 2016-02-29 Tom de Vries * gimplify.c (gimplify_ctx_in_oacc_kernels_region): New function. (gimplify_omp_for): Ignore all but private clause on loop directive in kernels region. * omp-low.c (lower_omp_for_seq): New function. (lower_omp_for): Use lower_omp_for_seq in kernels region. Don't generate omp continue/return. * c-c++-common/goacc/kernels-acc-loop-reduction.c: New test. * c-c++-common/goacc/kernels-acc-loop-smaller-equal.c: Same. * c-c++-common/goacc/kernels-loop-2-acc-loop.c: Same. * c-c++-common/goacc/kernels-loop-3-acc-loop.c: Same. * c-c++-common/goacc/kernels-loop-acc-loop.c: Same. * c-c++-common/goacc/kernels-loop-n-acc-loop.c: Same. * c-c++-common/goacc/combined-directives.c: Update test. * c-c++-common/goacc/loop-private-1.c: Same. * gfortran.dg/goacc/combined-directives.f90: Same. * gfortran.dg/goacc/gang-static.f95: Same. * gfortran.dg/goacc/reduction-2.f95: Same. --- gcc/gimplify.c | 41 ++++++++++ gcc/omp-low.c | 93 ++++++++++++++++++++-- .../c-c++-common/goacc/combined-directives.c | 16 ++-- .../goacc/kernels-acc-loop-reduction.c | 24 ++++++ .../goacc/kernels-acc-loop-smaller-equal.c | 22 +++++ .../c-c++-common/goacc/kernels-loop-2-acc-loop.c | 17 ++++ .../c-c++-common/goacc/kernels-loop-3-acc-loop.c | 14 ++++ .../c-c++-common/goacc/kernels-loop-acc-loop.c | 14 ++++ .../c-c++-common/goacc/kernels-loop-n-acc-loop.c | 14 ++++ gcc/testsuite/c-c++-common/goacc/loop-private-1.c | 2 +- .../gfortran.dg/goacc/combined-directives.f90 | 16 ++-- gcc/testsuite/gfortran.dg/goacc/gang-static.f95 | 4 +- gcc/testsuite/gfortran.dg/goacc/reduction-2.f95 | 3 +- 13 files changed, 252 insertions(+), 28 deletions(-) diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 7be6bd7..4b82305 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -8364,6 +8364,20 @@ find_combined_omp_for (tree *tp, int *walk_subtrees, void *) return NULL_TREE; } +/* Return true if CTX is (part of) an oacc kernels region. */ + +static bool +gimplify_ctx_in_oacc_kernels_region (gimplify_omp_ctx *ctx) +{ + for (;ctx != NULL; ctx = ctx->outer_context) + { + if (ctx->region_type == ORT_ACC_KERNELS) + return true; + } + + return false; +} + /* Gimplify the gross structure of an OMP_FOR statement. */ static enum gimplify_status @@ -8403,6 +8417,33 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p) gcc_unreachable (); } + /* Skip loop clauses not handled in kernels region. */ + if (gimplify_ctx_in_oacc_kernels_region (gimplify_omp_ctxp)) + { + tree *prev_ptr = &OMP_FOR_CLAUSES (for_stmt); + + while (tree probe = *prev_ptr) + { + tree *next_ptr = &OMP_CLAUSE_CHAIN (probe); + + bool keep_clause; + switch (OMP_CLAUSE_CODE (probe)) + { + case OMP_CLAUSE_PRIVATE: + keep_clause = true; + break; + default: + keep_clause = false; + break; + } + + if (keep_clause) + prev_ptr = next_ptr; + else + *prev_ptr = *next_ptr; + } + } + /* Set OMP_CLAUSE_LINEAR_NO_COPYIN flag on explicit linear clause for the IV. */ if (ort == ORT_SIMD && TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) == 1) diff --git a/gcc/omp-low.c b/gcc/omp-low.c index fcbb3e0..bb70ac2 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -14944,6 +14944,75 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p, } } +/* Lower the loops with index I and higher in omp_for FOR_STMT as a sequential + loop, and append the resulting gimple statements to PRE_P. */ + +static void +lower_omp_for_seq (gimple_seq *pre_p, gimple *for_stmt, unsigned int i) +{ + unsigned int len = gimple_omp_for_collapse (for_stmt); + gcc_assert (i < len); + + /* Gimplify OMP_FOR[i] as: + + OMP_FOR_INIT[i]; + goto ; + : + if (i == len - 1) + OMP_FOR_BODY; + else + OMP_FOR[i+1]; + OMP_FOR_INCR[i]; + : + if (OMP_FOR_COND[i]) + goto ; + else + goto ; + : + */ + + tree loop_entry_label = create_artificial_label (UNKNOWN_LOCATION); + tree fall_thru_label = create_artificial_label (UNKNOWN_LOCATION); + tree loop_exit_label = create_artificial_label (UNKNOWN_LOCATION); + + /* OMP_FOR_INIT[i]. */ + tree init = gimple_omp_for_initial (for_stmt, i); + tree var = gimple_omp_for_index (for_stmt, i); + gimple *g = gimple_build_assign (var, init); + gimple_seq_add_stmt (pre_p, g); + + /* goto . */ + gimple_seq_add_stmt (pre_p, gimple_build_goto (loop_entry_label)); + + /* . */ + gimple_seq_add_stmt (pre_p, gimple_build_label (fall_thru_label)); + + /* if (i == len - 1) OMP_FOR_BODY + else OMP_FOR[i+1]. */ + if (i == len - 1) + gimple_seq_add_seq (pre_p, gimple_omp_body (for_stmt)); + else + lower_omp_for_seq (pre_p, for_stmt, i + 1); + + /* OMP_FOR_INCR[i]. */ + tree incr = gimple_omp_for_incr (for_stmt, i); + g = gimple_build_assign (var, incr); + gimple_seq_add_stmt (pre_p, g); + + /* . */ + gimple_seq_add_stmt (pre_p, gimple_build_label (loop_entry_label)); + + /* if (OMP_FOR_COND[i]) goto + else goto . */ + enum tree_code cond = gimple_omp_for_cond (for_stmt, i); + tree final_val = gimple_omp_for_final (for_stmt, i); + gimple *gimple_cond = gimple_build_cond (cond, var, final_val, + fall_thru_label, loop_exit_label); + gimple_seq_add_stmt (pre_p, gimple_cond); + + /* . */ + gimple_seq_add_stmt (pre_p, gimple_build_label (loop_exit_label)); +} /* Lower code for an OMP loop directive. */ @@ -14957,6 +15026,8 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq omp_for_body, body, dlist; gimple_seq oacc_head = NULL, oacc_tail = NULL; size_t i; + bool oacc_kernels_p = (is_gimple_omp_oacc (ctx->stmt) + && ctx_in_oacc_kernels_region (ctx)); push_gimplify_context (); @@ -15065,7 +15136,7 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) extract_omp_for_data (stmt, &fd, NULL); if (is_gimple_omp_oacc (ctx->stmt) - && !ctx_in_oacc_kernels_region (ctx)) + && !oacc_kernels_p) lower_oacc_head_tail (gimple_location (stmt), gimple_omp_for_clauses (stmt), &oacc_head, &oacc_tail, ctx); @@ -15088,13 +15159,18 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) ctx); } - if (!gimple_omp_for_grid_phony (stmt)) - gimple_seq_add_stmt (&body, stmt); - gimple_seq_add_seq (&body, gimple_omp_body (stmt)); + if (oacc_kernels_p) + lower_omp_for_seq (&body, stmt, 0); + else if (gimple_omp_for_grid_phony (stmt)) + gimple_seq_add_seq (&body, gimple_omp_body (stmt)); + else + { + gimple_seq_add_stmt (&body, stmt); + gimple_seq_add_seq (&body, gimple_omp_body (stmt)); - if (!gimple_omp_for_grid_phony (stmt)) - gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v, - fd.loop.v)); + gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v, + fd.loop.v)); + } /* After the loop, add exit clauses. */ lower_reduction_clauses (gimple_omp_for_clauses (stmt), &body, ctx); @@ -15106,7 +15182,8 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) body = maybe_catch_exception (body); - if (!gimple_omp_for_grid_phony (stmt)) + if (!gimple_omp_for_grid_phony (stmt) + && !oacc_kernels_p) { /* Region exit marker goes at the end of the loop body. */ gimple_seq_add_stmt (&body, gimple_build_omp_return (fd.have_nowait)); diff --git a/gcc/testsuite/c-c++-common/goacc/combined-directives.c b/gcc/testsuite/c-c++-common/goacc/combined-directives.c index c387285..66b8b65 100644 --- a/gcc/testsuite/c-c++-common/goacc/combined-directives.c +++ b/gcc/testsuite/c-c++-common/goacc/combined-directives.c @@ -108,12 +108,12 @@ test () // ; } -// { dg-final { scan-tree-dump-times "acc loop collapse.2. private.j. private.i" 2 "gimple" } } -// { dg-final { scan-tree-dump-times "acc loop gang" 2 "gimple" } } -// { dg-final { scan-tree-dump-times "acc loop worker" 2 "gimple" } } -// { dg-final { scan-tree-dump-times "acc loop vector" 2 "gimple" } } -// { dg-final { scan-tree-dump-times "acc loop seq" 2 "gimple" } } -// { dg-final { scan-tree-dump-times "acc loop auto" 2 "gimple" } } -// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" } } -// { dg-final { scan-tree-dump-times "acc loop independent private.i" 2 "gimple" } } +// { dg-final { scan-tree-dump-times "acc loop collapse.2. private.j. private.i" 1 "gimple" } } +// { dg-final { scan-tree-dump-times "acc loop gang" 1 "gimple" } } +// { dg-final { scan-tree-dump-times "acc loop worker" 1 "gimple" } } +// { dg-final { scan-tree-dump-times "acc loop vector" 1 "gimple" } } +// { dg-final { scan-tree-dump-times "acc loop seq" 1 "gimple" } } +// { dg-final { scan-tree-dump-times "acc loop auto" 1 "gimple" } } +// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 1 "gimple" } } +// { dg-final { scan-tree-dump-times "acc loop independent private.i" 1 "gimple" } } // { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } } diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-reduction.c b/gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-reduction.c new file mode 100644 index 0000000..6a9f52b --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-reduction.c @@ -0,0 +1,24 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-fdump-tree-parloops1-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +unsigned int a[1000]; + +unsigned int +foo (int n) +{ + unsigned int sum = 0; + +#pragma acc kernels loop gang reduction(+:sum) + for (int i = 0; i < n; i++) + sum += a[i]; + + return sum; +} + +/* Check that only one loop is analyzed, and that it can be parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*\\._omp_fn\\.0" 1 "optimized" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-smaller-equal.c b/gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-smaller-equal.c new file mode 100644 index 0000000..d18c779 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-smaller-equal.c @@ -0,0 +1,22 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-fdump-tree-parloops1-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +unsigned int +foo (int n) +{ + unsigned int sum = 1; + + #pragma acc kernels loop + for (int i = 1; i <= n; i++) + sum += i; + + return sum; +} + +/* Check that only one loop is analyzed, and that it can be parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*\\._omp_fn\\.0" 1 "optimized" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-2-acc-loop.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-2-acc-loop.c new file mode 100644 index 0000000..95354e1 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-2-acc-loop.c @@ -0,0 +1,17 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-fdump-tree-parloops1-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +/* Check that loops with '#pragma acc loop' tagged gets properly parallelized. */ +#define ACC_LOOP +#include "kernels-loop-2.c" + +/* Check that only three loops are analyzed, and that all can be + parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-3-acc-loop.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-3-acc-loop.c new file mode 100644 index 0000000..1ad3067 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-3-acc-loop.c @@ -0,0 +1,14 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-fdump-tree-parloops1-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +/* Check that loops with '#pragma acc loop' tagged gets properly parallelized. */ +#define ACC_LOOP +#include "kernels-loop-3.c" + +/* Check that only one loop is analyzed, and that it can be parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-acc-loop.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-acc-loop.c new file mode 100644 index 0000000..47b8459 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-acc-loop.c @@ -0,0 +1,14 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-fdump-tree-parloops1-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +/* Check that loops with '#pragma acc loop' tagged gets properly parallelized. */ +#define ACC_LOOP +#include "kernels-loop.c" + +/* Check that only one loop is analyzed, and that it can be parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-n-acc-loop.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-n-acc-loop.c new file mode 100644 index 0000000..25b56d7 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-n-acc-loop.c @@ -0,0 +1,14 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-fdump-tree-parloops1-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +/* Check that loops with '#pragma acc loop' tagged gets properly parallelized. */ +#define ACC_LOOP +#include "kernels-loop-n.c" + +/* Check that only one loop is analyzed, and that it can be parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/loop-private-1.c b/gcc/testsuite/c-c++-common/goacc/loop-private-1.c index 38a4a7d..9b2f7fa 100644 --- a/gcc/testsuite/c-c++-common/goacc/loop-private-1.c +++ b/gcc/testsuite/c-c++-common/goacc/loop-private-1.c @@ -10,4 +10,4 @@ f (int i, int j) ; } -/* { dg-final { scan-tree-dump-times "#pragma acc loop collapse\\(2\\) private\\(j\\) private\\(i\\)" 1 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "#pragma acc loop private\\(j\\) private\\(i\\)" 1 "gimple" } } */ diff --git a/gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 b/gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 index 6977525..e89ddc9 100644 --- a/gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 +++ b/gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 @@ -144,12 +144,12 @@ subroutine test ! !$acc end kernels loop end subroutine test -! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. collapse.2." 2 "gimple" } } -! { dg-final { scan-tree-dump-times "acc loop private.i. gang" 2 "gimple" } } -! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. worker" 2 "gimple" } } -! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 2 "gimple" } } -! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 2 "gimple" } } -! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 2 "gimple" } } -! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" } } -! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" } } +! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. collapse.2." 1 "gimple" } } +! { dg-final { scan-tree-dump-times "acc loop private.i. gang" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. worker" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 1 "gimple" } } ! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/gang-static.f95 b/gcc/testsuite/gfortran.dg/goacc/gang-static.f95 index 3481085..c14b7b2 100644 --- a/gcc/testsuite/gfortran.dg/goacc/gang-static.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/gang-static.f95 @@ -78,5 +78,5 @@ end subroutine test ! { dg-final { scan-tree-dump-times "gang\\(static:2\\)" 1 "omplower" } } ! { dg-final { scan-tree-dump-times "gang\\(static:5\\)" 1 "omplower" } } ! { dg-final { scan-tree-dump-times "gang\\(static:20\\)" 1 "omplower" } } -! { dg-final { scan-tree-dump-times "gang\\(num: 5 static:\\\*\\)" 1 "omplower" } } -! { dg-final { scan-tree-dump-times "gang\\(num: 30 static:20\\)" 1 "omplower" } } +! { dg-final { scan-tree-dump-times "gang\\(num: 5 static:\\\*\\)" 0 "omplower" } } +! { dg-final { scan-tree-dump-times "gang\\(num: 30 static:20\\)" 0 "omplower" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/reduction-2.f95 b/gcc/testsuite/gfortran.dg/goacc/reduction-2.f95 index 929fb0e..4c431c8 100644 --- a/gcc/testsuite/gfortran.dg/goacc/reduction-2.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/reduction-2.f95 @@ -11,6 +11,7 @@ subroutine foo () !$acc end parallel loop !$acc kernels loop reduction(+:a) do k = 2,6 + a = a + 1 enddo !$acc end kernels loop end subroutine @@ -18,5 +19,5 @@ end subroutine ! { dg-final { scan-tree-dump-times "target oacc_parallel firstprivate.a." 1 "gimple" } } ! { dg-final { scan-tree-dump-times "acc loop private.p. reduction..:a." 1 "gimple" } } ! { dg-final { scan-tree-dump-times "target oacc_kernels map.force_tofrom:a .len: 4.." 1 "gimple" } } -! { dg-final { scan-tree-dump-times "acc loop private.k. reduction..:a." 1 "gimple" } } +! { dg-final { scan-tree-dump-times "acc loop private.k." 1 "gimple" } }