Handle acc loop directive 2015-11-09 Tom de Vries * omp-low.c (struct omp_region): Add inside_kernels_p field. (expand_omp_for_generic): Only set address taken for istart0 and iend0 if necessary. Adjust to generate a 'sequential' loop when GOMP builtin arguments are BUILT_IN_NONE. (expand_omp_for): Use expand_omp_for_generic() to generate a non-parallelized loop for OMP_FORs inside OpenACC kernels regions. (expand_omp): Mark inside_kernels_p field true for regions nested inside OpenACC kernels constructs. --- gcc/omp-low.c | 127 ++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 87 insertions(+), 40 deletions(-) diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 1283cc7..859a2eb 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -136,6 +136,9 @@ struct omp_region /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has a depend clause. */ gomp_ordered *ord_stmt; + + /* True if this is nested inside an OpenACC kernels construct. */ + bool inside_kernels_p; }; /* Context structure. Used to store information about each parallel @@ -8238,6 +8241,7 @@ expand_omp_for_generic (struct omp_region *region, gassign *assign_stmt; bool in_combined_parallel = is_combined_parallel (region); bool broken_loop = region->cont == NULL; + bool seq_loop = (start_fn == BUILT_IN_NONE || next_fn == BUILT_IN_NONE); edge e, ne; tree *counts = NULL; int i; @@ -8335,8 +8339,12 @@ expand_omp_for_generic (struct omp_region *region, type = TREE_TYPE (fd->loop.v); istart0 = create_tmp_var (fd->iter_type, ".istart0"); iend0 = create_tmp_var (fd->iter_type, ".iend0"); - TREE_ADDRESSABLE (istart0) = 1; - TREE_ADDRESSABLE (iend0) = 1; + + if (!seq_loop) + { + TREE_ADDRESSABLE (istart0) = 1; + TREE_ADDRESSABLE (iend0) = 1; + } /* See if we need to bias by LLONG_MIN. 
*/ if (fd->iter_type == long_long_unsigned_type_node @@ -8366,7 +8374,20 @@ expand_omp_for_generic (struct omp_region *region, gsi_prev (&gsif); tree arr = NULL_TREE; - if (in_combined_parallel) + if (seq_loop) + { + tree n1 = fold_convert (fd->iter_type, fd->loop.n1); + tree n2 = fold_convert (fd->iter_type, fd->loop.n2); + + assign_stmt = gimple_build_assign (istart0, n1); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + + assign_stmt = gimple_build_assign (iend0, n2); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + + t = fold_build2 (NE_EXPR, boolean_type_node, istart0, iend0); + } + else if (in_combined_parallel) { gcc_assert (fd->ordered == 0); /* In a combined parallel loop, emit a call to @@ -8788,39 +8809,45 @@ expand_omp_for_generic (struct omp_region *region, collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb); /* Emit code to get the next parallel iteration in L2_BB. */ - gsi = gsi_start_bb (l2_bb); + if (!seq_loop) + { + gsi = gsi_start_bb (l2_bb); - t = build_call_expr (builtin_decl_explicit (next_fn), 2, - build_fold_addr_expr (istart0), - build_fold_addr_expr (iend0)); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - if (TREE_TYPE (t) != boolean_type_node) - t = fold_build2 (NE_EXPR, boolean_type_node, - t, build_int_cst (TREE_TYPE (t), 0)); - gcond *cond_stmt = gimple_build_cond_empty (t); - gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); + t = build_call_expr (builtin_decl_explicit (next_fn), 2, + build_fold_addr_expr (istart0), + build_fold_addr_expr (iend0)); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + if (TREE_TYPE (t) != boolean_type_node) + t = fold_build2 (NE_EXPR, boolean_type_node, + t, build_int_cst (TREE_TYPE (t), 0)); + gcond *cond_stmt = gimple_build_cond_empty (t); + gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); + } } /* Add the loop cleanup function. 
*/ gsi = gsi_last_bb (exit_bb); - if (gimple_omp_return_nowait_p (gsi_stmt (gsi))) - t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); - else if (gimple_omp_return_lhs (gsi_stmt (gsi))) - t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); - else - t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); - gcall *call_stmt = gimple_build_call (t, 0); - if (gimple_omp_return_lhs (gsi_stmt (gsi))) - gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); - gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); - if (fd->ordered) + if (!seq_loop) { - tree arr = counts[fd->ordered]; - tree clobber = build_constructor (TREE_TYPE (arr), NULL); - TREE_THIS_VOLATILE (clobber) = 1; - gsi_insert_after (&gsi, gimple_build_assign (arr, clobber), - GSI_SAME_STMT); + if (gimple_omp_return_nowait_p (gsi_stmt (gsi))) + t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); + else if (gimple_omp_return_lhs (gsi_stmt (gsi))) + t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); + else + t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); + gcall *call_stmt = gimple_build_call (t, 0); + if (gimple_omp_return_lhs (gsi_stmt (gsi))) + gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); + gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); + if (fd->ordered) + { + tree arr = counts[fd->ordered]; + tree clobber = build_constructor (TREE_TYPE (arr), NULL); + TREE_THIS_VOLATILE (clobber) = 1; + gsi_insert_after (&gsi, gimple_build_assign (arr, clobber), + GSI_SAME_STMT); + } } gsi_remove (&gsi, true); @@ -8833,7 +8860,9 @@ expand_omp_for_generic (struct omp_region *region, gimple_seq phis; e = find_edge (cont_bb, l3_bb); - ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE); + ne = make_edge (l2_bb, l3_bb, (seq_loop + ? 
EDGE_FALLTHRU + : EDGE_FALSE_VALUE)); phis = phi_nodes (l3_bb); for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi)) @@ -8873,7 +8902,8 @@ expand_omp_for_generic (struct omp_region *region, e = find_edge (cont_bb, l2_bb); e->flags = EDGE_FALLTHRU; } - make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); + if (!seq_loop) + make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); if (gimple_in_ssa_p (cfun)) { @@ -8929,12 +8959,16 @@ expand_omp_for_generic (struct omp_region *region, add_bb_to_loop (l2_bb, outer_loop); - /* We've added a new loop around the original loop. Allocate the - corresponding loop struct. */ - struct loop *new_loop = alloc_loop (); - new_loop->header = l0_bb; - new_loop->latch = l2_bb; - add_loop (new_loop, outer_loop); + struct loop *new_loop = NULL; + if (!seq_loop) + { + /* We've added a new loop around the original loop. Allocate the + corresponding loop struct. */ + new_loop = alloc_loop (); + new_loop->header = l0_bb; + new_loop->latch = l2_bb; + add_loop (new_loop, outer_loop); + } /* Allocate a loop structure for the original loop unless we already had one. */ @@ -8944,7 +8978,9 @@ expand_omp_for_generic (struct omp_region *region, struct loop *orig_loop = alloc_loop (); orig_loop->header = l1_bb; /* The loop may have multiple latches. */ - add_loop (orig_loop, new_loop); + add_loop (orig_loop, (new_loop != NULL + ? new_loop + : outer_loop)); } } } @@ -11348,7 +11384,10 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt) original loops from being detected. Fix that up. 
*/ loops_state_set (LOOPS_NEED_FIXUP); - if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD) + if (region->inside_kernels_p) + expand_omp_for_generic (region, &fd, BUILT_IN_NONE, BUILT_IN_NONE, + inner_stmt); + else if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD) expand_omp_simd (region, &fd); else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR) expand_cilk_for (region, &fd); @@ -13030,6 +13069,14 @@ expand_omp (struct omp_region *region) if (region->type == GIMPLE_OMP_PARALLEL) determine_parallel_type (region); + if (region->type == GIMPLE_OMP_TARGET && region->inner) + { + gomp_target *entry = as_a (last_stmt (region->entry)); + if (gimple_omp_target_kind (entry) == GF_OMP_TARGET_KIND_OACC_KERNELS + || region->inside_kernels_p) + region->inner->inside_kernels_p = true; + } + if (region->type == GIMPLE_OMP_FOR && gimple_omp_for_combined_p (last_stmt (region->entry))) inner_stmt = last_stmt (region->inner->entry); -- 1.9.1