From f4768a88a4e2ab5dc80feb7bfb06cd273c849f72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C3=B6=20Barany?= Date: Mon, 21 Jan 2019 03:08:57 -0800 Subject: [PATCH] Rework OpenACC Fortran DO loop initialization Fortran DO loops on arrays with non-constant bounds (like a(lo:hi)) need special setup code to compute the bounds and offsets for the iteration. In an OpenACC region containing multiple loops, this used to be done in a block of code at the start of the region for all of the loops. But the upcoming kernels conversion expects this kind of setup code to immediately precede the corresponding loop, and variables are not mapped correctly otherwise. This patch separates out the initialization part for each loop and places it immediately before the loop. gcc/fortran/ * trans-openmp.c (gfc_privatize_nodesc_array_clauses): Renamed from gfc_privatize_nodesc_arrays, initialization part factored out to... (gfc_reinitialize_privatized_arrays): ... this new function, called... (gfc_trans_omp_do): ... from here for OpenACC loops. libgomp/ * testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90: New test. --- gcc/fortran/ChangeLog.openacc | 7 ++ gcc/fortran/trans-openmp.c | 86 +++++++++++++--------- libgomp/ChangeLog.openacc | 4 + .../initialize_kernels_loops.f90 | 31 ++++++++ 4 files changed, 92 insertions(+), 36 deletions(-) create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90 diff --git a/gcc/fortran/ChangeLog.openacc b/gcc/fortran/ChangeLog.openacc index 0f31f3e..450056d 100644 --- a/gcc/fortran/ChangeLog.openacc +++ b/gcc/fortran/ChangeLog.openacc @@ -1,3 +1,10 @@ +2019-01-24 Gergö Barany + + * trans-openmp.c (gfc_privatize_nodesc_array_clauses): Renamed from + gfc_privatize_nodesc_arrays, initialization part factored out to... + (gfc_reinitialize_privatized_arrays): ... this new function, called... + (gfc_trans_omp_do): ... from here for OpenACC loops. 
+ 2019-01-09 Julian Brown * cpp.c (cpp_define_builtins): Update _OPENACC define to 201711. diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index d5dbf18..5a444c3 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -3198,6 +3198,44 @@ gfc_scan_nodesc_arrays (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED, return 0; } +/* Reinitialize any arrays used inside CODE. Place the initialization + sequences in BLOCK. */ + +static void +gfc_reinitialize_privatized_arrays (gfc_code *code, stmtblock_t *block) +{ + hash_set *array_set = new hash_set (); + gfc_code_walker (&code, gfc_dummy_code_callback, gfc_scan_nodesc_arrays, + array_set); + + hash_set::iterator its = array_set->begin (); + + for (; its != array_set->end (); ++its) + { + gfc_symbol *sym = *its; + tree parm = gfc_get_symbol_decl (sym); + tree type = TREE_TYPE (parm); + tree offset, tmp; + + /* Evaluate the bounds of the array. */ + gfc_trans_array_bounds (type, sym, &offset, block, false); + + /* Set the offset. */ + if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL) + gfc_add_modify (block, GFC_TYPE_ARRAY_OFFSET (type), offset); + + /* Set the pointer itself if we aren't using the parameter + directly. */ + if (TREE_CODE (parm) != PARM_DECL && DECL_LANG_SPECIFIC (parm) + && GFC_DECL_SAVED_DESCRIPTOR (parm)) + { + tmp = convert (TREE_TYPE (parm), + GFC_DECL_SAVED_DESCRIPTOR (parm)); + gfc_add_modify (block, parm, tmp); + } + } +} + /* Build a set of internal array variables (lbound, ubound, stride, etc.) that need privatization. */ @@ -3219,41 +3257,12 @@ gfc_privatize_nodesc_arrays_1 (tree *tp, int *walk_subtrees, void *data) return NULL; } -/* Reinitialize all of the arrays inside ARRAY_SET in BLOCK. Append private - clauses for those arrays in CLAUSES. */ +/* Append private clauses for the arrays in BLOCK to CLAUSES. 
*/ static tree -gfc_privatize_nodesc_arrays (hash_set *array_set, - stmtblock_t *block, tree clauses) +gfc_privatize_nodesc_array_clauses (stmtblock_t *block, tree clauses) { - hash_set::iterator its = array_set->begin (); hash_set *private_decls = new hash_set; - - for (; its != array_set->end (); ++its) - { - gfc_symbol *sym = *its; - tree parm = gfc_get_symbol_decl (sym); - tree type = TREE_TYPE (parm); - tree offset, tmp; - - /* Evaluate the bounds of the array. */ - gfc_trans_array_bounds (type, sym, &offset, block, false); - - /* Set the offset. */ - if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL) - gfc_add_modify (block, GFC_TYPE_ARRAY_OFFSET (type), offset); - - /* Set the pointer itself if we aren't using the parameter - directly. */ - if (TREE_CODE (parm) != PARM_DECL && DECL_LANG_SPECIFIC (parm) - && GFC_DECL_SAVED_DESCRIPTOR (parm)) - { - tmp = convert (TREE_TYPE (parm), - GFC_DECL_SAVED_DESCRIPTOR (parm)); - gfc_add_modify (block, parm, tmp); - } - } - /* Add private clauses for any variables that are used by gfc_trans_array_bounds. */ walk_tree_without_duplicates (&block->head, gfc_privatize_nodesc_arrays_1, @@ -3274,10 +3283,9 @@ gfc_privatize_nodesc_arrays (hash_set *array_set, return clauses; } -/* Reinitialize any arrays in CLAUSES used inside CODE which do not contain - array descriptors if SCAN_NODESC_ARRAYS is TRUE. Place the initialization - sequences in CODE. Update CLAUSES to contain OMP_CLAUSE_PRIVATE for any - arrays which were initialized. */ +/* Collect any arrays in CLAUSES used inside CODE which do not contain + array descriptors if SCAN_NODESC_ARRAYS is TRUE. Update CLAUSES to + contain OMP_CLAUSE_PRIVATE for any arrays found. 
*/ static hash_set * gfc_init_nodesc_arrays (stmtblock_t *inner, tree *clauses, gfc_code *code, @@ -3296,7 +3304,7 @@ gfc_init_nodesc_arrays (stmtblock_t *inner, tree *clauses, gfc_code *code, { gfc_start_block (inner); pushlevel (); - *clauses = gfc_privatize_nodesc_arrays (array_set, inner, *clauses); + *clauses = gfc_privatize_nodesc_array_clauses (inner, *clauses); } else { @@ -3856,6 +3864,12 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock, omp_clauses = gfc_trans_omp_clauses (pblock, do_clauses, code->loc); + /* Make sure that setup code reinitializing array bounds, offsets, and + strides immediately precedes the loop. This is where the conversion of + OpenACC kernels to parallel regions expects it. */ + if (op == EXEC_OACC_LOOP) + gfc_reinitialize_privatized_arrays (code, pblock); + for (i = 0; i < collapse; i++) { int simple = 0; diff --git a/libgomp/ChangeLog.openacc b/libgomp/ChangeLog.openacc index c0ee88b..2e23feb 100644 --- a/libgomp/ChangeLog.openacc +++ b/libgomp/ChangeLog.openacc @@ -1,3 +1,7 @@ +2019-01-24 Gergö Barany + + * testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90: New test. + 2019-01-09 Julian Brown * acc_prof.h (_ACC_PROF_INFO_VERSION): Update to 201711. 
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90 b/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90 new file mode 100644 index 0000000..fbae8cf --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90 @@ -0,0 +1,31 @@ +subroutine kernel(lo, hi, a, b, c) + implicit none + integer :: lo, hi, i + real, dimension(lo:hi) :: a, b, c + +!$acc kernels +!$acc loop independent + do i = lo, hi + b(i) = a(i) + end do +!$acc loop independent + do i = lo, hi + c(i) = b(i) + end do +!$acc end kernels +end subroutine kernel + +program main + integer :: n = 20 + real, dimension(1:20) :: a, b, c + + a(:) = 1 + b(:) = 2 + c(:) = 3 + + call kernel(1, n, a, b, c) + + do i = 1, n + if (c(i) .ne. 1) call abort + end do +end program main -- 2.8.1