2017-04-05 Cesar Philippidis gcc/fortran/ * gfortran.h (enum gfc_omp_map_op): Add OMP_MAP_DECLARE_ALLOCATE, OMP_MAP_DECLARE_DEALLOCATE. * openmp.c (gfc_match_oacc_declare): Add support for OMP_MAP_ALLOC and OMP_MAP_TO, as those match the OpenACC 2.5 semantics. * trans-array.c (gfc_array_allocate): Call gfc_trans_oacc_declare_allocate for decls with oacc_decalre_create attributes set. (gfc_array_deallocate): Likewise. * trans-decl.c (add_attributes_to_decl): Enable lowering of OpenACC declared create, copyin and deviceptr clauses. (add_clause): Don't duplicate OpenACC declare clauses. (find_module_oacc_declare_clauses): Relax oacc_declare_create to OMP_MAP_ALLOC, and oacc_declare_copying to OMP_MAP_TO. This matches the OpenACC 2.5 semantics. * trans-openmp.c (gfc_trans_omp_clauses_1): Handle OMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. (gfc_trans_oacc_declare_allocate): New function. * trans-stmt.h: Declare gfc_trans_oacc_declare_allocate. gcc/ * omp-low.c (scan_sharing_clauses): Update handling of OpenACC declare create, copyin and deviceptr to have local lifetimes. (lower_omp_target): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. * tree-pretty-print.c (dump_omp_clause): Likewise. gcc/testsuite/ * gfortran.dg/goacc/declare-allocatable-1.f90: New test. include/ * gomp-constants.h (enum gomp_map_kind): Define GOMP_MAP_DECLARE, GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. libgomp/ * libgomp.h: Declare gomp_acc_declare_allocate. * oacc-mem.c (gomp_acc_declare_allocate): New function. * oacc-parallel.c (GOACC_enter_exit_data): Handle GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE}. * testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90: New test. diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 2adbe4c..75217c7 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -1156,7 +1156,9 @@ enum gfc_omp_map_op OMP_MAP_RELEASE, OMP_MAP_ALWAYS_TO, OMP_MAP_ALWAYS_FROM, - OMP_MAP_ALWAYS_TOFROM + OMP_MAP_ALWAYS_TOFROM, + OMP_MAP_DECLARE_ALLOCATE, + OMP_MAP_DECLARE_DEALLOCATE }; enum gfc_omp_linear_op diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c index 354e6ff..31e4885 100644 --- a/gcc/fortran/openmp.c +++ b/gcc/fortran/openmp.c @@ -2233,10 +2233,12 @@ gfc_match_oacc_declare (void) switch (n->u.map_op) { case OMP_MAP_FORCE_ALLOC: + case OMP_MAP_ALLOC: s->attr.oacc_declare_create = 1; break; case OMP_MAP_FORCE_TO: + case OMP_MAP_TO: s->attr.oacc_declare_copyin = 1; break; diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c index 278eaff..de84a67 100644 --- a/gcc/fortran/trans-array.c +++ b/gcc/fortran/trans-array.c @@ -88,6 +88,7 @@ along with GCC; see the file COPYING3. If not see #include "trans-types.h" #include "trans-array.h" #include "trans-const.h" +#include "trans-stmt.h" #include "dependency.h" static bool gfc_get_array_constructor_size (mpz_t *, gfc_constructor_base); @@ -5394,6 +5395,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, gfc_expr **upper; gfc_ref *ref, *prev_ref = NULL; bool allocatable, coarray, dimension, alloc_w_e3_arr_spec = false; + bool oacc_declare = false; ref = expr->ref; @@ -5408,6 +5410,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, allocatable = expr->symtree->n.sym->attr.allocatable; coarray = expr->symtree->n.sym->attr.codimension; dimension = expr->symtree->n.sym->attr.dimension; + oacc_declare = expr->symtree->n.sym->attr.oacc_declare_create; } else { @@ -5540,7 +5543,12 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, /* Update the array descriptors. */ if (dimension) - gfc_conv_descriptor_offset_set (&set_descriptor_block, se->expr, offset); + { + gfc_conv_descriptor_offset_set (&set_descriptor_block, se->expr, offset); + + if (oacc_declare) + gfc_trans_oacc_declare_allocate (&set_descriptor_block, expr, true); + } set_descriptor = gfc_finish_block (&set_descriptor_block); if (status != NULL_TREE) @@ -5581,6 +5589,7 @@ gfc_array_deallocate (tree descriptor, tree pstat, tree errmsg, tree errlen, tree tmp; stmtblock_t block; bool coarray = gfc_is_coarray (expr); + gfc_symbol *sym = expr->symtree->n.sym; gfc_start_block (&block); @@ -5588,6 +5597,9 @@ gfc_array_deallocate (tree descriptor, tree pstat, tree errmsg, tree errlen, var = gfc_conv_descriptor_data_get (descriptor); STRIP_NOPS (var); + if (!coarray && sym->attr.oacc_declare_create) + gfc_trans_oacc_declare_allocate (&block, expr, false); + /* Parameter is the address of the data component. */ tmp = gfc_deallocate_with_status (coarray ? descriptor : var, pstat, errmsg, errlen, label_finish, false, expr, coarray); diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index 109bdf7..b4db6b0 100644 --- a/gcc/fortran/trans-decl.c +++ b/gcc/fortran/trans-decl.c @@ -1324,10 +1324,10 @@ add_attributes_to_decl (symbol_attribute sym_attr, tree list) } if (sym_attr.omp_declare_target -#if 0 /* TODO */ || sym_attr.oacc_declare_create || sym_attr.oacc_declare_copyin || sym_attr.oacc_declare_deviceptr +#if 0 /* TODO */ || sym_attr.oacc_declare_device_resident #endif ) @@ -5932,13 +5932,17 @@ add_clause (gfc_symbol *sym, gfc_omp_map_op map_op) { gfc_omp_namelist *n; + if (!module_oacc_clauses) + module_oacc_clauses = gfc_get_omp_clauses (); + + for (n = module_oacc_clauses->lists[OMP_LIST_MAP]; n != NULL; n = n->next) + if (n->sym->backend_decl == sym->backend_decl) + return; + n = gfc_get_omp_namelist (); n->sym = sym; n->u.map_op = map_op; - if (!module_oacc_clauses) - module_oacc_clauses = gfc_get_omp_clauses (); - if (module_oacc_clauses->lists[OMP_LIST_MAP]) n->next = module_oacc_clauses->lists[OMP_LIST_MAP]; @@ -5954,10 +5958,10 @@ find_module_oacc_declare_clauses (gfc_symbol *sym) gfc_omp_map_op map_op; if (sym->attr.oacc_declare_create) - map_op = OMP_MAP_FORCE_ALLOC; + map_op = OMP_MAP_ALLOC; if (sym->attr.oacc_declare_copyin) - map_op = OMP_MAP_FORCE_TO; + map_op = OMP_MAP_TO; if (sym->attr.oacc_declare_deviceptr) map_op = OMP_MAP_FORCE_DEVICEPTR; diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index 17984bb..ba738a9 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -2360,6 +2360,12 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, case OMP_MAP_FORCE_DEVICEPTR: OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_FORCE_DEVICEPTR); break; + case OMP_MAP_DECLARE_ALLOCATE: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_DECLARE_ALLOCATE); + break; + case OMP_MAP_DECLARE_DEALLOCATE: + OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_DECLARE_DEALLOCATE); + break; default: gcc_unreachable (); } @@ -5369,6 +5375,41 @@ gfc_trans_oacc_declare (gfc_code *code) return gfc_finish_block (&block); } +/* Create an OpenACC enter or exit data construct for an OpenACC declared + variable that has been allocated or deallocated. */ + +tree +gfc_trans_oacc_declare_allocate (stmtblock_t *block, gfc_expr *expr, + bool allocate) +{ + gfc_omp_clauses *clauses = gfc_get_omp_clauses (); + gfc_omp_namelist *p = gfc_get_omp_namelist (); + tree oacc_clauses, stmt; + enum tree_code construct_code; + + p->sym = expr->symtree->n.sym; + p->where = expr->where; + + if (allocate) + { + p->u.map_op = OMP_MAP_DECLARE_ALLOCATE; + construct_code = OACC_ENTER_DATA; + } + else + { + p->u.map_op = OMP_MAP_DECLARE_DEALLOCATE; + construct_code = OACC_EXIT_DATA; + } + clauses->lists[OMP_LIST_MAP] = p; + + oacc_clauses = gfc_trans_omp_clauses (block, clauses, expr->where); + stmt = build1_loc (input_location, construct_code, void_type_node, + oacc_clauses); + gfc_add_expr_to_block (block, stmt); + + return stmt; +} + tree gfc_trans_oacc_directive (gfc_code *code) { diff --git a/gcc/fortran/trans-stmt.h b/gcc/fortran/trans-stmt.h index 6ca0c1b..aed3214 100644 --- a/gcc/fortran/trans-stmt.h +++ b/gcc/fortran/trans-stmt.h @@ -67,6 +67,7 @@ tree gfc_trans_omp_directive (gfc_code *); void gfc_trans_omp_declare_simd (gfc_namespace *); tree gfc_trans_oacc_directive (gfc_code *); tree gfc_trans_oacc_declare (gfc_namespace *); +tree gfc_trans_oacc_declare_allocate (stmtblock_t *, gfc_expr *, bool); /* trans-io.c */ tree gfc_trans_open (gfc_code *); diff --git a/gcc/omp-low.c b/gcc/omp-low.c index f98fa54..a584a44 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -2265,7 +2265,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)) && varpool_node::get_create (decl)->offloadable && !lookup_attribute ("omp declare target link", - DECL_ATTRIBUTES (decl))) + DECL_ATTRIBUTES (decl)) + && !(is_gimple_omp_oacc (ctx->stmt))) break; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER) @@ -16740,6 +16741,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) case GOMP_MAP_FORCE_PRESENT: case GOMP_MAP_FORCE_DEVICEPTR: case GOMP_MAP_DEVICE_RESIDENT: + case GOMP_MAP_DECLARE_ALLOCATE: + case GOMP_MAP_DECLARE_DEALLOCATE: case GOMP_MAP_DYNAMIC_ARRAY_TO: case GOMP_MAP_DYNAMIC_ARRAY_FROM: case GOMP_MAP_DYNAMIC_ARRAY_TOFROM: diff --git a/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 b/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 new file mode 100644 index 0000000..9195055 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/declare-allocatable-1.f90 @@ -0,0 +1,25 @@ +! Verify that OpenACC declared allocatable arrays have implicit +! OpenACC enter and exit pragmas at the time of allocation and +! deallocation. + +! { dg-additional-options "-fdump-tree-original" } + +program allocate + implicit none + integer, allocatable :: a(:) + integer, parameter :: n = 100 + integer i + !$acc declare create(a) + + allocate (a(n)) + + !$acc parallel loop copyout(a) + do i = 1, n + a(i) = i + end do + + deallocate (a) +end program allocate + +! { dg-final { scan-tree-dump-times "pragma acc enter data map.declare_allocate" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "pragma acc exit data map.declare_deallocate" 1 "gimple" } } diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index 16efdc4..aae409d 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -764,6 +764,12 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, int flags) case GOMP_MAP_DYNAMIC_ARRAY_FORCE_PRESENT: pp_string (pp, "force_present,dynamic_array"); break; + case GOMP_MAP_DECLARE_ALLOCATE: + pp_string (pp, "declare_allocate"); + break; + case GOMP_MAP_DECLARE_DEALLOCATE: + pp_string (pp, "declare_deallocate"); + break; default: gcc_unreachable (); } diff --git a/include/gomp-constants.h b/include/gomp-constants.h index e60d07d..8f17f78 100644 --- a/include/gomp-constants.h +++ b/include/gomp-constants.h @@ -41,6 +41,7 @@ #define GOMP_MAP_FLAG_SPECIAL_1 (1 << 3) #define GOMP_MAP_FLAG_SPECIAL_2 (1 << 4) #define GOMP_MAP_FLAG_SPECIAL_3 (1 << 5) +#define GOMP_MAP_FLAG_SPECIAL_4 (1 << 6) #define GOMP_MAP_FLAG_SPECIAL (GOMP_MAP_FLAG_SPECIAL_1 \ | GOMP_MAP_FLAG_SPECIAL_0) /* Flag to force a specific behavior (or else, trigger a run-time error). */ @@ -149,6 +150,12 @@ enum gomp_map_kind | GOMP_MAP_FORCE_ALLOC), GOMP_MAP_DYNAMIC_ARRAY_FORCE_PRESENT = (GOMP_MAP_DYNAMIC_ARRAY | GOMP_MAP_FORCE_PRESENT), + /* Mapping kinds for allocatable arrays. */ + GOMP_MAP_DECLARE = (GOMP_MAP_FLAG_SPECIAL_4), + GOMP_MAP_DECLARE_ALLOCATE = (GOMP_MAP_DECLARE + | GOMP_MAP_FORCE_TO), + GOMP_MAP_DECLARE_DEALLOCATE = (GOMP_MAP_DECLARE + | GOMP_MAP_FORCE_FROM), /* Internal to GCC, not used in libgomp. */ /* Do not map, but pointer assign a pointer instead. */ GOMP_MAP_FIRSTPRIVATE_POINTER = (GOMP_MAP_LAST | 1), diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index f4bfc06..31cb103 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -974,6 +974,8 @@ enum gomp_map_vars_kind extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *); extern void gomp_acc_remove_pointer (void *, bool, int, int); +extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *, + unsigned short *); extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *, size_t, void **, void **, diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 6cff777..14b39b2 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -704,6 +704,34 @@ acc_update_self_async (void *h, size_t s, int async) } void +gomp_acc_declare_allocate (bool allocate, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds) +{ + gomp_debug (0, " %s: processing\n", __FUNCTION__); + + if (allocate) + { + assert (mapnum == 3); + + /* Allocate memory for the array data. */ + uintptr_t data = (uintptr_t) acc_create (hostaddrs[0], sizes[0]); + + /* Update the PSET. */ + acc_update_device (hostaddrs[1], sizes[1]); + void *pset = acc_deviceptr (hostaddrs[1]); + acc_memcpy_to_device (pset, &data, sizeof (uintptr_t)); + } + else + { + /* Deallocate memory for the array data. */ + void *data = acc_deviceptr (hostaddrs[0]); + acc_free (data); + } + + gomp_debug (0, " %s: end\n", __FUNCTION__); +} + +void gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds) { diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 36e2431..d6ced64 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -484,14 +484,16 @@ GOACC_enter_exit_data (int device, size_t mapnum, || kind == GOMP_MAP_FORCE_PRESENT || kind == GOMP_MAP_FORCE_TO || kind == GOMP_MAP_TO - || kind == GOMP_MAP_ALLOC) + || kind == GOMP_MAP_ALLOC + || kind == GOMP_MAP_DECLARE_ALLOCATE) { data_enter = true; break; } if (kind == GOMP_MAP_DELETE - || kind == GOMP_MAP_FORCE_FROM) + || kind == GOMP_MAP_FORCE_FROM + || kind == GOMP_MAP_DECLARE_DEALLOCATE) break; gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", @@ -540,7 +542,10 @@ GOACC_enter_exit_data (int device, size_t mapnum, } else { - if (!acc_is_present (hostaddrs[i], sizes[i])) + if (kind == GOMP_MAP_DECLARE_ALLOCATE) + gomp_acc_declare_allocate (true, pointer, &hostaddrs[i], + &sizes[i], &kinds[i]); + else if (!acc_is_present (hostaddrs[i], sizes[i])) { gomp_acc_insert_pointer (pointer, &hostaddrs[i], &sizes[i], &kinds[i]); @@ -579,7 +584,10 @@ GOACC_enter_exit_data (int device, size_t mapnum, } else { - if (acc_is_present (hostaddrs[i], sizes[i])) + if (kind == GOMP_MAP_DECLARE_DEALLOCATE) + gomp_acc_declare_allocate (false, pointer, &hostaddrs[i], + &sizes[i], &kinds[i]); + else if (acc_is_present (hostaddrs[i], sizes[i])) { gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff) == GOMP_MAP_FORCE_FROM, async, diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 new file mode 100644 index 0000000..5167dee --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-1.f90 @@ -0,0 +1,211 @@ +! Test declare create with allocatable arrays. + +! { dg-do run } + +module vars + implicit none + integer, parameter :: n = 100 + real*8, allocatable :: b(:) + !$acc declare create (b) +end module vars + +program test + use vars + use openacc + implicit none + real*8 :: a + integer :: i + + interface + subroutine sub1 + !$acc routine gang + end subroutine sub1 + + subroutine sub2 + end subroutine sub2 + + real*8 function fun1 (ix) + integer ix + !$acc routine seq + end function fun1 + + real*8 function fun2 (ix) + integer ix + !$acc routine seq + end function fun2 + end interface + + if (allocated (b)) call abort + + ! Test local usage of an allocated declared array. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + a = 2.0 + + !$acc parallel loop + do i = 1, n + b(i) = i * a + end do + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*a) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside an acc + ! routine subroutine. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel + call sub1 + !$acc end parallel + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i*2) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! subroutine. + + call sub2 + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= 1.0) call abort + end do + + deallocate (b) + + if (allocated (b)) call abort + + ! Test the usage of an allocated declared array inside an acc + ! routine function. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc parallel loop + do i = 1, n + b(i) = fun1 (i) + end do + + if (.not.acc_is_present (b)) call abort + + !$acc update host(b) + + do i = 1, n + if (b(i) /= i) call abort + end do + + deallocate (b) + + ! Test the usage of an allocated declared array inside a host + ! function. + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do + + !$acc update host(b) + + do i = 1, n + b(i) = fun2 (i) + end do + + if (.not.acc_is_present (b)) call abort + + do i = 1, n + if (b(i) /= i*i) call abort + end do + + deallocate (b) +end program test + +! Set each element in array 'b' at index i to i*2. + +subroutine sub1 ! { dg-warning "region is worker partitioned" } + use vars + implicit none + integer i + !$acc routine gang + + !$acc loop + do i = 1, n + b(i) = i*2 + end do +end subroutine sub1 + +! Allocate array 'b', and set it to all 1.0. + +subroutine sub2 + use vars + use openacc + implicit none + integer i + + allocate (b(n)) + + if (.not.allocated (b)) call abort + if (acc_is_present (b) .neqv. .true.) call abort + + !$acc parallel loop + do i = 1, n + b(i) = 1.0 + end do +end subroutine sub2 + +! Return b(i) * i; + +real*8 function fun1 (i) + use vars + implicit none + integer i + !$acc routine seq + + fun1 = b(i) * i +end function fun1 + +! Return b(i) * i * i; + +real*8 function fun2 (i) + use vars + implicit none + integer i + + fun2 = b(i) * i * i +end function fun2