From f90db664b6591e4a6525de4eb9ec15ad18d99688 Mon Sep 17 00:00:00 2001 From: Julian Brown Date: Tue, 31 May 2022 18:39:00 +0000 Subject: [PATCH 2/2] OpenMP: Pointers and member mappings Implementing the "omp declare mapper" functionality, I noticed some cases where handling of derived type members that are pointers doesn't seem to be quite right. At present, a type such as this: type T integer, pointer, dimension(:) :: arrptr end type T type(T) :: tvar [...] !$omp target map(tofrom: tvar%arrptr) will be mapped using three mapping nodes: GOMP_MAP_TO tvar%arrptr (the descriptor) GOMP_MAP_TOFROM *tvar%arrptr%data (the actual array data) GOMP_MAP_ALWAYS_POINTER tvar%arrptr%data (a pointer to the array data) This follows OMP 5.0, 2.19.7.1 "map Clause": "If a list item in a map clause is an associated pointer and the pointer is not the base pointer of another list item in a map clause on the same construct, then it is treated as if its pointer target is implicitly mapped in the same clause. For the purposes of the map clause, the mapped pointer target is treated as if its base pointer is the associated pointer." However, we can also write this: map(to: tvar%arrptr) map(tofrom: tvar%arrptr(3:8)) and then instead we should follow: "If the structure sibling list item is a pointer then it is treated as if its association status is undefined, unless it appears as the base pointer of another list item in a map clause on the same construct." But, that's not implemented quite right at the moment (and completely breaks once we introduce declare mappers), because we still map the "to: tvar%arrptr" as the descriptor and the entire array, then we map the "tvar%arrptr(3:8)" part using the descriptor (again!) and the array slice. The solution is to detect when we're mapping a smaller part of the array (or a subcomponent) on the same directive, and only map the descriptor in that case. So we get mappings like this instead: map(to: tvar%arrptr) --> GOMP_MAP_ALLOC tvar%arrptr (the descriptor) map(tofrom: tvar%arrptr(3:8) --> GOMP_MAP_TOFROM tvar%arrptr%data(3) (size 8-3+1, etc.) GOMP_MAP_ALWAYS_POINTER tvar%arrptr%data (bias 3, etc.) This version of the patch builds a hash table separating candidate clauses for dependency checking by root symbol, to alleviate potential quadratic behaviour. 2022-09-15 Julian Brown gcc/fortran/ * gfortran.h (gfc_omp_namelist): Add "duplicate_of" field to "u" union. * trans-openmp.cc (dependency.h): Include. (gfc_trans_omp_array_section): Do not map descriptors here for OpenMP. (gfc_symbol_rooted_namelist): New function. (gfc_trans_omp_clauses): Check subcomponent and subarray/element accesses elsewhere in the clause list for pointers to derived types or array descriptors, and map just the pointer/descriptor if we have any. libgomp/ * testsuite/libgomp.fortran/map-subarray.f90: New test. * testsuite/libgomp.fortran/map-subarray-2.f90: New test. * testsuite/libgomp.fortran/map-subcomponents.f90: New test. * testsuite/libgomp.fortran/struct-elem-map-1.f90: Adjust for descriptor-mapping changes. --- gcc/fortran/gfortran.h | 1 + gcc/fortran/trans-openmp.cc | 207 ++++++++++++++++-- .../libgomp.fortran/map-subarray-2.f90 | 108 +++++++++ .../libgomp.fortran/map-subarray.f90 | 33 +++ .../libgomp.fortran/map-subcomponents.f90 | 35 +++ .../libgomp.fortran/struct-elem-map-1.f90 | 10 +- 6 files changed, 377 insertions(+), 17 deletions(-) create mode 100644 libgomp/testsuite/libgomp.fortran/map-subarray-2.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/map-subarray.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/map-subcomponents.f90 diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 4babd77924b..fe8c4e131f3 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -1358,6 +1358,7 @@ typedef struct gfc_omp_namelist { struct gfc_omp_namelist_udr *udr; gfc_namespace *ns; + struct gfc_omp_namelist *duplicate_of; } u2; struct gfc_omp_namelist *next; locus where; diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc index 8e9d5346b05..444168d3cc3 100644 --- a/gcc/fortran/trans-openmp.cc +++ b/gcc/fortran/trans-openmp.cc @@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see #include "omp-general.h" #include "omp-low.h" #include "memmodel.h" /* For MEMMODEL_ enums. */ +#include "dependency.h" #undef GCC_DIAG_STYLE #define GCC_DIAG_STYLE __gcc_tdiag__ @@ -2470,22 +2471,18 @@ gfc_trans_omp_array_section (stmtblock_t *block, gfc_omp_namelist *n, } if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl))) { - tree desc_node; tree type = TREE_TYPE (decl); ptr2 = gfc_conv_descriptor_data_get (decl); - desc_node = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_DECL (desc_node) = decl; - OMP_CLAUSE_SIZE (desc_node) = TYPE_SIZE_UNIT (type); - if (ptr_kind == GOMP_MAP_ALWAYS_POINTER) + if (ptr_kind != GOMP_MAP_ALWAYS_POINTER) { - OMP_CLAUSE_SET_MAP_KIND (desc_node, GOMP_MAP_TO); - node2 = node; - node = desc_node; /* Needs to come first. */ - } - else - { - OMP_CLAUSE_SET_MAP_KIND (desc_node, GOMP_MAP_TO_PSET); - node2 = desc_node; + /* For OpenMP, the descriptor must be mapped with its own explicit + map clause (e.g. both "map(foo%arr)" and "map(foo%arr(:))" must + be present in the clause list if "foo%arr" is a pointer to an + array). So, we don't create a GOMP_MAP_TO_PSET node here. */ + node2 = build_omp_clause (input_location, OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (node2, GOMP_MAP_TO_PSET); + OMP_CLAUSE_DECL (node2) = decl; + OMP_CLAUSE_SIZE (node2) = TYPE_SIZE_UNIT (type); } node3 = build_omp_clause (input_location, OMP_CLAUSE_MAP); @@ -2592,6 +2589,72 @@ handle_iterator (gfc_namespace *ns, stmtblock_t *iter_block, tree block) return list; } +/* To alleviate quadratic behaviour in checking each entry of a + gfc_omp_namelist against every other entry, we build a hashtable indexed by + gfc_symbol pointer, which we can use in the (overwhelmingly common) case + that a map expression has a symbol as its root term. Return a namelist + based on the root symbol used by N, building a new table in SYM_ROOTED_NL + using the gfc_omp_namelist N2 (all clauses) if we haven't done so + already. */ + +static gfc_omp_namelist * +get_symbol_rooted_namelist (hash_map *sym_rooted_nl, + bool *built_sym_hash, gfc_omp_namelist *n, + gfc_omp_namelist *n2, bool *sym_based) +{ + /* Early-out if we have a NULL clause list (e.g. for OpenACC). */ + if (!n2) + return NULL; + + gfc_symbol *use_sym = NULL; + + /* We're only interested in cases where we have an expression, e.g. a + component access. */ + if (n->expr && n->expr->symtree) + use_sym = n->expr->symtree->n.sym; + + *sym_based = false; + + if (!use_sym) + return n2; + + if (!*built_sym_hash) + { + for (; n2 != NULL; n2 = n2->next) + { + if (!n2->expr || !n2->expr->symtree) + continue; + + gfc_omp_namelist *nl_copy + = gfc_get_omp_namelist (); + memcpy (nl_copy, n2, sizeof *nl_copy); + nl_copy->u2.duplicate_of = n2; + nl_copy->next = NULL; + + gfc_symbol *idx_sym = n2->expr->symtree->n.sym; + + bool existed; + gfc_omp_namelist *&entry + = sym_rooted_nl->get_or_insert (idx_sym, &existed); + if (existed) + nl_copy->next = entry; + entry = nl_copy; + } + *built_sym_hash = true; + } + + gfc_omp_namelist **n2_sym = sym_rooted_nl->get (use_sym); + + if (n2_sym) + { + *sym_based = true; + return *n2_sym; + } + + return NULL; +} + static tree gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, locus where, bool declare_simd = false, @@ -2609,6 +2672,9 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, if (clauses == NULL) return NULL_TREE; + hash_map sym_rooted_nl; + bool built_sym_hash = false; + for (list = 0; list < OMP_LIST_NUM; list++) { gfc_omp_namelist *n = clauses->lists[list]; @@ -3448,6 +3514,57 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, { if (pointer || (openacc && allocatable)) { + gfc_omp_namelist *n2 + = openacc ? NULL : clauses->lists[OMP_LIST_MAP]; + + bool sym_based; + n2 = get_symbol_rooted_namelist (&sym_rooted_nl, + &built_sym_hash, n, + n2, &sym_based); + + /* If the last reference is a pointer to a derived + type ("foo%dt_ptr"), check if any subcomponents + of the same derived type member are being mapped + elsewhere in the clause list ("foo%dt_ptr%x", + etc.). If we have such subcomponent mappings, + we only create an ALLOC node for the pointer + itself, and inhibit mapping the whole derived + type. */ + + for (; n2 != NULL; n2 = n2->next) + { + if ((!sym_based && n == n2) + || (sym_based && n == n2->u2.duplicate_of) + || !n2->expr) + continue; + + int dep + = gfc_dep_resolver (n->expr->ref, n2->expr->ref, + NULL, true); + if (dep == 0) + continue; + + gfc_ref *ref1 = n->expr->ref; + gfc_ref *ref2 = n2->expr->ref; + + while (ref1->next && ref2->next) + { + ref1 = ref1->next; + ref2 = ref2->next; + } + + if (ref2->next) + { + inner = build_fold_addr_expr (inner); + OMP_CLAUSE_SET_MAP_KIND (node, + GOMP_MAP_ALLOC); + OMP_CLAUSE_DECL (node) = inner; + OMP_CLAUSE_SIZE (node) + = TYPE_SIZE_UNIT (TREE_TYPE (inner)); + goto finalize_map_clause; + } + } + tree data, size; if (lastref->u.c.component->ts.type == BT_CLASS) @@ -3549,8 +3666,56 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, node2 = desc_node; else { + gfc_omp_namelist *n2 + = clauses->lists[OMP_LIST_MAP]; node2 = node; node = desc_node; /* Put first. */ + + bool sym_based; + n2 = get_symbol_rooted_namelist (&sym_rooted_nl, + &built_sym_hash, + n, n2, + &sym_based); + + for (; n2 != NULL; n2 = n2->next) + { + if ((!sym_based && n == n2) + || (sym_based && n == n2->u2.duplicate_of) + || !n2->expr) + continue; + + int dep + = gfc_dep_resolver (n->expr->ref, + n2->expr->ref, + NULL, true); + if (dep == 0) + continue; + + gfc_ref *ref1 = n->expr->ref; + gfc_ref *ref2 = n2->expr->ref; + + /* We know ref1 and ref2 overlap. We're + interested in whether ref2 describes a + smaller part of the array than ref1, which + we already know refers to the full + array. */ + + while (ref1->next && ref2->next) + { + ref1 = ref1->next; + ref2 = ref2->next; + } + + if (ref2->next + || (ref2->type == REF_ARRAY + && (ref2->u.ar.type == AR_ELEMENT + || (ref2->u.ar.type + == AR_SECTION)))) + { + node2 = NULL_TREE; + goto finalize_map_clause; + } + } } node3 = build_omp_clause (input_location, OMP_CLAUSE_MAP); @@ -3702,6 +3867,22 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, } } + /* Free hashmap if we built it. */ + if (built_sym_hash) + { + typedef hash_map::iterator hti; + for (hti it = sym_rooted_nl.begin (); it != sym_rooted_nl.end (); ++it) + { + gfc_omp_namelist *&nl = (*it).second; + while (nl) + { + gfc_omp_namelist *next = nl->next; + free (nl); + nl = next; + } + } + } + if (clauses->if_expr) { tree if_var; diff --git a/libgomp/testsuite/libgomp.fortran/map-subarray-2.f90 b/libgomp/testsuite/libgomp.fortran/map-subarray-2.f90 new file mode 100644 index 00000000000..02f08c52a8c --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-subarray-2.f90 @@ -0,0 +1,108 @@ +! { dg-do run } + +program myprog +type u + integer, dimension (:), pointer :: tarr1 + integer, dimension (:), pointer :: tarr2 + integer, dimension (:), pointer :: tarr3 +end type u + +type(u) :: myu1, myu2, myu3 + +integer, dimension (12), target :: myarray1 +integer, dimension (12), target :: myarray2 +integer, dimension (12), target :: myarray3 +integer, dimension (12), target :: myarray4 +integer, dimension (12), target :: myarray5 +integer, dimension (12), target :: myarray6 +integer, dimension (12), target :: myarray7 +integer, dimension (12), target :: myarray8 +integer, dimension (12), target :: myarray9 + +myu1%tarr1 => myarray1 +myu1%tarr2 => myarray2 +myu1%tarr3 => myarray3 +myu2%tarr1 => myarray4 +myu2%tarr2 => myarray5 +myu2%tarr3 => myarray6 +myu3%tarr1 => myarray7 +myu3%tarr2 => myarray8 +myu3%tarr3 => myarray9 + +myu1%tarr1 = 0 +myu1%tarr2 = 0 +myu1%tarr3 = 0 +myu2%tarr1 = 0 +myu2%tarr2 = 0 +myu2%tarr3 = 0 +myu3%tarr1 = 0 +myu3%tarr2 = 0 +myu3%tarr3 = 0 + +!$omp target map(to:myu1%tarr1) map(tofrom:myu1%tarr1(:)) & +!$omp& map(to:myu1%tarr2) map(tofrom:myu1%tarr2(:)) & +!$omp& map(to:myu1%tarr3) map(tofrom:myu1%tarr3(:)) & +!$omp& map(to:myu2%tarr1) map(tofrom:myu2%tarr1(:)) & +!$omp& map(to:myu2%tarr2) map(tofrom:myu2%tarr2(:)) & +!$omp& map(to:myu2%tarr3) map(tofrom:myu2%tarr3(:)) & +!$omp& map(to:myu3%tarr1) map(tofrom:myu3%tarr1(:)) & +!$omp& map(to:myu3%tarr2) map(tofrom:myu3%tarr2(:)) & +!$omp& map(to:myu3%tarr3) map(tofrom:myu3%tarr3(:)) +myu1%tarr1(1) = myu1%tarr1(1) + 1 +myu2%tarr1(1) = myu2%tarr1(1) + 1 +myu3%tarr1(1) = myu3%tarr1(1) + 1 +!$omp end target + +!$omp target map(to:myu1%tarr1) map(tofrom:myu1%tarr1(1:2)) & +!$omp& map(to:myu1%tarr2) map(tofrom:myu1%tarr2(1:2)) & +!$omp& map(to:myu1%tarr3) map(tofrom:myu1%tarr3(1:2)) & +!$omp& map(to:myu2%tarr1) map(tofrom:myu2%tarr1(1:2)) & +!$omp& map(to:myu2%tarr2) map(tofrom:myu2%tarr2(1:2)) & +!$omp& map(to:myu2%tarr3) map(tofrom:myu2%tarr3(1:2)) & +!$omp& map(to:myu3%tarr1) map(tofrom:myu3%tarr1(1:2)) & +!$omp& map(to:myu3%tarr2) map(tofrom:myu3%tarr2(1:2)) & +!$omp& map(to:myu3%tarr3) map(tofrom:myu3%tarr3(1:2)) +myu1%tarr2(1) = myu1%tarr2(1) + 1 +myu2%tarr2(1) = myu2%tarr2(1) + 1 +myu3%tarr2(1) = myu3%tarr2(1) + 1 +!$omp end target + +!$omp target map(to:myu1%tarr1) map(tofrom:myu1%tarr1(1)) & +!$omp& map(to:myu1%tarr2) map(tofrom:myu1%tarr2(1)) & +!$omp& map(to:myu1%tarr3) map(tofrom:myu1%tarr3(1)) & +!$omp& map(to:myu2%tarr1) map(tofrom:myu2%tarr1(1)) & +!$omp& map(to:myu2%tarr2) map(tofrom:myu2%tarr2(1)) & +!$omp& map(to:myu2%tarr3) map(tofrom:myu2%tarr3(1)) & +!$omp& map(to:myu3%tarr1) map(tofrom:myu3%tarr1(1)) & +!$omp& map(to:myu3%tarr2) map(tofrom:myu3%tarr2(1)) & +!$omp& map(to:myu3%tarr3) map(tofrom:myu3%tarr3(1)) +myu1%tarr3(1) = myu1%tarr3(1) + 1 +myu2%tarr3(1) = myu2%tarr3(1) + 1 +myu3%tarr3(1) = myu3%tarr3(1) + 1 +!$omp end target + +!$omp target map(tofrom:myu1%tarr1) & +!$omp& map(tofrom:myu1%tarr2) & +!$omp& map(tofrom:myu1%tarr3) & +!$omp& map(tofrom:myu2%tarr1) & +!$omp& map(tofrom:myu2%tarr2) & +!$omp& map(tofrom:myu2%tarr3) & +!$omp& map(tofrom:myu3%tarr1) & +!$omp& map(tofrom:myu3%tarr2) & +!$omp& map(tofrom:myu3%tarr3) +myu1%tarr2(1) = myu1%tarr2(1) + 1 +myu2%tarr2(1) = myu2%tarr2(1) + 1 +myu3%tarr2(1) = myu3%tarr2(1) + 1 +!$omp end target + +if (myu1%tarr1(1).ne.1) stop 1 +if (myu2%tarr1(1).ne.1) stop 2 +if (myu3%tarr1(1).ne.1) stop 3 +if (myu1%tarr2(1).ne.2) stop 4 +if (myu2%tarr2(1).ne.2) stop 5 +if (myu3%tarr2(1).ne.2) stop 6 +if (myu1%tarr3(1).ne.1) stop 7 +if (myu2%tarr3(1).ne.1) stop 8 +if (myu3%tarr3(1).ne.1) stop 9 + +end program myprog diff --git a/libgomp/testsuite/libgomp.fortran/map-subarray.f90 b/libgomp/testsuite/libgomp.fortran/map-subarray.f90 new file mode 100644 index 00000000000..85f5af3a2a6 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-subarray.f90 @@ -0,0 +1,33 @@ +! { dg-do run } + +program myprog +type u + integer, dimension (:), pointer :: tarr +end type u + +type(u) :: myu +integer, dimension (12), target :: myarray + +myu%tarr => myarray + +myu%tarr = 0 + +!$omp target map(to:myu%tarr) map(tofrom:myu%tarr(:)) +myu%tarr(1) = myu%tarr(1) + 1 +!$omp end target + +!$omp target map(to:myu%tarr) map(tofrom:myu%tarr(1:2)) +myu%tarr(1) = myu%tarr(1) + 1 +!$omp end target + +!$omp target map(to:myu%tarr) map(tofrom:myu%tarr(1)) +myu%tarr(1) = myu%tarr(1) + 1 +!$omp end target + +!$omp target map(tofrom:myu%tarr) +myu%tarr(1) = myu%tarr(1) + 1 +!$omp end target + +if (myu%tarr(1).ne.4) stop 1 + +end program myprog diff --git a/libgomp/testsuite/libgomp.fortran/map-subcomponents.f90 b/libgomp/testsuite/libgomp.fortran/map-subcomponents.f90 new file mode 100644 index 00000000000..4074a952dd1 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-subcomponents.f90 @@ -0,0 +1,35 @@ +! { dg-do run } + +module mymod +type F +integer :: a, b, c +integer, dimension(10) :: d +end type F + +type G +integer :: x, y +type(F), pointer :: myf +integer :: z +end type G +end module mymod + +program myprog +use mymod + +type(F), target :: ftmp +type(G) :: gvar + +gvar%myf => ftmp + +gvar%myf%d = 0 + +!$omp target map(to:gvar%myf) map(tofrom: gvar%myf%b, gvar%myf%d) +gvar%myf%d(1) = gvar%myf%d(1) + 1 +!$omp end target + +if (gvar%myf%d(1).ne.1) stop 1 + +end program myprog + +! This is fixed by the address inspector/address tokenization patch. +! { dg-xfail-run-if TODO { offload_device_nonshared_as } } diff --git a/libgomp/testsuite/libgomp.fortran/struct-elem-map-1.f90 b/libgomp/testsuite/libgomp.fortran/struct-elem-map-1.f90 index 58550c79d69..f128ebcffc1 100644 --- a/libgomp/testsuite/libgomp.fortran/struct-elem-map-1.f90 +++ b/libgomp/testsuite/libgomp.fortran/struct-elem-map-1.f90 @@ -229,7 +229,8 @@ contains ! !$omp target map(tofrom: var%d(4:7), var%f(2:3), var%str2(2:3)) & ! !$omp& map(tofrom: var%str4(2:2), var%uni2(2:3), var%uni4(2:2)) - !$omp target map(tofrom: var%d(4:7), var%f(2:3), var%str2(2:3), var%uni2(2:3)) + !$omp target map(to: var%f) map(tofrom: var%d(4:7), var%f(2:3), & + !$omp& var%str2(2:3), var%uni2(2:3)) if (any (var%d(4:7) /= [(-3*i, i = 4, 7)])) stop 4 if (any (var%str2(2:3) /= ["67890", "ABCDE"])) stop 6 @@ -274,7 +275,7 @@ contains if (any (var%str2(2:3) /= ["67890", "ABCDE"])) stop 6 !$omp end target - !$omp target map(tofrom: var%f(2:3)) + !$omp target map(to: var%f) map(tofrom: var%f(2:3)) if (.not. associated (var%f)) stop 9 if (size (var%f) /= 4) stop 10 if (any (var%f(2:3) /= [33, 44])) stop 11 @@ -314,7 +315,8 @@ contains ! !$omp target map(tofrom: var%d(5), var%f(3), var%str2(3), & ! !$omp var%str4(2), var%uni2(3), var%uni4(2)) - !$omp target map(tofrom: var%d(5), var%f(3), var%str2(3), var%uni2(3)) + !$omp target map(to: var%f) map(tofrom: var%d(5), var%f(3), & + !$omp& var%str2(3), var%uni2(3)) if (var%d(5) /= -3*5) stop 4 if (var%str2(3) /= "ABCDE") stop 6 if (var%uni2(3) /= 4_"ABCDE") stop 7 @@ -362,7 +364,7 @@ contains if (any (var%uni2(2:3) /= [4_"67890", 4_"ABCDE"])) stop 7 !$omp end target - !$omp target map(tofrom: var%f(2:3)) + !$omp target map(to: var%f) map(tofrom: var%f(2:3)) if (.not. associated (var%f)) stop 9 if (size (var%f) /= 4) stop 10 if (any (var%f(2:3) /= [33, 44])) stop 11 -- 2.29.2