From f6ce1e77bbf5d3a096f52e674bfd7354c6537d10 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Fri, 28 Oct 2022 15:06:45 +0200 Subject: [PATCH] Support OpenACC 'declare create' with Fortran allocatable arrays, part II [PR106643, PR96668] PR libgomp/106643 PR fortran/96668 libgomp/ * oacc-mem.c (goacc_enter_data_internal): Support OpenACC 'declare create' with Fortran allocatable arrays, part II. * testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-directive.f90: Adjust. * testsuite/libgomp.oacc-fortran/pr106643-1.f90: New. --- libgomp/oacc-mem.c | 15 +++- ...locatable-array_descriptor-1-directive.f90 | 90 +++++++++++++------ .../libgomp.oacc-fortran/pr106643-1.f90 | 83 +++++++++++++++++ 3 files changed, 160 insertions(+), 28 deletions(-) create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/pr106643-1.f90 diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index ba010fddbb3..233fe0e4c1d 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -1166,7 +1166,10 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, struct target_mem_desc *tgt = n->tgt; - /* Arrange so that OpenACC 'declare' code à la PR106643 + /* Minimal OpenACC variant corresponding to PR96668 + "[OpenMP] Re-mapping allocated but previously unallocated + allocatable does not work" 'libgomp/target.c' changes, so that + OpenACC 'declare' code à la PR106643 "[gfortran + OpenACC] Allocate in module causes refcount error" has a chance to work. */ if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET @@ -1181,6 +1184,16 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, assert ((kinds[i + k] & 0xff) == GOMP_MAP_POINTER); } + /* Let 'goacc_map_vars' -> 'gomp_map_vars_internal' handle + this. */ + gomp_mutex_unlock (&acc_dev->lock); + struct target_mem_desc *tgt_ + = goacc_map_vars (acc_dev, aq, groupnum, &hostaddrs[i], NULL, + &sizes[i], &kinds[i], true, + GOMP_MAP_VARS_ENTER_DATA); + assert (tgt_ == NULL); + gomp_mutex_lock (&acc_dev->lock); + /* Given that 'goacc_exit_data_internal'/'goacc_exit_datum_1' will always see 'n->refcount == REFCOUNT_INFINITY', there's no need to adjust 'n->dynamic_refcount' here. */ diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-directive.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-directive.f90 index 10e1d5bc378..6604f72c5c1 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-directive.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-allocatable-array_descriptor-1-directive.f90 @@ -105,27 +105,50 @@ program test !$acc enter data create (b) ! This is now OpenACC "present": if (.not.acc_is_present (b)) error stop - ! This still has the initial array descriptor: + ! ..., and got the actual array descriptor installed: !$acc serial - call verify_initial + call verify_n1_allocated !$acc end serial do i = n1_lb, n1_ub b(i) = i - 1 end do - ! Verify that host-to-device copy doesn't touch the device-side (still - ! initial) array descriptor (but it does copy the array data). + ! In 'declare-allocatable-array_descriptor-1-runtime.f90', this does "verify + ! that host-to-device copy doesn't touch the device-side (still initial) + ! array descriptor (but it does copy the array data"). This is here not + ! applicable anymore, as we've already gotten the actual array descriptor + ! installed. Thus now verify that it does copy the array data. call acc_update_device (b) !$acc serial - call verify_initial + call verify_n1_allocated !$acc end serial b = 40 - ! Verify that device-to-host copy doesn't touch the host-side array - ! descriptor, doesn't copy out the device-side (still initial) array - ! descriptor (but it does copy the array data). + !$acc parallel copyout (id1_1) ! No data clause for 'b' (explicit or implicit): no 'GOMP_MAP_TO_PSET'. + call verify_n1_values (-1) + id1_1 = 0 + !$acc end parallel + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(from:id1_1\)$} 1 original } } + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(from:id1_1 \[len: [0-9]+\]\)$} 1 gimple } } + + !$acc parallel copy (b) copyout (id1_2) + ! As already present, 'copy (b)' doesn't copy; addend is still '-1'. + call verify_n1_values (-1) + id1_2 = 0 + !$acc end parallel + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc parallel map\(tofrom:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2\)$} 1 original } } + !TODO ..., but without an actual use of 'b', the gimplifier removes the + !TODO 'GOMP_MAP_TO_PSET': + ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_parallel map\(tofrom:MEM \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(from:id1_2 \[len: [0-9]+\]\)$} 1 gimple } } + + ! In 'declare-allocatable-array_descriptor-1-runtime.f90', this does "verify + ! that device-to-host copy doesn't touch the host-side array descriptor, + ! doesn't copy out the device-side (still initial) array descriptor (but it + ! does copy the array data)". This is here not applicable anymore, as we've + ! already gotten the actual array descriptor installed. Thus now verify that + ! it does copy the array data. call acc_update_self (b) call verify_n1_allocated @@ -142,11 +165,19 @@ program test ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_update map\(force_to:MEM \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_1 \[len: [0-9]+\]\)$} 1 gimple } } ! ..., but it's silently skipped in 'GOACC_update'. !$acc serial - call verify_initial + call verify_n1_allocated !$acc end serial b = 41 + !$acc parallel + call verify_n1_values (1) + !$acc end parallel + + !$acc parallel copy (b) + call verify_n1_values (1) + !$acc end parallel + !$acc update self (b) self (id1_2) ! We do have 'GOMP_MAP_TO_PSET' here: ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc update map\(force_from:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(force_from:id1_2\);$} 1 original } } @@ -159,20 +190,9 @@ program test b(i) = b(i) + 2 end do - ! Now install the actual array descriptor, via a data clause for 'b' - ! (explicit or implicit): must get a 'GOMP_MAP_TO_PSET', which then in - ! 'gomp_map_vars_internal' is handled as 'declare target', and because of - ! '*(void **) hostaddrs[i] != NULL', we've got 'has_always_ptrset == true', - ! 'always_to_cnt == 1', and therefore 'gomp_map_vars_existing' does update - ! the 'GOMP_MAP_TO_PSET'. - !$acc serial present (b) copyin (id1_1) - call verify_initial - id1_1 = 0 - !$acc end serial - ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma acc serial map\(force_present:\*\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[len: [^\]]+\]\) map\(to:b \[pointer set, len: [0-9]+\]\) map\(alloc:\(integer\(kind=[0-9]+\)\[0:\] \* restrict\) b\.data \[pointer assign, bias: 0\]\) map\(to:id1_1\)$} 1 original } } - !TODO ..., but without an actual use of 'b', the gimplifier removes the - !TODO 'GOMP_MAP_TO_PSET': - ! { dg-final { scan-tree-dump-times {(?n)^ *#pragma omp target oacc_serial map\(force_present:MEM \[\(integer\(kind=[0-9]+\)\[0:\] \*\)[^\]]+\] \[len: [^\]]+\]\) map\(alloc:b\.data \[pointer assign, bias: 0\]\) map\(to:id1_1 \[len: [0-9]+\]\)$} 1 gimple } } + ! Now test that (potentially re-)installing the actual array descriptor is a + ! no-op, via a data clause for 'b' (explicit or implicit): must get a + ! 'GOMP_MAP_TO_PSET'. !$acc serial present (b) copyin (id1_2) call verify_n1_allocated !TODO Use of 'b': @@ -243,9 +263,9 @@ program test if (acc_is_present (b)) error stop !$acc enter data create (b) if (.not.acc_is_present (b)) error stop - ! This still has the previous (n1) array descriptor: + ! ..., and got the actual array descriptor installed: !$acc serial - call verify_n1_deallocated (.true.) + call verify_n2_allocated !$acc end serial do i = n2_lb, n2_ub @@ -254,11 +274,19 @@ program test call acc_update_device (b) !$acc serial - call verify_n1_deallocated (.true.) + call verify_n2_allocated !$acc end serial b = -40 + !$acc parallel + call verify_n2_values (20) + !$acc end parallel + + !$acc parallel copy (b) + call verify_n2_values (20) + !$acc end parallel + call acc_update_self (b) call verify_n2_allocated @@ -269,11 +297,19 @@ program test !$acc update device (b) !$acc serial - call verify_n1_deallocated (.true.) + call verify_n2_allocated !$acc end serial b = -41 + !$acc parallel + call verify_n2_values (-20) + !$acc end parallel + + !$acc parallel copy (b) + call verify_n2_values (-20) + !$acc end parallel + !$acc update self (b) call verify_n2_allocated diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr106643-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr106643-1.f90 new file mode 100644 index 00000000000..a9c969e3361 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/pr106643-1.f90 @@ -0,0 +1,83 @@ +! { dg-do run } +! { dg-additional-options -cpp } + + +!TODO OpenACC 'serial' vs. GCC/nvptx: +!TODO { dg-prune-output {using 'vector_length \(32\)', ignoring 1} } + + +module m_macron + + implicit none + + real(kind(0d0)), allocatable, dimension(:) :: valls + !$acc declare create(valls) + +contains + + subroutine s_macron_compute(size) + + integer :: size + + !$acc routine seq + +#if ACC_MEM_SHARED + if (valls(size) /= 1) error stop +#else + if (valls(size) /= size - 2) error stop +#endif + + valls(size) = size + 2 + + end subroutine s_macron_compute + + subroutine s_macron_init(size) + + integer :: size + + print*, "size=", size + + print*, "allocate(valls(1:size))" + allocate(valls(1:size)) + + print*, "acc enter data create(valls(1:size))" + !$acc enter data create(valls(1:size)) + + print*, "!$acc update device(valls(1:size))" + valls(size) = size - 2 + !$acc update device(valls(1:size)) + + valls(size) = 1 + + !$acc serial + call s_macron_compute(size) + !$acc end serial + + valls(size) = -1 + + !$acc update host(valls(1:size)) +#if ACC_MEM_SHARED + if (valls(size) /= -1) error stop +#else + if (valls(size) /= size + 2) error stop +#endif + + print*, valls(1:size) + + print*, "acc exit data delete(valls)" + !$acc exit data delete(valls) + + end subroutine s_macron_init + +end module m_macron + + +program p_main + + use m_macron + + implicit none + + call s_macron_init(10) + +end program p_main -- 2.35.1