Hi! On 2021-06-08T19:32:22+0200, I wrote: > Hi Chung-Lin! > > ;-) It's been a while: > > On 2018-09-10T23:04:18+0800, Chung-Lin Tang wrote: >> * testsuite/libgomp.oacc-c-c++-common/lib-94.c: New test. >> * testsuite/libgomp.oacc-c-c++-common/lib-95.c: New test. >> * testsuite/libgomp.oacc-fortran/lib-16.f90: New test. > > Do you happen to remember why in these testcases you're using the > following pattern: Apparently not ;-) -- no answer/objection, I've thus now pushed "Fix OpenACC 'async'/'wait' issues in 'libgomp.oacc-c-c++-common/lib-{94,95}.c', 'libgomp.oacc-fortran/lib-16{,-2}.f90'" to master branch in commit 599e275d7e0b3fb79ff704d4cb2d8fdb0231116e, see attached. Grüße Thomas >> --- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c (nonexistent) >> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c (working copy) >> @@ -0,0 +1,42 @@ >> +/* { dg-do run } */ >> +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ >> + >> +#include >> +#include >> +#include >> + >> +int >> +main (int argc, char **argv) >> +{ >> + const int N = 256; >> + int i; >> + int async = 8; >> + unsigned char *h; >> + >> + h = (unsigned char *) malloc (N); >> + >> + for (i = 0; i < N; i++) >> + { >> + h[i] = i; >> + } >> + >> + acc_copyin_async (h, N, async); >> + >> + memset (h, 0, N); >> + >> + acc_wait (async); > > You first issue 'acc_copyin_async', then (while potentially that's still > accessing 'h') already 'memset' 'h' (potentially overwriting data that > 'acc_copyin_async' is still working on), and only then 'acc_wait'? > > My understanding of OpenACC would swap 'memset' and 'acc_wait', but maybe > you have a specific reason to do it in this way? > > In particular, the GCC nvptx offloading implementation "doesn't seem to > care" (as discussed elsewhere; 'OpenACC "ephemeral" asynchronous > host-to-device copies', etc.) -- but I suppose if you meant to test such > implementation traits here, you'd have commented that? > >> + >> + acc_copyout_async (h, N, async + 1); >> + >> + acc_wait (async + 1); >> + >> + for (i = 0; i < N; i++) >> + { >> + if (h[i] != i) >> + abort (); >> + } >> + >> + free (h); >> + >> + return 0; >> +} > >> --- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-95.c (nonexistent) >> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-95.c (working copy) >> @@ -0,0 +1,45 @@ >> +/* { dg-do run } */ >> +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ >> + >> +#include >> +#include >> +#include >> + >> +int >> +main (int argc, char **argv) >> +{ >> + const int N = 256; >> + int i, q = 5; >> + unsigned char *h, *g; >> + void *d; >> + >> + h = (unsigned char *) malloc (N); >> + g = (unsigned char *) malloc (N); >> + for (i = 0; i < N; i++) >> + { >> + g[i] = i; >> + } >> + >> + acc_create_async (h, N, q); >> + >> + acc_memcpy_to_device_async (acc_deviceptr (h), g, N, q); >> + memset (&h[0], 0, N); >> + >> + acc_wait (q); > > Similar here. > >> + acc_update_self_async (h, N, q + 1); >> + acc_delete_async (h, N, q + 1); >> + >> + acc_wait (q + 1); >> + >> + for (i = 0; i < N; i++) >> + { >> + if (h[i] != i) >> + abort (); >> + } >> + >> + free (h); >> + free (g); >> + >> + return 0; >> +} > >> --- libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 (nonexistent) >> +++ libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 (working copy) > > (Later also similarly copied into 'libgomp.oacc-fortran/lib-16-2.f90'.) > > Similar: > >> @@ -0,0 +1,57 @@ >> +! { dg-do run } >> +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } >> + >> +program main >> + use openacc >> + implicit none >> + >> + integer, parameter :: N = 256 >> + integer, allocatable :: h(:) >> + integer :: i >> + integer :: async = 5 >> + >> + allocate (h(N)) >> + >> + do i = 1, N >> + h(i) = i >> + end do >> + >> + call acc_copyin (h) >> + >> + do i = 1, N >> + h(i) = i + i >> + end do >> + >> + call acc_update_device_async (h, sizeof (h), async) >> + >> + if (acc_is_present (h) .neqv. .TRUE.) call abort > > Don't we need 'acc_wait' here (while 'acc_update_device_async' may still > be reading from 'h'), before overwriting 'h' here: > >> + >> + h(:) = 0 >> + >> + call acc_copyout_async (h, sizeof (h), async) >> + >> + call acc_wait (async) >> + >> + do i = 1, N >> + if (h(i) /= i + i) call abort >> + end do >> + >> + call acc_copyin (h, sizeof (h)) >> + >> + h(:) = 0 >> + >> + call acc_update_self_async (h, sizeof (h), async) >> + >> + if (acc_is_present (h) .neqv. .TRUE.) call abort > > Don't we need 'acc_wait' here (to make sure we finish device to host copy > of 'h'), before evaluating 'h' here: > >> + >> + do i = 1, N >> + if (h(i) /= i + i) call abort >> + end do >> + >> + call acc_delete_async (h, async) >> + >> + call acc_wait (async) >> + >> + if (acc_is_present (h) .neqv. .FALSE.) call abort >> + >> +end program > > Julian has patches for most of these (as part of other commits). ----------------- Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955