diff --git a/libgomp/ChangeLog.gomp b/libgomp/ChangeLog.gomp index a2f1c31..791aa4c 100644 --- a/libgomp/ChangeLog.gomp +++ b/libgomp/ChangeLog.gomp @@ -1,3 +1,10 @@ +2015-12-06 James Norris + + * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start): + Handle Fortran deviceptr clause combination. + * testsuite/libgomp.oacc-fortran/deviceptr-1.f90: New test. + * testsuite/libgomp.oacc-fortran/declare-1.f90: Remove erroneous test. + 2015-12-05 Chung-Lin Tang * oacc-plugin.h (GOMP_PLUGIN_async_unmap_vars): Add int parameter. diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index a4b2c01..a606152 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -99,18 +99,37 @@ GOACC_parallel_keyed (int device, void (*fn) (void *), thr = goacc_thread (); acc_dev = thr->dev; - for (i = 0; i < (signed)(mapnum - 1); i++) + for (i = 0; i < mapnum; i++) { unsigned short kind1 = kinds[i] & 0xff; - unsigned short kind2 = kinds[i+1] & 0xff; /* Handle Fortran deviceptr clause. */ - if ((kind1 == GOMP_MAP_FORCE_DEVICEPTR && kind2 == GOMP_MAP_POINTER) - && (sizes[i + 1] == 0) - && (hostaddrs[i] == *(void **)hostaddrs[i + 1])) + if (kind1 == GOMP_MAP_FORCE_DEVICEPTR) { - kinds[i+1] = kinds[i]; - sizes[i+1] = sizeof (void *); + unsigned short kind2; + + if (i < (signed)mapnum - 1) + kind2 = kinds[i + 1] & 0xff; + else + kind2 = 0xffff; + + if (sizes[i] == sizeof (void *)) + continue; + + /* At this point, we're dealing with a Fortran deviceptr. + If the next element is not what we're expecting, then + this is an instance of where the deviceptr variable was + not used within the region and the pointer was removed + by the gimplifier. */ + if (kind2 == GOMP_MAP_POINTER + && sizes[i + 1] == 0 + && hostaddrs[i] == *(void **)hostaddrs[i + 1]) + { + kinds[i+1] = kinds[i]; + sizes[i+1] = sizeof (void *); + } + + /* Invalidate the entry. */ hostaddrs[i] = NULL; } } @@ -254,18 +273,38 @@ GOACC_data_start (int device, size_t mapnum, struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; - for (i = 0; i < (signed)(mapnum - 1); i++) + for (i = 0; i < mapnum; i++) { unsigned short kind1 = kinds[i] & 0xff; - unsigned short kind2 = kinds[i+1] & 0xff; /* Handle Fortran deviceptr clause. */ - if ((kind1 == GOMP_MAP_FORCE_DEVICEPTR && kind2 == GOMP_MAP_POINTER) - && (sizes[i + 1] == 0) - && (hostaddrs[i] == *(void **)hostaddrs[i + 1])) + if (kind1 == GOMP_MAP_FORCE_DEVICEPTR) { - kinds[i+1] = kinds[i]; - sizes[i+1] = sizeof (void *); + unsigned short kind2; + + if (i < (signed)mapnum - 1) + kind2 = kinds[i + 1] & 0xff; + else + kind2 = 0xffff; + + /* If the size is right, skip it. */ + if (sizes[i] == sizeof (void *)) + continue; + + /* At this point, we're dealing with a Fortran deviceptr. + If the next element is not what we're expecting, then + this is an instance of where the deviceptr variable was + not used within the region and the pointer was removed + by the gimplifier. */ + if (kind2 == GOMP_MAP_POINTER + && sizes[i + 1] == 0 + && hostaddrs[i] == *(void **)hostaddrs[i + 1]) + { + kinds[i+1] = kinds[i]; + sizes[i+1] = sizeof (void *); + } + + /* Invalidate the entry. */ hostaddrs[i] = NULL; } } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 index 430cd24..e781878 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 @@ -1,6 +1,4 @@ ! { dg-do run { target openacc_nvidia_accel_selected } } -! libgomp: cuStreamSynchronize error: an illegal memory access was encountered -! { dg-xfail-run-if "TODO" { *-*-* } } module vars implicit none @@ -8,24 +6,6 @@ module vars !$acc declare create (z) end module vars -subroutine subr6 (a, d) - implicit none - integer, parameter :: N = 8 - integer :: i - integer :: a(N) - !$acc declare deviceptr (a) - integer :: d(N) - - i = 0 - - !$acc parallel copy (d) - do i = 1, N - d(i) = a(i) + a(i) - end do - !$acc end parallel - -end subroutine - subroutine subr5 (a, b, c, d) implicit none integer, parameter :: N = 8 @@ -203,15 +183,6 @@ subroutine subr0 (a, b, c, d) if (d(i) .ne. 13) call abort end do - call subr6 (a, d) - - call test (a, .true.) - call test (d, .false.) - - do i = 1, N - if (d(i) .ne. 16) call abort - end do - end subroutine program main @@ -243,8 +214,7 @@ program main if (a(i) .ne. 8) call abort if (b(i) .ne. 8) call abort if (c(i) .ne. 8) call abort - if (d(i) .ne. 16) call abort + if (d(i) .ne. 13) call abort end do - end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 new file mode 100644 index 0000000..879cbf1 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 @@ -0,0 +1,197 @@ +! { dg-do run } + +!! Test the deviceptr clause with various directives +!! and in combination with other directives where +!! the deviceptr variable is implied. + +subroutine subr1 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc data deviceptr (a) + + !$acc parallel copy (b) + do i = 1, N + a(i) = i * 2 + b(i) = a(i) + end do + !$acc end parallel + + !$acc end data + +end subroutine + +subroutine subr2 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + !$acc declare deviceptr (a) + integer :: b(N) + integer :: i = 0 + + !$acc parallel copy (b) + do i = 1, N + a(i) = i * 4 + b(i) = a(i) + end do + !$acc end parallel + +end subroutine + +subroutine subr3 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + !$acc declare deviceptr (a) + integer :: b(N) + integer :: i = 0 + + !$acc kernels copy (b) + do i = 1, N + a(i) = i * 8 + b(i) = a(i) + end do + !$acc end kernels + +end subroutine + +subroutine subr4 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc parallel deviceptr (a) copy (b) + do i = 1, N + a(i) = i * 16 + b(i) = a(i) + end do + !$acc end parallel + +end subroutine + +subroutine subr5 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc kernels deviceptr (a) copy (b) + do i = 1, N + a(i) = i * 32 + b(i) = a(i) + end do + !$acc end kernels + +end subroutine + +subroutine subr6 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc parallel deviceptr (a) copy (b) + do i = 1, N + b(i) = i + end do + !$acc end parallel + +end subroutine + +subroutine subr7 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc data deviceptr (a) + + !$acc parallel copy (b) + do i = 1, N + a(i) = i * 2 + b(i) = a(i) + end do + !$acc end parallel + + !$acc parallel copy (b) + do i = 1, N + a(i) = b(i) * 2 + b(i) = a(i) + end do + !$acc end parallel + + !$acc end data + +end subroutine + +program main + use iso_c_binding, only: c_ptr, c_f_pointer + implicit none + type (c_ptr) :: cp + integer, parameter :: N = 8 + integer, pointer :: fp(:) + integer :: i = 0 + integer :: b(N) + + interface + function acc_malloc (s) bind (C) + use iso_c_binding, only: c_ptr, c_size_t + integer (c_size_t), value :: s + type (c_ptr) :: acc_malloc + end function + end interface + + cp = acc_malloc (N * sizeof (fp(N))) + call c_f_pointer (cp, fp, [N]) + + call subr1 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 2) call abort + end do + + call subr2 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 4) call abort + end do + + call subr3 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 8) call abort + end do + + call subr4 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 16) call abort + end do + + call subr5 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 32) call abort + end do + + call subr6 (fp, b) + + do i = 1, N + if (b(i) .ne. i) call abort + end do + + call subr7 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 4) call abort + end do + +end program main