From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 125573 invoked by alias); 6 Dec 2015 14:52:54 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 125555 invoked by uid 89); 6 Dec 2015 14:52:53 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.1 required=5.0 tests=AWL,BAYES_00,RCVD_IN_DNSWL_LOW,SPF_PASS autolearn=ham version=3.3.2 X-Spam-User: qpsmtpd, 2 recipients X-HELO: relay1.mentorg.com Received: from relay1.mentorg.com (HELO relay1.mentorg.com) (192.94.38.131) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Sun, 06 Dec 2015 14:52:51 +0000 Received: from svr-orw-fem-06.mgc.mentorg.com ([147.34.97.120]) by relay1.mentorg.com with esmtp id 1a5agC-0001rr-0g from James_Norris@mentor.com ; Sun, 06 Dec 2015 06:52:48 -0800 Received: from [172.30.80.179] (147.34.91.1) by SVR-ORW-FEM-06.mgc.mentorg.com (147.34.97.120) with Microsoft SMTP Server id 14.3.224.2; Sun, 6 Dec 2015 06:52:45 -0800 Message-ID: <56644BBC.1050602@codesourcery.com> Date: Sun, 06 Dec 2015 14:52:00 -0000 From: James Norris User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.7.0 MIME-Version: 1.0 To: GCC Patches CC: , Thomas Schwinge Subject: [gomp4] Fix Fortran deviceptr Content-Type: multipart/mixed; boundary="------------080900000501070509000503" X-SW-Source: 2015-12/txt/msg00676.txt.bz2 --------------080900000501070509000503 Content-Type: text/plain; charset="utf-8"; format=flowed Content-Transfer-Encoding: 7bit Content-length: 291 Hi, This patch fixes a some runtime issues when dealing with the deviceptr clause in Fortran. There were some corner cases that were not being dealt with correctly, and the patch resolves these. Also a new set of test cases has been added. I've applied this patch to gomp-4_0-branch. Jim --------------080900000501070509000503 Content-Type: text/x-patch; name="deviceptr.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="deviceptr.patch" Content-length: 8722 diff --git a/libgomp/ChangeLog.gomp b/libgomp/ChangeLog.gomp index a2f1c31..791aa4c 100644 --- a/libgomp/ChangeLog.gomp +++ b/libgomp/ChangeLog.gomp @@ -1,3 +1,10 @@ +2015-12-06 James Norris + + * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start): + Handle Fortran deviceptr clause combination. + * testsuite/libgomp.oacc-fortran/deviceptr-1.f90: New test. + * testsuite/libgomp.oacc-fortran/declare-1.f90: Remove erroneous test. + 2015-12-05 Chung-Lin Tang * oacc-plugin.h (GOMP_PLUGIN_async_unmap_vars): Add int parameter. diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index a4b2c01..a606152 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -99,18 +99,37 @@ GOACC_parallel_keyed (int device, void (*fn) (void *), thr = goacc_thread (); acc_dev = thr->dev; - for (i = 0; i < (signed)(mapnum - 1); i++) + for (i = 0; i < mapnum; i++) { unsigned short kind1 = kinds[i] & 0xff; - unsigned short kind2 = kinds[i+1] & 0xff; /* Handle Fortran deviceptr clause. */ - if ((kind1 == GOMP_MAP_FORCE_DEVICEPTR && kind2 == GOMP_MAP_POINTER) - && (sizes[i + 1] == 0) - && (hostaddrs[i] == *(void **)hostaddrs[i + 1])) + if (kind1 == GOMP_MAP_FORCE_DEVICEPTR) { - kinds[i+1] = kinds[i]; - sizes[i+1] = sizeof (void *); + unsigned short kind2; + + if (i < (signed)mapnum - 1) + kind2 = kinds[i + 1] & 0xff; + else + kind2 = 0xffff; + + if (sizes[i] == sizeof (void *)) + continue; + + /* At this point, we're dealing with a Fortran deviceptr. + If the next element is not what we're expecting, then + this is an instance of where the deviceptr variable was + not used within the region and the pointer was removed + by the gimplifier. */ + if (kind2 == GOMP_MAP_POINTER + && sizes[i + 1] == 0 + && hostaddrs[i] == *(void **)hostaddrs[i + 1]) + { + kinds[i+1] = kinds[i]; + sizes[i+1] = sizeof (void *); + } + + /* Invalidate the entry. */ hostaddrs[i] = NULL; } } @@ -254,18 +273,38 @@ GOACC_data_start (int device, size_t mapnum, struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; - for (i = 0; i < (signed)(mapnum - 1); i++) + for (i = 0; i < mapnum; i++) { unsigned short kind1 = kinds[i] & 0xff; - unsigned short kind2 = kinds[i+1] & 0xff; /* Handle Fortran deviceptr clause. */ - if ((kind1 == GOMP_MAP_FORCE_DEVICEPTR && kind2 == GOMP_MAP_POINTER) - && (sizes[i + 1] == 0) - && (hostaddrs[i] == *(void **)hostaddrs[i + 1])) + if (kind1 == GOMP_MAP_FORCE_DEVICEPTR) { - kinds[i+1] = kinds[i]; - sizes[i+1] = sizeof (void *); + unsigned short kind2; + + if (i < (signed)mapnum - 1) + kind2 = kinds[i + 1] & 0xff; + else + kind2 = 0xffff; + + /* If the size is right, skip it. */ + if (sizes[i] == sizeof (void *)) + continue; + + /* At this point, we're dealing with a Fortran deviceptr. + If the next element is not what we're expecting, then + this is an instance of where the deviceptr variable was + not used within the region and the pointer was removed + by the gimplifier. */ + if (kind2 == GOMP_MAP_POINTER + && sizes[i + 1] == 0 + && hostaddrs[i] == *(void **)hostaddrs[i + 1]) + { + kinds[i+1] = kinds[i]; + sizes[i+1] = sizeof (void *); + } + + /* Invalidate the entry. */ hostaddrs[i] = NULL; } } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 index 430cd24..e781878 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 @@ -1,6 +1,4 @@ ! { dg-do run { target openacc_nvidia_accel_selected } } -! libgomp: cuStreamSynchronize error: an illegal memory access was encountered -! { dg-xfail-run-if "TODO" { *-*-* } } module vars implicit none @@ -8,24 +6,6 @@ module vars !$acc declare create (z) end module vars -subroutine subr6 (a, d) - implicit none - integer, parameter :: N = 8 - integer :: i - integer :: a(N) - !$acc declare deviceptr (a) - integer :: d(N) - - i = 0 - - !$acc parallel copy (d) - do i = 1, N - d(i) = a(i) + a(i) - end do - !$acc end parallel - -end subroutine - subroutine subr5 (a, b, c, d) implicit none integer, parameter :: N = 8 @@ -203,15 +183,6 @@ subroutine subr0 (a, b, c, d) if (d(i) .ne. 13) call abort end do - call subr6 (a, d) - - call test (a, .true.) - call test (d, .false.) - - do i = 1, N - if (d(i) .ne. 16) call abort - end do - end subroutine program main @@ -243,8 +214,7 @@ program main if (a(i) .ne. 8) call abort if (b(i) .ne. 8) call abort if (c(i) .ne. 8) call abort - if (d(i) .ne. 16) call abort + if (d(i) .ne. 13) call abort end do - end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 new file mode 100644 index 0000000..879cbf1 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 @@ -0,0 +1,197 @@ +! { dg-do run } + +!! Test the deviceptr clause with various directives +!! and in combination with other directives where +!! the deviceptr variable is implied. + +subroutine subr1 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc data deviceptr (a) + + !$acc parallel copy (b) + do i = 1, N + a(i) = i * 2 + b(i) = a(i) + end do + !$acc end parallel + + !$acc end data + +end subroutine + +subroutine subr2 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + !$acc declare deviceptr (a) + integer :: b(N) + integer :: i = 0 + + !$acc parallel copy (b) + do i = 1, N + a(i) = i * 4 + b(i) = a(i) + end do + !$acc end parallel + +end subroutine + +subroutine subr3 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + !$acc declare deviceptr (a) + integer :: b(N) + integer :: i = 0 + + !$acc kernels copy (b) + do i = 1, N + a(i) = i * 8 + b(i) = a(i) + end do + !$acc end kernels + +end subroutine + +subroutine subr4 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc parallel deviceptr (a) copy (b) + do i = 1, N + a(i) = i * 16 + b(i) = a(i) + end do + !$acc end parallel + +end subroutine + +subroutine subr5 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc kernels deviceptr (a) copy (b) + do i = 1, N + a(i) = i * 32 + b(i) = a(i) + end do + !$acc end kernels + +end subroutine + +subroutine subr6 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc parallel deviceptr (a) copy (b) + do i = 1, N + b(i) = i + end do + !$acc end parallel + +end subroutine + +subroutine subr7 (a, b) + implicit none + integer, parameter :: N = 8 + integer :: a(N) + integer :: b(N) + integer :: i = 0 + + !$acc data deviceptr (a) + + !$acc parallel copy (b) + do i = 1, N + a(i) = i * 2 + b(i) = a(i) + end do + !$acc end parallel + + !$acc parallel copy (b) + do i = 1, N + a(i) = b(i) * 2 + b(i) = a(i) + end do + !$acc end parallel + + !$acc end data + +end subroutine + +program main + use iso_c_binding, only: c_ptr, c_f_pointer + implicit none + type (c_ptr) :: cp + integer, parameter :: N = 8 + integer, pointer :: fp(:) + integer :: i = 0 + integer :: b(N) + + interface + function acc_malloc (s) bind (C) + use iso_c_binding, only: c_ptr, c_size_t + integer (c_size_t), value :: s + type (c_ptr) :: acc_malloc + end function + end interface + + cp = acc_malloc (N * sizeof (fp(N))) + call c_f_pointer (cp, fp, [N]) + + call subr1 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 2) call abort + end do + + call subr2 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 4) call abort + end do + + call subr3 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 8) call abort + end do + + call subr4 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 16) call abort + end do + + call subr5 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 32) call abort + end do + + call subr6 (fp, b) + + do i = 1, N + if (b(i) .ne. i) call abort + end do + + call subr7 (fp, b) + + do i = 1, N + if (b(i) .ne. i * 4) call abort + end do + +end program main --------------080900000501070509000503--