From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 105334 invoked by alias); 12 Jan 2019 22:21:22 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 105242 invoked by uid 89); 12 Jan 2019 22:21:21 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-26.2 required=5.0 tests=BAYES_00,GIT_PATCH_0,GIT_PATCH_1,GIT_PATCH_2,GIT_PATCH_3,SPF_SOFTFAIL autolearn=ham version=3.3.2 spammy=HX-detected-operating-system:timestamps, m*, exercise, ij X-HELO: eggs.gnu.org Received: from eggs.gnu.org (HELO eggs.gnu.org) (209.51.188.92) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Sat, 12 Jan 2019 22:21:17 +0000 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1giReU-0007Fb-L4 for gcc-patches@gcc.gnu.org; Sat, 12 Jan 2019 17:21:15 -0500 Received: from mx2.suse.de ([195.135.220.15]:52526 helo=mx1.suse.de) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1giReU-0007EZ-9q for gcc-patches@gcc.gnu.org; Sat, 12 Jan 2019 17:21:14 -0500 Received: from relay1.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 33406AE7A; Sat, 12 Jan 2019 22:21:11 +0000 (UTC) From: Tom de Vries To: gcc-patches@gcc.gnu.org Cc: Thomas Schwinge Subject: [PATCH 9/9] [nvptx] Enable setting vector length using -fopenacc-dim -- testcases Date: Sat, 12 Jan 2019 22:21:00 -0000 Message-Id: <20190112222131.29519-10-tdevries@suse.de> In-Reply-To: <20190112222131.29519-1-tdevries@suse.de> References: <20190112222131.29519-1-tdevries@suse.de> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.2.x-3.x (no timestamps) [generic] X-Received-From: 195.135.220.15 X-IsSubscribed: yes X-SW-Source: 2019-01/txt/msg00716.txt.bz2 Add some test-cases that set vector length using -fopenacc-dim. 
2019-01-12 Tom de Vries * testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c: New test. * testsuite/libgomp.oacc-fortran/gemm-2.f90: New test. --- .../libgomp.oacc-c-c++-common/pr85486-2.c | 52 ++++++++++++++ .../vector-length-128-2.c | 39 +++++++++++ .../vector-length-128-5.c | 41 +++++++++++ libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90 | 80 ++++++++++++++++++++++ 4 files changed, 212 insertions(+) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90 diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c new file mode 100644 index 00000000000..f6ca263166d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c @@ -0,0 +1,52 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-fopenacc-dim=::128" } */ + +/* Minimized from ref-1.C. 
*/ + +#include <stdio.h> + +#pragma acc routine vector +void __attribute__((noinline, noclone)) +Vector (int *ptr, int n, const int inc) +{ + #pragma acc loop vector + for (unsigned ix = 0; ix < n; ix++) + ptr[ix] += inc; +} + +int +main (void) +{ + const int n = 32, m=32; + + int ary[m][n]; + unsigned ix, iy; + + for (ix = m; ix--;) + for (iy = n; iy--;) + ary[ix][iy] = (1 << 16) + (ix << 8) + iy; + + int err = 0; + +#pragma acc parallel copy (ary) + { + Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16)); + } + + for (ix = m; ix--;) + for (iy = n; iy--;) + if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy)) + { + printf ("ary[%u][%u] = %x expected %x\n", + ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy)); + err++; + } + + if (err) + { + printf ("%d failed\n", err); + return 1; + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c new file mode 100644 index 00000000000..8b5b2a4a92d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c @@ -0,0 +1,39 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-fopenacc-dim=::128" } */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ + +#include <stdlib.h> + +#define N 1024 + +unsigned int a[N]; +unsigned int b[N]; +unsigned int c[N]; +unsigned int n = N; + +int +main (void) +{ + for (unsigned int i = 0; i < n; ++i) + { + a[i] = i % 3; + b[i] = i % 5; + } + +#pragma acc parallel copyin (a,b) copyout (c) + { +#pragma acc loop vector + for (unsigned int i = 0; i < n; i++) + c[i] = a[i] + b[i]; + } + + for (unsigned int i = 0; i < n; ++i) + if (c[i] != (i % 3) + (i % 5)) + abort (); + + return 0; +} + +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 128\\)" "oaccdevlow" } } */ +/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, 
vectors=128" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c new file mode 100644 index 00000000000..e60f1c28db4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c @@ -0,0 +1,41 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-fopenacc-dim=:2:128" } */ +/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */ +/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */ + +#include <stdlib.h> + +#define N 1024 + +unsigned int a[N]; +unsigned int b[N]; +unsigned int c[N]; +unsigned int n = N; + +int +main (void) +{ + for (unsigned int i = 0; i < n; ++i) + { + a[i] = i % 3; + b[i] = i % 5; + } + +#pragma acc parallel copyin (a,b) copyout (c) + { +#pragma acc loop worker + for (unsigned int i = 0; i < 4; i++) +#pragma acc loop vector + for (unsigned int j = 0; j < n / 4; j++) + c[(i * N / 4) + j] = a[(i * N / 4) + j] + b[(i * N / 4) + j]; + } + + for (unsigned int i = 0; i < n; ++i) + if (c[i] != (i % 3) + (i % 5)) + abort (); + + return 0; +} + +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 2, 128\\)" "oaccdevlow" } } */ +/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=2, vectors=128" } */ diff --git a/libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90 new file mode 100644 index 00000000000..fe108732a5f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90 @@ -0,0 +1,80 @@ +! Exercise three levels of parallelism using SGEMM from BLAS. + +! { dg-do run } +! { dg-additional-options "-fopenacc-dim=::128" } + +! Implicitly set vector_length to 128 using -fopenacc-dim. 
+subroutine openacc_sgemm (m, n, k, alpha, a, b, beta, c) + integer :: m, n, k + real :: alpha, beta + real :: a(k,*), b(k,*), c(m,*) + + integer :: i, j, l + real :: temp + + !$acc parallel loop copy(c(1:m,1:n)) copyin(a(1:k,1:m),b(1:k,1:n)) firstprivate (temp) + do j = 1, n + !$acc loop + do i = 1, m + temp = 0.0 + !$acc loop reduction(+:temp) + do l = 1, k + temp = temp + a(l,i)*b(l,j) + end do + if(beta == 0.0) then + c(i,j) = alpha*temp + else + c(i,j) = alpha*temp + beta*c(i,j) + end if + end do + end do +end subroutine openacc_sgemm + +subroutine host_sgemm (m, n, k, alpha, a, b, beta, c) + integer :: m, n, k + real :: alpha, beta + real :: a(k,*), b(k,*), c(m,*) + + integer :: i, j, l + real :: temp + + do j = 1, n + do i = 1, m + temp = 0.0 + do l = 1, k + temp = temp + a(l,i)*b(l,j) + end do + if(beta == 0.0) then + c(i,j) = alpha*temp + else + c(i,j) = alpha*temp + beta*c(i,j) + end if + end do + end do +end subroutine host_sgemm + +program main + integer, parameter :: M = 100, N = 50, K = 2000 + real :: a(K, M), b(K, N), c(M, N), d (M, N), e (M, N) + real alpha, beta + integer i, j + + a(:,:) = 1.0 + b(:,:) = 0.25 + + c(:,:) = 0.0 + d(:,:) = 0.0 + e(:,:) = 0.0 + + alpha = 1.05 + beta = 1.25 + + call openacc_sgemm (M, N, K, alpha, a, b, beta, c) + call host_sgemm (M, N, K, alpha, a, b, beta, e) + + do i = 1, m + do j = 1, n + if (c(i,j) /= e(i,j)) call abort + end do + end do +end program main -- 2.16.4