* [PATCH 3/9] [nvptx] Enable large vectors -- test-cases
2019-01-12 22:21 [PATCH 0/9] [COVER-LETTER, nvptx] Add support for warp-multiple openacc vector length Tom de Vries
@ 2019-01-12 22:21 ` Tom de Vries
2019-01-12 22:21 ` [PATCH 9/9] [nvptx] Enable setting vector length using -fopenacc-dim -- testcases Tom de Vries
` (7 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Tom de Vries @ 2019-01-12 22:21 UTC (permalink / raw)
To: gcc-patches; +Cc: Thomas Schwinge
Add various test-cases with vector length 128.
2018-12-17 Tom de Vries <tdevries@suse.de>
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c: New test.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c: New test.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c: New test.
---
.../vector-length-128-4.c | 40 +++++++++++++++++++++
.../vector-length-128-6.c | 41 ++++++++++++++++++++++
.../vector-length-128-7.c | 40 +++++++++++++++++++++
3 files changed, 121 insertions(+)
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c
new file mode 100644
index 00000000000..e5d1df09b8a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-4.c
@@ -0,0 +1,40 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include <stdlib.h>
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)
+{
+ for (unsigned int i = 0; i < n; ++i)
+ {
+ a[i] = i % 3;
+ b[i] = i % 5;
+ }
+
+#pragma acc parallel num_workers (2) vector_length (128) copyin (a,b) copyout (c)
+ {
+#pragma acc loop worker
+ for (unsigned int i = 0; i < 4; i++)
+#pragma acc loop vector
+ for (unsigned int j = 0; j < n / 4; j++)
+ c[(i * N / 4) + j] = a[(i * N / 4) + j] + b[(i * N / 4) + j];
+ }
+
+ for (unsigned int i = 0; i < n; ++i)
+ if (c[i] != (i % 3) + (i % 5))
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 2, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=2, vectors=128" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c
new file mode 100644
index 00000000000..a1f67622f84
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-6.c
@@ -0,0 +1,41 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-set-target-env-var "GOMP_OPENACC_DIM" ":2:" } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include <stdlib.h>
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)
+{
+ for (unsigned int i = 0; i < n; ++i)
+ {
+ a[i] = i % 3;
+ b[i] = i % 5;
+ }
+
+#pragma acc parallel vector_length (128) copyin (a,b) copyout (c)
+ {
+#pragma acc loop worker
+ for (unsigned int i = 0; i < 4; i++)
+#pragma acc loop vector
+ for (unsigned int j = 0; j < n / 4; j++)
+ c[(i * N / 4) + j] = a[(i * N / 4) + j] + b[(i * N / 4) + j];
+ }
+
+ for (unsigned int i = 0; i < n; ++i)
+ if (c[i] != (i % 3) + (i % 5))
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 0, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=2, vectors=128" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c
new file mode 100644
index 00000000000..c419f6499b5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-7.c
@@ -0,0 +1,40 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include <stdlib.h>
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)
+{
+ for (unsigned int i = 0; i < n; ++i)
+ {
+ a[i] = i % 3;
+ b[i] = i % 5;
+ }
+
+#pragma acc parallel vector_length (128) copyin (a,b) copyout (c)
+ {
+#pragma acc loop worker
+ for (unsigned int i = 0; i < 4; i++)
+#pragma acc loop vector
+ for (unsigned int j = 0; j < n / 4; j++)
+ c[(i * N / 4) + j] = a[(i * N / 4) + j] + b[(i * N / 4) + j];
+ }
+
+ for (unsigned int i = 0; i < n; ++i)
+ if (c[i] != (i % 3) + (i % 5))
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 0, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=8, vectors=128" } */
--
2.16.4
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 9/9] [nvptx] Enable setting vector length using -fopenacc-dim -- testcases
2019-01-12 22:21 [PATCH 0/9] [COVER-LETTER, nvptx] Add support for warp-multiple openacc vector length Tom de Vries
2019-01-12 22:21 ` [PATCH 3/9] [nvptx] Enable large vectors -- test-cases Tom de Vries
@ 2019-01-12 22:21 ` Tom de Vries
2019-01-12 22:21 ` [PATCH 6/9] [nvptx] Force vl32 if calling vector-partitionable routines -- test-cases Tom de Vries
` (6 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Tom de Vries @ 2019-01-12 22:21 UTC (permalink / raw)
To: gcc-patches; +Cc: Thomas Schwinge
Add some test-cases that set vector length using -fopenacc-dim.
2019-01-12 Tom de Vries <tdevries@suse.de>
* testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: New test.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c: New test.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c: New test.
* testsuite/libgomp.oacc-fortran/gemm-2.f90: New test.
---
.../libgomp.oacc-c-c++-common/pr85486-2.c | 52 ++++++++++++++
.../vector-length-128-2.c | 39 +++++++++++
.../vector-length-128-5.c | 41 +++++++++++
libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90 | 80 ++++++++++++++++++++++
4 files changed, 212 insertions(+)
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c
create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
new file mode 100644
index 00000000000..f6ca263166d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-fopenacc-dim=::128" } */
+
+/* Minimized from ref-1.C. */
+
+#include <stdio.h>
+
+#pragma acc routine vector
+void __attribute__((noinline, noclone))
+Vector (int *ptr, int n, const int inc)
+{
+ #pragma acc loop vector
+ for (unsigned ix = 0; ix < n; ix++)
+ ptr[ix] += inc;
+}
+
+int
+main (void)
+{
+ const int n = 32, m=32;
+
+ int ary[m][n];
+ unsigned ix, iy;
+
+ for (ix = m; ix--;)
+ for (iy = n; iy--;)
+ ary[ix][iy] = (1 << 16) + (ix << 8) + iy;
+
+ int err = 0;
+
+#pragma acc parallel copy (ary)
+ {
+ Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
+ }
+
+ for (ix = m; ix--;)
+ for (iy = n; iy--;)
+ if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy))
+ {
+ printf ("ary[%u][%u] = %x expected %x\n",
+ ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy));
+ err++;
+ }
+
+ if (err)
+ {
+ printf ("%d failed\n", err);
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c
new file mode 100644
index 00000000000..8b5b2a4a92d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-fopenacc-dim=::128" } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include <stdlib.h>
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)
+{
+ for (unsigned int i = 0; i < n; ++i)
+ {
+ a[i] = i % 3;
+ b[i] = i % 5;
+ }
+
+#pragma acc parallel copyin (a,b) copyout (c)
+ {
+#pragma acc loop vector
+ for (unsigned int i = 0; i < n; i++)
+ c[i] = a[i] + b[i];
+ }
+
+ for (unsigned int i = 0; i < n; ++i)
+ if (c[i] != (i % 3) + (i % 5))
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=128" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c
new file mode 100644
index 00000000000..e60f1c28db4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c
@@ -0,0 +1,41 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-fopenacc-dim=:2:128" } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include <stdlib.h>
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)
+{
+ for (unsigned int i = 0; i < n; ++i)
+ {
+ a[i] = i % 3;
+ b[i] = i % 5;
+ }
+
+#pragma acc parallel copyin (a,b) copyout (c)
+ {
+#pragma acc loop worker
+ for (unsigned int i = 0; i < 4; i++)
+#pragma acc loop vector
+ for (unsigned int j = 0; j < n / 4; j++)
+ c[(i * N / 4) + j] = a[(i * N / 4) + j] + b[(i * N / 4) + j];
+ }
+
+ for (unsigned int i = 0; i < n; ++i)
+ if (c[i] != (i % 3) + (i % 5))
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 2, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=2, vectors=128" } */
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90
new file mode 100644
index 00000000000..fe108732a5f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90
@@ -0,0 +1,80 @@
+! Exercise three levels of parallelism using SGEMM from BLAS.
+
+! { dg-do run }
+! { dg-additional-options "-fopenacc-dim=::128" }
+
+! Implicitly set vector_length to 128 using -fopenacc-dim.
+subroutine openacc_sgemm (m, n, k, alpha, a, b, beta, c)
+ integer :: m, n, k
+ real :: alpha, beta
+ real :: a(k,*), b(k,*), c(m,*)
+
+ integer :: i, j, l
+ real :: temp
+
+ !$acc parallel loop copy(c(1:m,1:n)) copyin(a(1:k,1:m),b(1:k,1:n)) firstprivate (temp)
+ do j = 1, n
+ !$acc loop
+ do i = 1, m
+ temp = 0.0
+ !$acc loop reduction(+:temp)
+ do l = 1, k
+ temp = temp + a(l,i)*b(l,j)
+ end do
+ if(beta == 0.0) then
+ c(i,j) = alpha*temp
+ else
+ c(i,j) = alpha*temp + beta*c(i,j)
+ end if
+ end do
+ end do
+end subroutine openacc_sgemm
+
+subroutine host_sgemm (m, n, k, alpha, a, b, beta, c)
+ integer :: m, n, k
+ real :: alpha, beta
+ real :: a(k,*), b(k,*), c(m,*)
+
+ integer :: i, j, l
+ real :: temp
+
+ do j = 1, n
+ do i = 1, m
+ temp = 0.0
+ do l = 1, k
+ temp = temp + a(l,i)*b(l,j)
+ end do
+ if(beta == 0.0) then
+ c(i,j) = alpha*temp
+ else
+ c(i,j) = alpha*temp + beta*c(i,j)
+ end if
+ end do
+ end do
+end subroutine host_sgemm
+
+program main
+ integer, parameter :: M = 100, N = 50, K = 2000
+ real :: a(K, M), b(K, N), c(M, N), d (M, N), e (M, N)
+ real alpha, beta
+ integer i, j
+
+ a(:,:) = 1.0
+ b(:,:) = 0.25
+
+ c(:,:) = 0.0
+ d(:,:) = 0.0
+ e(:,:) = 0.0
+
+ alpha = 1.05
+ beta = 1.25
+
+ call openacc_sgemm (M, N, K, alpha, a, b, beta, c)
+ call host_sgemm (M, N, K, alpha, a, b, beta, e)
+
+ do i = 1, m
+ do j = 1, n
+ if (c(i,j) /= e(i,j)) call abort
+ end do
+ end do
+end program main
--
2.16.4
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 6/9] [nvptx] Force vl32 if calling vector-partitionable routines -- test-cases
2019-01-12 22:21 [PATCH 0/9] [COVER-LETTER, nvptx] Add support for warp-multiple openacc vector length Tom de Vries
2019-01-12 22:21 ` [PATCH 3/9] [nvptx] Enable large vectors -- test-cases Tom de Vries
2019-01-12 22:21 ` [PATCH 9/9] [nvptx] Enable setting vector length using -fopenacc-dim -- testcases Tom de Vries
@ 2019-01-12 22:21 ` Tom de Vries
2020-10-30 16:16 ` Thomas Schwinge
2019-01-12 22:21 ` [PATCH 5/9] [nvptx] Don't emit barriers for empty loops " Tom de Vries
` (5 subsequent siblings)
8 siblings, 1 reply; 14+ messages in thread
From: Tom de Vries @ 2019-01-12 22:21 UTC (permalink / raw)
To: gcc-patches; +Cc: Thomas Schwinge
Add test-cases for "[nvptx] Force vl32 if calling vector-partitionable
routines".
2018-12-17 Tom de Vries <tdevries@suse.de>
PR target/85486
* testsuite/libgomp.oacc-c-c++-common/pr85486-3.c: New test.
* testsuite/libgomp.oacc-c-c++-common/pr85486.c: New test.
---
.../libgomp.oacc-c-c++-common/pr85486-3.c | 54 ++++++++++++++++++++++
.../testsuite/libgomp.oacc-c-c++-common/pr85486.c | 51 ++++++++++++++++++++
2 files changed, 105 insertions(+)
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
new file mode 100644
index 00000000000..a959b90c29a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
@@ -0,0 +1,54 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-set-target-env-var "GOMP_OPENACC_DIM" "::128" } */
+
+/* Minimized from ref-1.C. */
+
+#include <stdio.h>
+
+#pragma acc routine vector
+void __attribute__((noinline, noclone))
+Vector (int *ptr, int n, const int inc)
+{
+ #pragma acc loop vector
+ for (unsigned ix = 0; ix < n; ix++)
+ ptr[ix] += inc;
+}
+
+int
+main (void)
+{
+ const int n = 32, m=32;
+
+ int ary[m][n];
+ unsigned ix, iy;
+
+ for (ix = m; ix--;)
+ for (iy = n; iy--;)
+ ary[ix][iy] = (1 << 16) + (ix << 8) + iy;
+
+ int err = 0;
+
+#pragma acc parallel copy (ary)
+ {
+ Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
+ }
+
+ for (ix = m; ix--;)
+ for (iy = n; iy--;)
+ if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy))
+ {
+ printf ("ary[%u][%u] = %x expected %x\n",
+ ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy));
+ err++;
+ }
+
+ if (err)
+ {
+ printf ("%d failed\n", err);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
new file mode 100644
index 00000000000..99c08059d37
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+/* Minimized from ref-1.C. */
+
+#include <stdio.h>
+
+#pragma acc routine vector
+void __attribute__((noinline, noclone))
+Vector (int *ptr, int n, const int inc)
+{
+ #pragma acc loop vector
+ for (unsigned ix = 0; ix < n; ix++)
+ ptr[ix] += inc;
+}
+
+int
+main (void)
+{
+ const int n = 32, m=32;
+
+ int ary[m][n];
+ unsigned ix, iy;
+
+ for (ix = m; ix--;)
+ for (iy = n; iy--;)
+ ary[ix][iy] = (1 << 16) + (ix << 8) + iy;
+
+ int err = 0;
+
+#pragma acc parallel copy (ary) vector_length (128) /* { dg-warning "using vector_length \\(32\\) due to call to vector-partitionable routine, ignoring 128" } */
+ {
+ Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
+ }
+
+ for (ix = m; ix--;)
+ for (iy = n; iy--;)
+ if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy))
+ {
+ printf ("ary[%u][%u] = %x expected %x\n",
+ ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy));
+ err++;
+ }
+
+ if (err)
+ {
+ printf ("%d failed\n", err);
+ return 1;
+ }
+
+ return 0;
+}
--
2.16.4
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 6/9] [nvptx] Force vl32 if calling vector-partitionable routines -- test-cases
2019-01-12 22:21 ` [PATCH 6/9] [nvptx] Force vl32 if calling vector-partitionable routines -- test-cases Tom de Vries
@ 2020-10-30 16:16 ` Thomas Schwinge
2020-10-30 16:32 ` Tom de Vries
0 siblings, 1 reply; 14+ messages in thread
From: Thomas Schwinge @ 2020-10-30 16:16 UTC (permalink / raw)
To: Tom de Vries; +Cc: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 2710 bytes --]
Hi Tom!
While working on something completely different, I had to dig deeper, and
noticed a thing there, and deeper, and noticed another thing, and deeper,
and noticed this other thing here... (So, business as usual...) ;-)
On 2019-01-12T23:21:28+0100, Tom de Vries <tdevries@suse.de> wrote:
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
> +#pragma acc routine vector
> +void __attribute__((noinline, noclone))
> +Vector (int *ptr, int n, const int inc)
> +{
> +#pragma acc parallel copy (ary) vector_length (128) /* { dg-warning "using vector_length \\(32\\) due to call to vector-partitionable routine, ignoring 128" } */
> + {
> + Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
This works as diagnosed/expected.
On 2019-01-12T23:21:31+0100, Tom de Vries <tdevries@suse.de> wrote:
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
> @@ -0,0 +1,52 @@
> +/* { dg-do run { target openacc_nvidia_accel_selected } } */
> +/* { dg-additional-options "-fopenacc-dim=::128" } */
Via '-fopenacc-dim', we here request a default 'vector_length(128)'.
> +#pragma acc parallel copy (ary)
> + {
> + Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
As above, 'vector_length(128)' must be demoted to 'vector_length(32)'
(and in fact, it is) -- but we're not getting a diagnostic for that. Is
this expected?
On 2019-01-12T23:21:28+0100, Tom de Vries <tdevries@suse.de> wrote:
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
> @@ -0,0 +1,54 @@
> +/* { dg-do run { target openacc_nvidia_accel_selected } } */
> +/* { dg-set-target-env-var "GOMP_OPENACC_DIM" "::128" } */
This testcase needs 'dg-additional-options "-fopenacc-dim=::-"' (or
similar), but support for that is still missing in master branch (I'm
working on porting over the corresponding patch), so this currently
defaults to 'vector_length(32)', and...
> +#pragma acc parallel copy (ary)
> + {
> + Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
... thus no diagnostic here, and...
> +/* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */
... we're in fact not seeing this diagnostic.
In addition to the (presumably unexpected) missing diagnostic for
'-fopenacc-dim=::128' mentioned above -- OK to simplify and enhance the
testcases as attached, "Simplify and enhance
'libgomp.oacc-c-c++-common/pr85486*.c' [PR85486]"?
Grüße
Thomas
-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander Walter
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Simplify-and-enhance-libgomp.oacc-c-c-common-pr85486.patch --]
[-- Type: text/x-diff, Size: 5634 bytes --]
From b0f9199a17911966ee24ec27b23bfb7ed7846700 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Wed, 28 Oct 2020 10:56:20 +0100
Subject: [PATCH] Simplify and enhance 'libgomp.oacc-c-c++-common/pr85486*.c'
[PR85486]
Avoid code duplication, and better test what we expect to happen.
libgomp/
PR target/85486
* testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: Simplify and enhance.
* testsuite/libgomp.oacc-c-c++-common/pr85486-3.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/pr85486.c: Likewise.
---
.../libgomp.oacc-c-c++-common/pr85486-2.c | 53 ++----------------
.../libgomp.oacc-c-c++-common/pr85486-3.c | 55 ++-----------------
.../libgomp.oacc-c-c++-common/pr85486.c | 9 ++-
3 files changed, 20 insertions(+), 97 deletions(-)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
index f6ca263166d7..d45326488cd8 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
@@ -1,52 +1,11 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-DVECTOR_LENGTH=" } */
/* { dg-additional-options "-fopenacc-dim=::128" } */
-/* Minimized from ref-1.C. */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
-#include <stdio.h>
+#include "pr85486.c"
-#pragma acc routine vector
-void __attribute__((noinline, noclone))
-Vector (int *ptr, int n, const int inc)
-{
- #pragma acc loop vector
- for (unsigned ix = 0; ix < n; ix++)
- ptr[ix] += inc;
-}
-
-int
-main (void)
-{
- const int n = 32, m=32;
-
- int ary[m][n];
- unsigned ix, iy;
-
- for (ix = m; ix--;)
- for (iy = n; iy--;)
- ary[ix][iy] = (1 << 16) + (ix << 8) + iy;
-
- int err = 0;
-
-#pragma acc parallel copy (ary)
- {
- Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
- }
-
- for (ix = m; ix--;)
- for (iy = n; iy--;)
- if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy))
- {
- printf ("ary[%u][%u] = %x expected %x\n",
- ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy));
- err++;
- }
-
- if (err)
- {
- printf ("%d failed\n", err);
- return 1;
- }
-
- return 0;
-}
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
index a959b90c29ad..33480a4ae682 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
@@ -1,54 +1,11 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-DVECTOR_LENGTH=" } */
/* { dg-set-target-env-var "GOMP_OPENACC_DIM" "::128" } */
-/* Minimized from ref-1.C. */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
-#include <stdio.h>
+#include "pr85486.c"
-#pragma acc routine vector
-void __attribute__((noinline, noclone))
-Vector (int *ptr, int n, const int inc)
-{
- #pragma acc loop vector
- for (unsigned ix = 0; ix < n; ix++)
- ptr[ix] += inc;
-}
-
-int
-main (void)
-{
- const int n = 32, m=32;
-
- int ary[m][n];
- unsigned ix, iy;
-
- for (ix = m; ix--;)
- for (iy = n; iy--;)
- ary[ix][iy] = (1 << 16) + (ix << 8) + iy;
-
- int err = 0;
-
-#pragma acc parallel copy (ary)
- {
- Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
- }
-
- for (ix = m; ix--;)
- for (iy = n; iy--;)
- if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy))
- {
- printf ("ary[%u][%u] = %x expected %x\n",
- ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy));
- err++;
- }
-
- if (err)
- {
- printf ("%d failed\n", err);
- return 1;
- }
-
- return 0;
-}
-
-/* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
index 99c08059d37c..0d98b82f9932 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
@@ -1,4 +1,8 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-DVECTOR_LENGTH=vector_length(128)" } */
+
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
/* Minimized from ref-1.C. */
@@ -27,7 +31,7 @@ main (void)
int err = 0;
-#pragma acc parallel copy (ary) vector_length (128) /* { dg-warning "using vector_length \\(32\\) due to call to vector-partitionable routine, ignoring 128" } */
+#pragma acc parallel copy (ary) VECTOR_LENGTH /* { dg-warning "using vector_length \\(32\\) due to call to vector-partitionable routine, ignoring 128" } */
{
Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
}
@@ -49,3 +53,6 @@ main (void)
return 0;
}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */
--
2.17.1
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 6/9] [nvptx] Force vl32 if calling vector-partitionable routines -- test-cases
2020-10-30 16:16 ` Thomas Schwinge
@ 2020-10-30 16:32 ` Tom de Vries
2020-11-02 13:47 ` Thomas Schwinge
0 siblings, 1 reply; 14+ messages in thread
From: Tom de Vries @ 2020-10-30 16:32 UTC (permalink / raw)
To: Thomas Schwinge; +Cc: gcc-patches
On 10/30/20 5:16 PM, Thomas Schwinge wrote:
> Hi Tom!
>
> While working on something completely different, I had to dig deeper, and
> noticed a thing there, and deeper, and noticed another thing, and deeper,
> and noticed this other thing here... (So, business as usual...) ;-)
>
> On 2019-01-12T23:21:28+0100, Tom de Vries <tdevries@suse.de> wrote:
>> --- /dev/null
>> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
>
>> +#pragma acc routine vector
>> +void __attribute__((noinline, noclone))
>> +Vector (int *ptr, int n, const int inc)
>> +{
>
>> +#pragma acc parallel copy (ary) vector_length (128) /* { dg-warning "using vector_length \\(32\\) due to call to vector-partitionable routine, ignoring 128" } */
>> + {
>> + Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
>
> This works as diagnosed/expected.
>
> On 2019-01-12T23:21:31+0100, Tom de Vries <tdevries@suse.de> wrote:
>> --- /dev/null
>> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
>> @@ -0,0 +1,52 @@
>> +/* { dg-do run { target openacc_nvidia_accel_selected } } */
>> +/* { dg-additional-options "-fopenacc-dim=::128" } */
>
> Via '-fopenacc-dim', we here request a default 'vector_length(128)'.
>
>> +#pragma acc parallel copy (ary)
>> + {
>> + Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
>
> As above, 'vector_length(128)' must be demoted to 'vector_length(32)'
> (and in fact, it is) -- but we're not getting a diagnostic for that. Is
> this expected?
>
I think it would be good to have. I don't know whether it's implemented.
> On 2019-01-12T23:21:28+0100, Tom de Vries <tdevries@suse.de> wrote:
>> --- /dev/null
>> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
>> @@ -0,0 +1,54 @@
>> +/* { dg-do run { target openacc_nvidia_accel_selected } } */
>> +/* { dg-set-target-env-var "GOMP_OPENACC_DIM" "::128" } */
>
> This testcase needs 'dg-additional-options "-fopenacc-dim=::-"' (or
> similar), but support for that is still missing in master branch (I'm
> working on porting over the corresponding patch), so this currently
> defaults to 'vector_length(32)', and...
>
>> +#pragma acc parallel copy (ary)
>> + {
>> + Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
>
> ... thus no diagnostic here, and...
>
>> +/* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */
>
> ... we're in fact not seeing this diagnostic.
>
>
> In addition to the (presumably unexpected) missing diagnostic for
> '-fopenacc-dim=::128' mentioned above -- OK to simplify and enhance the
> testcases as attached, "Simplify and enhance
> 'libgomp.oacc-c-c++-common/pr85486*.c' [PR85486]"?
>
Yep, looks good.
Thanks,
- Tom
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 6/9] [nvptx] Force vl32 if calling vector-partitionable routines -- test-cases
2020-10-30 16:32 ` Tom de Vries
@ 2020-11-02 13:47 ` Thomas Schwinge
0 siblings, 0 replies; 14+ messages in thread
From: Thomas Schwinge @ 2020-11-02 13:47 UTC (permalink / raw)
To: Tom de Vries, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 839 bytes --]
Hi Tom!
On 2020-10-30T17:32:56+0100, Tom de Vries <tdevries@suse.de> wrote:
> On 10/30/20 5:16 PM, Thomas Schwinge wrote:
>> OK to simplify and enhance the
>> testcases as attached, "Simplify and enhance
>> 'libgomp.oacc-c-c++-common/pr85486*.c' [PR85486]"?
>
> Yep, looks good.
As posted, pushed "Simplify and enhance
'libgomp.oacc-c-c++-common/pr85486*.c' [PR85486]" to master branch in
commit 79680c1d5cd3d89c2e7423e20dc8a6e1d6dc8151, and backported to
releases/gcc-10 branch in commit
28aaad48d5aafde3e5f269864ba934c602011328, and releases/gcc-9 branch in commit
8860822a91e2e90a5eae726a478cd5ffc0d1fbfa.
Grüße
Thomas
-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander Walter
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Simplify-and-enhance-libgomp.oacc-c-c-common-pr85486.patch --]
[-- Type: text/x-diff, Size: 5634 bytes --]
From 79680c1d5cd3d89c2e7423e20dc8a6e1d6dc8151 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Wed, 28 Oct 2020 10:56:20 +0100
Subject: [PATCH] Simplify and enhance 'libgomp.oacc-c-c++-common/pr85486*.c'
[PR85486]
Avoid code duplication, and better test what we expect to happen.
libgomp/
PR target/85486
* testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: Simplify and enhance.
* testsuite/libgomp.oacc-c-c++-common/pr85486-3.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/pr85486.c: Likewise.
---
.../libgomp.oacc-c-c++-common/pr85486-2.c | 53 ++----------------
.../libgomp.oacc-c-c++-common/pr85486-3.c | 55 ++-----------------
.../libgomp.oacc-c-c++-common/pr85486.c | 9 ++-
3 files changed, 20 insertions(+), 97 deletions(-)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
index f6ca263166d7..d45326488cd8 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
@@ -1,52 +1,11 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-DVECTOR_LENGTH=" } */
/* { dg-additional-options "-fopenacc-dim=::128" } */
-/* Minimized from ref-1.C. */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
-#include <stdio.h>
+#include "pr85486.c"
-#pragma acc routine vector
-void __attribute__((noinline, noclone))
-Vector (int *ptr, int n, const int inc)
-{
- #pragma acc loop vector
- for (unsigned ix = 0; ix < n; ix++)
- ptr[ix] += inc;
-}
-
-int
-main (void)
-{
- const int n = 32, m=32;
-
- int ary[m][n];
- unsigned ix, iy;
-
- for (ix = m; ix--;)
- for (iy = n; iy--;)
- ary[ix][iy] = (1 << 16) + (ix << 8) + iy;
-
- int err = 0;
-
-#pragma acc parallel copy (ary)
- {
- Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
- }
-
- for (ix = m; ix--;)
- for (iy = n; iy--;)
- if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy))
- {
- printf ("ary[%u][%u] = %x expected %x\n",
- ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy));
- err++;
- }
-
- if (err)
- {
- printf ("%d failed\n", err);
- return 1;
- }
-
- return 0;
-}
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
index a959b90c29ad..33480a4ae682 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
@@ -1,54 +1,11 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-DVECTOR_LENGTH=" } */
/* { dg-set-target-env-var "GOMP_OPENACC_DIM" "::128" } */
-/* Minimized from ref-1.C. */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
-#include <stdio.h>
+#include "pr85486.c"
-#pragma acc routine vector
-void __attribute__((noinline, noclone))
-Vector (int *ptr, int n, const int inc)
-{
- #pragma acc loop vector
- for (unsigned ix = 0; ix < n; ix++)
- ptr[ix] += inc;
-}
-
-int
-main (void)
-{
- const int n = 32, m=32;
-
- int ary[m][n];
- unsigned ix, iy;
-
- for (ix = m; ix--;)
- for (iy = n; iy--;)
- ary[ix][iy] = (1 << 16) + (ix << 8) + iy;
-
- int err = 0;
-
-#pragma acc parallel copy (ary)
- {
- Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
- }
-
- for (ix = m; ix--;)
- for (iy = n; iy--;)
- if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy))
- {
- printf ("ary[%u][%u] = %x expected %x\n",
- ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy));
- err++;
- }
-
- if (err)
- {
- printf ("%d failed\n", err);
- return 1;
- }
-
- return 0;
-}
-
-/* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
index 99c08059d37c..0d98b82f9932 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
@@ -1,4 +1,8 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-DVECTOR_LENGTH=vector_length(128)" } */
+
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
/* Minimized from ref-1.C. */
@@ -27,7 +31,7 @@ main (void)
int err = 0;
-#pragma acc parallel copy (ary) vector_length (128) /* { dg-warning "using vector_length \\(32\\) due to call to vector-partitionable routine, ignoring 128" } */
+#pragma acc parallel copy (ary) VECTOR_LENGTH /* { dg-warning "using vector_length \\(32\\) due to call to vector-partitionable routine, ignoring 128" } */
{
Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
}
@@ -49,3 +53,6 @@ main (void)
return 0;
}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */
--
2.17.1
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: 0001-Simplify-and-enhance-libgomp.oacc-c-c-common-pr8.g10.patch --]
[-- Type: text/x-diff, Size: 5704 bytes --]
From 28aaad48d5aafde3e5f269864ba934c602011328 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Wed, 28 Oct 2020 10:56:20 +0100
Subject: [PATCH] Simplify and enhance 'libgomp.oacc-c-c++-common/pr85486*.c'
[PR85486]
Avoid code duplication, and better test what we expect to happen.
libgomp/
PR target/85486
* testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: Simplify and enhance.
* testsuite/libgomp.oacc-c-c++-common/pr85486-3.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/pr85486.c: Likewise.
(cherry picked from commit 79680c1d5cd3d89c2e7423e20dc8a6e1d6dc8151)
---
.../libgomp.oacc-c-c++-common/pr85486-2.c | 53 ++----------------
.../libgomp.oacc-c-c++-common/pr85486-3.c | 55 ++-----------------
.../libgomp.oacc-c-c++-common/pr85486.c | 9 ++-
3 files changed, 20 insertions(+), 97 deletions(-)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
index f6ca263166d7..d45326488cd8 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
@@ -1,52 +1,11 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-DVECTOR_LENGTH=" } */
/* { dg-additional-options "-fopenacc-dim=::128" } */
-/* Minimized from ref-1.C. */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
-#include <stdio.h>
+#include "pr85486.c"
-#pragma acc routine vector
-void __attribute__((noinline, noclone))
-Vector (int *ptr, int n, const int inc)
-{
- #pragma acc loop vector
- for (unsigned ix = 0; ix < n; ix++)
- ptr[ix] += inc;
-}
-
-int
-main (void)
-{
- const int n = 32, m=32;
-
- int ary[m][n];
- unsigned ix, iy;
-
- for (ix = m; ix--;)
- for (iy = n; iy--;)
- ary[ix][iy] = (1 << 16) + (ix << 8) + iy;
-
- int err = 0;
-
-#pragma acc parallel copy (ary)
- {
- Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
- }
-
- for (ix = m; ix--;)
- for (iy = n; iy--;)
- if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy))
- {
- printf ("ary[%u][%u] = %x expected %x\n",
- ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy));
- err++;
- }
-
- if (err)
- {
- printf ("%d failed\n", err);
- return 1;
- }
-
- return 0;
-}
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
index a959b90c29ad..33480a4ae682 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
@@ -1,54 +1,11 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-DVECTOR_LENGTH=" } */
/* { dg-set-target-env-var "GOMP_OPENACC_DIM" "::128" } */
-/* Minimized from ref-1.C. */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
-#include <stdio.h>
+#include "pr85486.c"
-#pragma acc routine vector
-void __attribute__((noinline, noclone))
-Vector (int *ptr, int n, const int inc)
-{
- #pragma acc loop vector
- for (unsigned ix = 0; ix < n; ix++)
- ptr[ix] += inc;
-}
-
-int
-main (void)
-{
- const int n = 32, m=32;
-
- int ary[m][n];
- unsigned ix, iy;
-
- for (ix = m; ix--;)
- for (iy = n; iy--;)
- ary[ix][iy] = (1 << 16) + (ix << 8) + iy;
-
- int err = 0;
-
-#pragma acc parallel copy (ary)
- {
- Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
- }
-
- for (ix = m; ix--;)
- for (iy = n; iy--;)
- if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy))
- {
- printf ("ary[%u][%u] = %x expected %x\n",
- ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy));
- err++;
- }
-
- if (err)
- {
- printf ("%d failed\n", err);
- return 1;
- }
-
- return 0;
-}
-
-/* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
index 99c08059d37c..0d98b82f9932 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
@@ -1,4 +1,8 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-DVECTOR_LENGTH=vector_length(128)" } */
+
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
/* Minimized from ref-1.C. */
@@ -27,7 +31,7 @@ main (void)
int err = 0;
-#pragma acc parallel copy (ary) vector_length (128) /* { dg-warning "using vector_length \\(32\\) due to call to vector-partitionable routine, ignoring 128" } */
+#pragma acc parallel copy (ary) VECTOR_LENGTH /* { dg-warning "using vector_length \\(32\\) due to call to vector-partitionable routine, ignoring 128" } */
{
Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
}
@@ -49,3 +53,6 @@ main (void)
return 0;
}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */
--
2.17.1
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #4: 0001-Simplify-and-enhance-libgomp.oacc-c-c-common-pr85.g9.patch --]
[-- Type: text/x-diff, Size: 5704 bytes --]
From 8860822a91e2e90a5eae726a478cd5ffc0d1fbfa Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Wed, 28 Oct 2020 10:56:20 +0100
Subject: [PATCH] Simplify and enhance 'libgomp.oacc-c-c++-common/pr85486*.c'
[PR85486]
Avoid code duplication, and better test what we expect to happen.
libgomp/
PR target/85486
* testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: Simplify and enhance.
* testsuite/libgomp.oacc-c-c++-common/pr85486-3.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/pr85486.c: Likewise.
(cherry picked from commit 79680c1d5cd3d89c2e7423e20dc8a6e1d6dc8151)
---
.../libgomp.oacc-c-c++-common/pr85486-2.c | 53 ++----------------
.../libgomp.oacc-c-c++-common/pr85486-3.c | 55 ++-----------------
.../libgomp.oacc-c-c++-common/pr85486.c | 9 ++-
3 files changed, 20 insertions(+), 97 deletions(-)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
index f6ca263166d7..d45326488cd8 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
@@ -1,52 +1,11 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-DVECTOR_LENGTH=" } */
/* { dg-additional-options "-fopenacc-dim=::128" } */
-/* Minimized from ref-1.C. */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
-#include <stdio.h>
+#include "pr85486.c"
-#pragma acc routine vector
-void __attribute__((noinline, noclone))
-Vector (int *ptr, int n, const int inc)
-{
- #pragma acc loop vector
- for (unsigned ix = 0; ix < n; ix++)
- ptr[ix] += inc;
-}
-
-int
-main (void)
-{
- const int n = 32, m=32;
-
- int ary[m][n];
- unsigned ix, iy;
-
- for (ix = m; ix--;)
- for (iy = n; iy--;)
- ary[ix][iy] = (1 << 16) + (ix << 8) + iy;
-
- int err = 0;
-
-#pragma acc parallel copy (ary)
- {
- Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
- }
-
- for (ix = m; ix--;)
- for (iy = n; iy--;)
- if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy))
- {
- printf ("ary[%u][%u] = %x expected %x\n",
- ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy));
- err++;
- }
-
- if (err)
- {
- printf ("%d failed\n", err);
- return 1;
- }
-
- return 0;
-}
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
index a959b90c29ad..33480a4ae682 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-3.c
@@ -1,54 +1,11 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-DVECTOR_LENGTH=" } */
/* { dg-set-target-env-var "GOMP_OPENACC_DIM" "::128" } */
-/* Minimized from ref-1.C. */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
-#include <stdio.h>
+#include "pr85486.c"
-#pragma acc routine vector
-void __attribute__((noinline, noclone))
-Vector (int *ptr, int n, const int inc)
-{
- #pragma acc loop vector
- for (unsigned ix = 0; ix < n; ix++)
- ptr[ix] += inc;
-}
-
-int
-main (void)
-{
- const int n = 32, m=32;
-
- int ary[m][n];
- unsigned ix, iy;
-
- for (ix = m; ix--;)
- for (iy = n; iy--;)
- ary[ix][iy] = (1 << 16) + (ix << 8) + iy;
-
- int err = 0;
-
-#pragma acc parallel copy (ary)
- {
- Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
- }
-
- for (ix = m; ix--;)
- for (iy = n; iy--;)
- if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy))
- {
- printf ("ary[%u][%u] = %x expected %x\n",
- ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy));
- err++;
- }
-
- if (err)
- {
- printf ("%d failed\n", err);
- return 1;
- }
-
- return 0;
-}
-
-/* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
index 99c08059d37c..0d98b82f9932 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486.c
@@ -1,4 +1,8 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-DVECTOR_LENGTH=vector_length(128)" } */
+
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
/* Minimized from ref-1.C. */
@@ -27,7 +31,7 @@ main (void)
int err = 0;
-#pragma acc parallel copy (ary) vector_length (128) /* { dg-warning "using vector_length \\(32\\) due to call to vector-partitionable routine, ignoring 128" } */
+#pragma acc parallel copy (ary) VECTOR_LENGTH /* { dg-warning "using vector_length \\(32\\) due to call to vector-partitionable routine, ignoring 128" } */
{
Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
}
@@ -49,3 +53,6 @@ main (void)
return 0;
}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */
--
2.17.1
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 5/9] [nvptx] Don't emit barriers for empty loops -- test-cases
2019-01-12 22:21 [PATCH 0/9] [COVER-LETTER, nvptx] Add support for warp-multiple openacc vector length Tom de Vries
` (2 preceding siblings ...)
2019-01-12 22:21 ` [PATCH 6/9] [nvptx] Force vl32 if calling vector-partitionable routines -- test-cases Tom de Vries
@ 2019-01-12 22:21 ` Tom de Vries
2019-01-12 22:21 ` [PATCH 7/9] [nvptx] Add vector_length 64 test-cases Tom de Vries
` (4 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Tom de Vries @ 2019-01-12 22:21 UTC (permalink / raw)
To: gcc-patches; +Cc: Thomas Schwinge
Add test-cases for PR85381.
2018-12-17 Tom de Vries <tdevries@suse.de>
PR target/85381
* testsuite/libgomp.oacc-c-c++-common/pr85381-5.c: New test.
* testsuite/libgomp.oacc-c-c++-common/pr85381.c: New test.
---
.../libgomp.oacc-c-c++-common/pr85381-5.c | 24 ++++++++++++++++++++++
.../testsuite/libgomp.oacc-c-c++-common/pr85381.c | 18 ++++++++++++++++
2 files changed, 42 insertions(+)
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-5.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381.c
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-5.c
new file mode 100644
index 00000000000..61e7e48f0c9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-5.c
@@ -0,0 +1,24 @@
+/* { dg-additional-options "-save-temps" } */
+/* { dg-do run { target openacc_nvidia_accel_selected } }
+ { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
+
+#define n 1024
+
+int
+main (void)
+{
+ #pragma acc parallel vector_length(128)
+ {
+ #pragma acc loop vector
+ for (int i = 0; i < n; i++)
+ ;
+
+ #pragma acc loop vector
+ for (int i = 0; i < n; i++)
+ ;
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-not "bar.sync" } } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381.c
new file mode 100644
index 00000000000..2864dfcf3cb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381.c
@@ -0,0 +1,18 @@
+/* { dg-additional-options "-save-temps" } */
+/* { dg-do run { target openacc_nvidia_accel_selected } }
+ { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
+
+int
+main (void)
+{
+ int v1;
+
+ #pragma acc parallel vector_length (128)
+ #pragma acc loop vector
+ for (v1 = 0; v1 < 20; v1 += 2)
+ ;
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-not "bar.sync" } } */
--
2.16.4
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 7/9] [nvptx] Add vector_length 64 test-cases
2019-01-12 22:21 [PATCH 0/9] [COVER-LETTER, nvptx] Add support for warp-multiple openacc vector length Tom de Vries
` (3 preceding siblings ...)
2019-01-12 22:21 ` [PATCH 5/9] [nvptx] Don't emit barriers for empty loops " Tom de Vries
@ 2019-01-12 22:21 ` Tom de Vries
2019-01-12 22:21 ` [PATCH 4/9] [nvptx] Enable large vectors -- reduction testcases Tom de Vries
` (3 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Tom de Vries @ 2019-01-12 22:21 UTC (permalink / raw)
To: gcc-patches; +Cc: Thomas Schwinge
Add some test-cases using vector_length 64.
2019-01-10 Tom de Vries <tdevries@suse.de>
* testsuite/libgomp.oacc-c-c++-common/vector-length-64-1.c: New test.
* testsuite/libgomp.oacc-c-c++-common/vector-length-64-2.c: New test.
* testsuite/libgomp.oacc-c-c++-common/vector-length-64-3.c: New test.
---
.../libgomp.oacc-c-c++-common/vector-length-64-1.c | 17 +++++++++++++++++
.../libgomp.oacc-c-c++-common/vector-length-64-2.c | 21 +++++++++++++++++++++
.../libgomp.oacc-c-c++-common/vector-length-64-3.c | 17 +++++++++++++++++
3 files changed, 55 insertions(+)
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-1.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-2.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-3.c
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-1.c
new file mode 100644
index 00000000000..b6ee732f863
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-1.c
@@ -0,0 +1,17 @@
+#include <stdlib.h>
+#include <stdio.h>
+
+int
+main (void)
+{
+#pragma acc parallel vector_length (64) num_workers (16) /* { dg-warning "using num_workers \\(15\\), ignoring 16" "" { target openacc_nvidia_accel_configured } } */
+ {
+#pragma acc loop worker
+ for (unsigned int i = 0; i < 32; i++)
+#pragma acc loop vector
+ for (unsigned int j = 0; j < 64; j++)
+ ;
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-2.c
new file mode 100644
index 00000000000..4dfbae8de91
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-2.c
@@ -0,0 +1,21 @@
+/* { dg-set-target-env-var "GOMP_OPENACC_DIM" ":16:" } */
+/* { dg-shouldfail "" { openacc_nvidia_accel_selected } } */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int
+main (void)
+{
+#pragma acc parallel vector_length (64)
+ {
+#pragma acc loop worker
+ for (unsigned int i = 0; i < 32; i++)
+#pragma acc loop vector
+ for (unsigned int j = 0; j < 64; j++)
+ ;
+ }
+
+ return 0;
+}
+/* { dg-output "The Nvidia accelerator has insufficient barrier resources" { target openacc_nvidia_accel_selected } } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-3.c
new file mode 100644
index 00000000000..1acb40e8357
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-64-3.c
@@ -0,0 +1,17 @@
+#include <stdlib.h>
+#include <stdio.h>
+
+int
+main (void)
+{
+#pragma acc parallel vector_length (64)
+ {
+#pragma acc loop worker
+ for (unsigned int i = 0; i < 32; i++)
+#pragma acc loop vector
+ for (unsigned int j = 0; j < 64; j++)
+ ;
+ }
+
+ return 0;
+}
--
2.16.4
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 4/9] [nvptx] Enable large vectors -- reduction testcases
2019-01-12 22:21 [PATCH 0/9] [COVER-LETTER, nvptx] Add support for warp-multiple openacc vector length Tom de Vries
` (4 preceding siblings ...)
2019-01-12 22:21 ` [PATCH 7/9] [nvptx] Add vector_length 64 test-cases Tom de Vries
@ 2019-01-12 22:21 ` Tom de Vries
2019-01-12 22:21 ` [PATCH 2/9] [nvptx] Update insufficient launch message for variable vector_length Tom de Vries
` (2 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Tom de Vries @ 2019-01-12 22:21 UTC (permalink / raw)
To: gcc-patches; +Cc: Thomas Schwinge
Add various reduction test-cases with vector length 128.
2018-12-17 Tom de Vries <tdevries@suse.de>
* testsuite/libgomp.oacc-c-c++-common/vred2d-128.c: New test.
* testsuite/libgomp.oacc-fortran/gemm.f90: New test.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c: New test.
---
.../vector-length-128-10.c | 39 +++++++++++
.../libgomp.oacc-c-c++-common/vred2d-128.c | 55 +++++++++++++++
libgomp/testsuite/libgomp.oacc-fortran/gemm.f90 | 79 ++++++++++++++++++++++
3 files changed, 173 insertions(+)
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c
create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/gemm.f90
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c
new file mode 100644
index 00000000000..0658cfde7ad
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-10.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)
+{
+ for (unsigned int i = 0; i < n; ++i)
+ {
+ a[i] = i % 3;
+ b[i] = i % 5;
+ }
+
+ unsigned int res = 1;
+ unsigned long long res2 = 1;
+#pragma acc parallel vector_length (128) copyin (a,b) reduction (+:res, res2) copy (res, res2)
+ {
+#pragma acc loop vector reduction (+:res, res2)
+ for (unsigned int i = 0; i < n; i++)
+ {
+ res += ((a[i] + b[i]) % 2);
+ res2 += ((a[i] + b[i]) % 2);
+ }
+ }
+
+ if (res != 478)
+ abort ();
+ if (res2 != 478)
+ abort ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c
new file mode 100644
index 00000000000..86171d456e0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vred2d-128.c
@@ -0,0 +1,55 @@
+/* Test large vector lengths. */
+
+#include <assert.h>
+
+#define n 10000
+int a1[n], a2[n];
+
+#define gentest(name, outer, inner) \
+ void name () \
+ { \
+ long i, j, t1, t2, t3; \
+ _Pragma(outer) \
+ for (i = 0; i < n; i++) \
+ { \
+ t1 = 0; \
+ t2 = 0; \
+ _Pragma(inner) \
+ for (j = i; j < n; j++) \
+ { \
+ t1++; \
+ t2--; \
+ } \
+ a1[i] = t1; \
+ a2[i] = t2; \
+ } \
+ for (i = 0; i < n; i++) \
+ { \
+ assert (a1[i] == n-i); \
+ assert (a2[i] == -(n-i)); \
+ } \
+ } \
+
+gentest (test1, "acc parallel loop gang vector_length (128) firstprivate (t1, t2)",
+ "acc loop vector reduction(+:t1) reduction(-:t2)")
+
+gentest (test2, "acc parallel loop gang vector_length (128) firstprivate (t1, t2)",
+ "acc loop worker vector reduction(+:t1) reduction(-:t2)")
+
+gentest (test3, "acc parallel loop gang worker vector_length (128) firstprivate (t1, t2)",
+ "acc loop vector reduction(+:t1) reduction(-:t2)")
+
+gentest (test4, "acc parallel loop firstprivate (t1, t2)",
+ "acc loop reduction(+:t1) reduction(-:t2)")
+
+
+int
+main ()
+{
+ test1 ();
+ test2 ();
+ test3 ();
+ test4 ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90 b/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90
new file mode 100644
index 00000000000..de78148c7b3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/gemm.f90
@@ -0,0 +1,79 @@
+! Exercise three levels of parallelism using SGEMM from BLAS.
+
+! { dg-do run }
+
+! Explicitly set vector_length to 128 using a vector_length clause.
+subroutine openacc_sgemm_128 (m, n, k, alpha, a, b, beta, c)
+ integer :: m, n, k
+ real :: alpha, beta
+ real :: a(k,*), b(k,*), c(m,*)
+
+ integer :: i, j, l
+ real :: temp
+
+ !$acc parallel loop copy(c(1:m,1:n)) copyin(a(1:k,1:m),b(1:k,1:n)) vector_length (128) firstprivate (temp)
+ do j = 1, n
+ !$acc loop
+ do i = 1, m
+ temp = 0.0
+ !$acc loop reduction(+:temp)
+ do l = 1, k
+ temp = temp + a(l,i)*b(l,j)
+ end do
+ if(beta == 0.0) then
+ c(i,j) = alpha*temp
+ else
+ c(i,j) = alpha*temp + beta*c(i,j)
+ end if
+ end do
+ end do
+end subroutine openacc_sgemm_128
+
+subroutine host_sgemm (m, n, k, alpha, a, b, beta, c)
+ integer :: m, n, k
+ real :: alpha, beta
+ real :: a(k,*), b(k,*), c(m,*)
+
+ integer :: i, j, l
+ real :: temp
+
+ do j = 1, n
+ do i = 1, m
+ temp = 0.0
+ do l = 1, k
+ temp = temp + a(l,i)*b(l,j)
+ end do
+ if(beta == 0.0) then
+ c(i,j) = alpha*temp
+ else
+ c(i,j) = alpha*temp + beta*c(i,j)
+ end if
+ end do
+ end do
+end subroutine host_sgemm
+
+program main
+ integer, parameter :: M = 100, N = 50, K = 2000
+ real :: a(K, M), b(K, N), c(M, N), d (M, N), e (M, N)
+ real alpha, beta
+ integer i, j
+
+ a(:,:) = 1.0
+ b(:,:) = 0.25
+
+ c(:,:) = 0.0
+ d(:,:) = 0.0
+ e(:,:) = 0.0
+
+ alpha = 1.05
+ beta = 1.25
+
+ call openacc_sgemm_128 (M, N, K, alpha, a, b, beta, d)
+ call host_sgemm (M, N, K, alpha, a, b, beta, e)
+
+ do i = 1, m
+ do j = 1, n
+ if (d(i,j) /= e(i,j)) call abort
+ end do
+ end do
+end program main
--
2.16.4
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 2/9] [nvptx] Update insufficient launch message for variable vector_length
2019-01-12 22:21 [PATCH 0/9] [COVER-LETTER, nvptx] Add support for warp-multiple openacc vector length Tom de Vries
` (5 preceding siblings ...)
2019-01-12 22:21 ` [PATCH 4/9] [nvptx] Enable large vectors -- reduction testcases Tom de Vries
@ 2019-01-12 22:21 ` Tom de Vries
2019-01-12 22:21 ` [PATCH 1/9] [nvptx] Enable large vectors Tom de Vries
2019-01-12 22:21 ` [PATCH 8/9] [nvptx] Enable setting vector length using -fopenacc-dim Tom de Vries
8 siblings, 0 replies; 14+ messages in thread
From: Tom de Vries @ 2019-01-12 22:21 UTC (permalink / raw)
To: gcc-patches; +Cc: Thomas Schwinge
Update the message in the nvptx libgomp plugin about insufficient resources to
launch a kernel, to account for the fact that vector_length can now be variable.
2019-01-08 Tom de Vries <tdevries@suse.de>
* plugin/plugin-nvptx.c (nvptx_exec): Update insufficient hardware
resources diagnostic.
---
libgomp/plugin/plugin-nvptx.c | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index c80da64c422..8912660966a 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1297,14 +1297,16 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
if (dims[GOMP_DIM_WORKER] * dims[GOMP_DIM_VECTOR]
> targ_fn->max_threads_per_block)
{
- int suggest_workers
- = targ_fn->max_threads_per_block / dims[GOMP_DIM_VECTOR];
- GOMP_PLUGIN_fatal ("The Nvidia accelerator has insufficient resources to"
- " launch '%s' with num_workers = %d; recompile the"
- " program with 'num_workers = %d' on that offloaded"
- " region or '-fopenacc-dim=:%d'",
- targ_fn->launch->fn, dims[GOMP_DIM_WORKER],
- suggest_workers, suggest_workers);
+ const char *msg
+ = ("The Nvidia accelerator has insufficient resources to launch '%s'"
+ " with num_workers = %d and vector_length = %d"
+ "; "
+ "recompile the program with 'num_workers = x and vector_length = y'"
+ " on that offloaded region or '-fopenacc-dim=:x:y' where"
+ " x * y <= %d"
+ ".\n");
+ GOMP_PLUGIN_fatal (msg, targ_fn->launch->fn, dims[GOMP_DIM_WORKER],
+ dims[GOMP_DIM_VECTOR], targ_fn->max_threads_per_block);
}
/* Check if the accelerator has sufficient barrier resources to
--
2.16.4
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 1/9] [nvptx] Enable large vectors
2019-01-12 22:21 [PATCH 0/9] [COVER-LETTER, nvptx] Add support for warp-multiple openacc vector length Tom de Vries
` (6 preceding siblings ...)
2019-01-12 22:21 ` [PATCH 2/9] [nvptx] Update insufficient launch message for variable vector_length Tom de Vries
@ 2019-01-12 22:21 ` Tom de Vries
2021-06-08 9:10 ` Thomas Schwinge
2019-01-12 22:21 ` [PATCH 8/9] [nvptx] Enable setting vector length using -fopenacc-dim Tom de Vries
8 siblings, 1 reply; 14+ messages in thread
From: Tom de Vries @ 2019-01-12 22:21 UTC (permalink / raw)
To: gcc-patches; +Cc: Thomas Schwinge
Allow vector_length clauses to accept values larger than warp size. Note that
this does not enable setting vector_length to values larger than warp size using
-fopenacc-dim.
2018-12-17 Tom de Vries <tdevries@suse.de>
* config/nvptx/nvptx.c (nvptx_goacc_validate_dims): Take larger vector
lengths into account.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c: Expect
vector length to be 128.
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Expect vector
length 2097152 to be reduced to 1024 instead of 32.
---
gcc/config/nvptx/nvptx.c | 2 +-
libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c | 4 ++--
libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c | 5 ++---
3 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 1d9704543d9..8d2740cd50f 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -96,7 +96,7 @@
#define PTX_NUM_PER_WORKER_BARRIERS (PTX_CTA_NUM_BARRIERS - PTX_NUM_PER_CTA_BARRIERS)
#define PTX_DEFAULT_VECTOR_LENGTH PTX_WARP_SIZE
-#define PTX_MAX_VECTOR_LENGTH PTX_WARP_SIZE
+#define PTX_MAX_VECTOR_LENGTH PTX_CTA_SIZE
#define PTX_WORKER_LENGTH 32
#define PTX_DEFAULT_RUNTIME_DIM 0 /* Defer to runtime. */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index 4a9854662cc..d7cd0461b53 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -350,7 +350,7 @@ int main ()
int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
gangs_min = workers_min = vectors_min = INT_MAX;
gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring 2097152" "" { target openacc_nvidia_accel_configured } } */ \
+#pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(1024\\), ignoring 2097152" "" { target openacc_nvidia_accel_configured } } */ \
vector_length (VECTORS)
{
if (acc_on_device (acc_device_host))
@@ -361,7 +361,7 @@ int main ()
else if (acc_on_device (acc_device_nvidia))
{
/* The GCC nvptx back end enforces vector_length (32). */
- vectors_actual = 32;
+ vectors_actual = 1024;
}
else
__builtin_abort ();
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c
index fab5b0d25d1..18d77cc5ecb 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c
@@ -33,7 +33,6 @@ main (void)
return 0;
}
-/* { dg-prune-output "using vector_length \\(32\\), ignoring 128" } */
-/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */
-/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=128" } */
--
2.16.4
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 1/9] [nvptx] Enable large vectors
2019-01-12 22:21 ` [PATCH 1/9] [nvptx] Enable large vectors Tom de Vries
@ 2021-06-08 9:10 ` Thomas Schwinge
0 siblings, 0 replies; 14+ messages in thread
From: Thomas Schwinge @ 2021-06-08 9:10 UTC (permalink / raw)
To: Tom de Vries, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 1755 bytes --]
Hi!
On 2019-01-12T23:21:23+0100, Tom de Vries <tdevries@suse.de> wrote:
> Allow vector_length clauses to accept values larger than warp size.
> * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Expect vector
> length 2097152 to be reduced to 1024 instead of 32.
> --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
> @@ -350,7 +350,7 @@ int main ()
> int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
> gangs_min = workers_min = vectors_min = INT_MAX;
> gangs_max = workers_max = vectors_max = INT_MIN;
> -#pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring 2097152" "" { target openacc_nvidia_accel_configured } } */ \
> +#pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(1024\\), ignoring 2097152" "" { target openacc_nvidia_accel_configured } } */ \
> vector_length (VECTORS)
> {
> if (acc_on_device (acc_device_host))
> @@ -361,7 +361,7 @@ int main ()
> else if (acc_on_device (acc_device_nvidia))
> {
> /* The GCC nvptx back end enforces vector_length (32). */
> - vectors_actual = 32;
> + vectors_actual = 1024;
> }
> else
> __builtin_abort ();
As obvious, pushed "[nvptx] Update comment in
'libgomp.oacc-c-c++-common/parallel-dims.c'" to master branch in commit
e64d62c7008e6a4b0227fd25e071db8f0b3f1820, see attached.
Grüße
Thomas
-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank Thürauf
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-nvptx-Update-comment-in-libgomp.oacc-c-c-common-para.patch --]
[-- Type: text/x-diff, Size: 1324 bytes --]
From e64d62c7008e6a4b0227fd25e071db8f0b3f1820 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Sat, 5 Jun 2021 22:01:48 +0200
Subject: [PATCH] [nvptx] Update comment in
'libgomp.oacc-c-c++-common/parallel-dims.c'
Small fix-up for r267889 (commit 2b9d9e393766d2fa6e2dd5f361d0db14872cf261)
"[nvptx] Enable large vectors":
> * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Expect vector
> length 2097152 to be reduced to 1024 instead of 32.
libgomp/
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
<acc_device_nvidia>: Update comment.
---
libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index ef4917aafff..ef3dfda5fa5 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -385,7 +385,7 @@ int main ()
}
else if (acc_on_device (acc_device_nvidia))
{
- /* The GCC nvptx back end enforces vector_length (32). */
+ /* The GCC nvptx back end reduces to vector_length (1024). */
vectors_actual = 1024;
}
else if (acc_on_device (acc_device_radeon))
--
2.30.2
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 8/9] [nvptx] Enable setting vector length using -fopenacc-dim
2019-01-12 22:21 [PATCH 0/9] [COVER-LETTER, nvptx] Add support for warp-multiple openacc vector length Tom de Vries
` (7 preceding siblings ...)
2019-01-12 22:21 ` [PATCH 1/9] [nvptx] Enable large vectors Tom de Vries
@ 2019-01-12 22:21 ` Tom de Vries
8 siblings, 0 replies; 14+ messages in thread
From: Tom de Vries @ 2019-01-12 22:21 UTC (permalink / raw)
To: gcc-patches; +Cc: Thomas Schwinge
Enable setting vector length using -fopenacc-dim, e.g. -fopenacc-dim=::128.
2019-01-12 Tom de Vries <tdevries@suse.de>
* config/nvptx/nvptx.c (nvptx_goacc_validate_dims_1): Allow setting
vector length using -fopenacc-dim.
* plugin/plugin-nvptx.c (nvptx_exec): Update error message.
---
gcc/config/nvptx/nvptx.c | 3 ++-
libgomp/plugin/plugin-nvptx.c | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 8d2740cd50f..03c0f82f4a2 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -5705,7 +5705,8 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level, unsigned used)
if (oacc_default_dims_p)
{
- dims[GOMP_DIM_VECTOR] = default_vector_length;
+ if (dims[GOMP_DIM_VECTOR] < 0)
+ dims[GOMP_DIM_VECTOR] = default_vector_length;
if (dims[GOMP_DIM_WORKER] < 0)
dims[GOMP_DIM_WORKER] = PTX_DEFAULT_RUNTIME_DIM;
if (dims[GOMP_DIM_GANG] < 0)
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 8912660966a..dd2bcf3083f 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1321,7 +1321,7 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
" region or '-fopenacc-dim=:x:' where x <= 15"
"; "
"or, recompile the program with 'vector_length = 32' on that"
- " offloaded region"
+ " offloaded region or '-fopenacc-dim=::32'"
".\n");
GOMP_PLUGIN_fatal (msg, targ_fn->launch->fn, dims[GOMP_DIM_WORKER],
dims[GOMP_DIM_VECTOR]);
--
2.16.4
^ permalink raw reply [flat|nested] 14+ messages in thread