public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/omp/gcc-12] Adjustments and additions to testcases
@ 2022-06-29 14:34 Kwok Yeung
0 siblings, 0 replies; only message in thread
From: Kwok Yeung @ 2022-06-29 14:34 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:59e263c424125d3f404fa6ab5cdf0fde048e0916
commit 59e263c424125d3f404fa6ab5cdf0fde048e0916
Author: Julian Brown <julian@codesourcery.com>
Date: Tue Feb 26 13:18:36 2019 -0800
Adjustments and additions to testcases
Some additions of redundant "present" clauses dropped.
2018-10-22 Cesar Philippidis <cesar@codesourcery.com>
gcc/testsuite/
* g++.dg/goacc/loop-1.c: New test.
* g++.dg/goacc/loop-2.c: New test.
* g++.dg/goacc/loop-3.c: New test.
2018-10-22 James Norris <jnorris@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
Tom de Vries <tom@codesourcery.com>
libgomp/
* testsuite/libgomp.oacc-fortran/data-3.f90: Update parallel
regions to denote variables copied in via acc enter data as
present.
* testsuite/libgomp.oacc-c-c++-common/subr.h: Reimplement.
* testsuite/libgomp.oacc-c-c++-common/subr.ptx: Regenerated PTX.
* testsuite/libgomp.oacc-c-c++-common/timer.h: Removed.
* testsuite/libgomp.oacc-c-c++-common/lib-69.c: Change async checks.
* testsuite/libgomp.oacc-c-c++-common/lib-70.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/lib-72.c: Rework kernel i/f and
change async checks.
* testsuite/libgomp.oacc-c-c++-common/lib-73.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/lib-74.c: Rework kernel i/f and
timing checks.
* testsuite/libgomp.oacc-c-c++-common/lib-75.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/lib-76.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/lib-78.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/lib-81.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/lib-82.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/lib-93.c: New test.
Diff:
---
gcc/testsuite/ChangeLog.omp | 6 +
gcc/testsuite/g++.dg/goacc/loop-1.c | 23 +++
gcc/testsuite/g++.dg/goacc/loop-2.c | 70 +++++++
gcc/testsuite/g++.dg/goacc/loop-3.c | 43 ++++
libgomp/ChangeLog.omp | 25 +++
.../testsuite/libgomp.oacc-c-c++-common/lib-69.c | 55 +----
.../testsuite/libgomp.oacc-c-c++-common/lib-70.c | 79 +++-----
.../testsuite/libgomp.oacc-c-c++-common/lib-72.c | 60 +-----
.../testsuite/libgomp.oacc-c-c++-common/lib-73.c | 64 +-----
.../testsuite/libgomp.oacc-c-c++-common/lib-74.c | 87 +++-----
.../testsuite/libgomp.oacc-c-c++-common/lib-75.c | 81 ++------
.../testsuite/libgomp.oacc-c-c++-common/lib-76.c | 80 ++------
.../testsuite/libgomp.oacc-c-c++-common/lib-78.c | 83 +++-----
.../testsuite/libgomp.oacc-c-c++-common/lib-79.c | 83 ++------
.../testsuite/libgomp.oacc-c-c++-common/lib-81.c | 102 ++++------
.../testsuite/libgomp.oacc-c-c++-common/lib-82.c | 43 +---
.../testsuite/libgomp.oacc-c-c++-common/lib-93.c | 19 ++
libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h | 45 +----
.../testsuite/libgomp.oacc-c-c++-common/subr.ptx | 222 ++++++++-------------
.../testsuite/libgomp.oacc-c-c++-common/timer.h | 103 ----------
libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 | 12 +-
21 files changed, 492 insertions(+), 893 deletions(-)
diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp
index e22d9abc79b..74fd6f5464e 100644
--- a/gcc/testsuite/ChangeLog.omp
+++ b/gcc/testsuite/ChangeLog.omp
@@ -1,3 +1,9 @@
+2018-10-22 Cesar Philippidis <cesar@codesourcery.com>
+
+ * g++.dg/goacc/loop-1.c: New test.
+ * g++.dg/goacc/loop-2.c: New test.
+ * g++.dg/goacc/loop-3.c: New test.
+
2018-12-13 Cesar Philippidis <cesar@codesourcery.com>
Nathan Sidwell <nathan@acm.org>
Julian Brown <julian@codesourcery.com>
diff --git a/gcc/testsuite/g++.dg/goacc/loop-1.c b/gcc/testsuite/g++.dg/goacc/loop-1.c
new file mode 100644
index 00000000000..51b20b0e2da
--- /dev/null
+++ b/gcc/testsuite/g++.dg/goacc/loop-1.c
@@ -0,0 +1,23 @@
+void
+f (int i, float j, int k)
+{
+#pragma acc parallel num_gangs (i) num_workers (i) vector_length (i)
+#pragma acc loop gang
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc parallel num_gangs (j) /* { dg-error "'num_gangs' expression must be integral" } */
+#pragma acc loop gang
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc parallel num_workers (j) /* { dg-error "'num_workers' expression must be integral" } */
+#pragma acc loop gang
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc parallel vector_length (j) /* { dg-error "'vector_length' expression must be integral" } */
+#pragma acc loop gang
+ for (i = 0; i < 20; ++i)
+ ;
+}
diff --git a/gcc/testsuite/g++.dg/goacc/loop-2.c b/gcc/testsuite/g++.dg/goacc/loop-2.c
new file mode 100644
index 00000000000..ddfb4804353
--- /dev/null
+++ b/gcc/testsuite/g++.dg/goacc/loop-2.c
@@ -0,0 +1,70 @@
+void
+f (int i, int j, int k)
+{
+#pragma acc kernels
+#pragma acc loop gang
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc kernels
+#pragma acc loop gang (num: 10)
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc kernels
+#pragma acc loop gang (static: 10)
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc kernels
+#pragma acc loop gang (static: 5, num: 10)
+ for (i = 0; i < 20; ++i)
+ ;
+
+
+#pragma acc kernels
+#pragma acc loop gang (static: 5, num: 10, *) /* { dg-error "duplicate operand to clause" } */
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc kernels
+#pragma acc loop gang (static: 5, num: 10, static: *) /* { dg-error "duplicate 'num' argument" } */
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc kernels
+#pragma acc loop worker (static: 234) /* { dg-error "expected 'num' before" } */
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc kernels
+#pragma acc loop worker (num: 234)
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc kernels
+#pragma acc loop worker (num: 234, num: 12) /* { dg-error "duplicate operand to clause" } */
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc kernels
+#pragma acc loop vector /* { dg-error "gang, worker and vector must occur in this order in a loop nest" } */
+ for (i = 0; i < 20; ++i)
+#pragma acc loop worker
+ for (j = 0; j < 25; ++j)
+ ;
+
+#pragma acc kernels
+#pragma acc loop worker (length: 20) /* { dg-error "expected 'num' before 'length'" } */
+ for (i = 0; i < 20; ++i)
+#pragma acc loop vector (length: 10)
+ for (j = 0; j < 25; ++j)
+ ;
+
+#pragma acc kernels
+#pragma acc loop worker
+ for (i = 0; i < 20; ++i)
+#pragma acc loop vector
+ for (j = 0; j < 25; ++j)
+ ;
+}
diff --git a/gcc/testsuite/g++.dg/goacc/loop-3.c b/gcc/testsuite/g++.dg/goacc/loop-3.c
new file mode 100644
index 00000000000..c43b4f33e62
--- /dev/null
+++ b/gcc/testsuite/g++.dg/goacc/loop-3.c
@@ -0,0 +1,43 @@
+void
+f (int i, int j, int k)
+{
+#pragma acc kernels num_gangs (10) /* { dg-error "'num_gangs' is not valid" } */
+#pragma acc loop gang
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc kernels num_workers (10) /* { dg-error "'num_workers' is not valid" } */
+#pragma acc loop worker
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc kernels vector_length (10) /* { dg-error "'vector_length' is not valid" } */
+#pragma acc loop vector
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc parallel num_gangs (10) num_workers (20) vector_length (32)
+#pragma acc loop gang
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc parallel num_gangs (i) num_workers (j) vector_length (k)
+#pragma acc loop gang
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc parallel num_gangs (10, i) /* { dg-error "expected '\\)' before ',' token" } */
+#pragma acc loop gang
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc parallel num_workers (10, i) /* { dg-error "expected '\\)' before ',' token" } */
+#pragma acc loop gang
+ for (i = 0; i < 20; ++i)
+ ;
+
+#pragma acc parallel vector_length (10, i) /* { dg-error "expected '\\)' before ',' token" } */
+#pragma acc loop gang
+ for (i = 0; i < 20; ++i)
+ ;
+}
diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index a5561fffe4d..75345158736 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,28 @@
+2018-10-22 James Norris <jnorris@codesourcery.com>
+ Cesar Philippidis <cesar@codesourcery.com>
+ Tom de Vries <tom@codesourcery.com>
+
+ * testsuite/libgomp.oacc-fortran/data-3.f90: Update parallel
+ regions to denote variables copyied in via acc enter data as
+ present.
+ * testsuite/libgomp.oacc-c-c++-common/subr.h: Reimplement.
+ * testsuite/libgomp.oacc-c-c++-common/subr.ptx: Regenerated PTX.
+ * testsuite/libgomp.oacc-c-c++-common/timer.h: Removed.
+ * testsuite/libgomp.oacc-c-c++-common/lib-69.c: Change async checks.
+ * testsuite/libgomp.oacc-c-c++-common/lib-70.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-72.c: Rework kernel i/f and
+ change async checks.
+ * testsuite/libgomp.oacc-c-c++-common/lib-73.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-74.c: Rework kernel i/f and
+ timing checks.
+ * testsuite/libgomp.oacc-c-c++-common/lib-75.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-76.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-78.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-81.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-82.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/lib-93.c: New test.
+
2018-12-13 Cesar Philippidis <cesar@codesourcery.com>
Nathan Sidwell <nathan@acm.org>
Julian Brown <julian@codesourcery.com>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c
index 00e0ca89f17..0c46f955f1e 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c
@@ -10,46 +10,14 @@
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
CUstream stream;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float dtime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuModuleLoad (&module, "subr.ptx");
+ r = cuModuleLoad (&module, "./subr.ptx");
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuModuleLoad failed: %d\n", r);
@@ -63,20 +31,6 @@ main (int argc, char **argv)
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
-
- dtime = 200.0;
-
- dticks = (unsigned long) (dtime * clkrate);
-
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
stream = (CUstream) acc_get_cuda_stream (0);
if (stream != NULL)
abort ();
@@ -91,7 +45,7 @@ main (int argc, char **argv)
if (!acc_set_cuda_stream (0, stream))
abort ();
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -119,11 +73,6 @@ main (int argc, char **argv)
abort ();
}
- acc_unmap_data (a);
-
- free (a);
- acc_free (d_a);
-
acc_shutdown (acc_device_nvidia);
exit (0);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c
index a2918c08a12..b28d1152cc3 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c
@@ -2,6 +2,7 @@
/* { dg-additional-options "-lcuda" } */
/* { dg-require-effective-target openacc_cuda } */
+#include <sys/time.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -11,47 +12,17 @@
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
- const int N = 10;
+ const int N = 3;
int i;
CUstream streams[N];
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float dtime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
+ struct timeval tv1, tv2;
+ time_t diff;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
@@ -66,20 +37,6 @@ main (int argc, char **argv)
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
-
- dtime = 200.0;
-
- dticks = (unsigned long) (dtime * clkrate);
-
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
for (i = 0; i < N; i++)
{
streams[i] = (CUstream) acc_get_cuda_stream (i);
@@ -97,9 +54,29 @@ main (int argc, char **argv)
abort ();
}
+ gettimeofday (&tv1, NULL);
+
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[0], NULL, 0);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
+ }
+
+ r = cuCtxSynchronize ();
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxLaunch failed: %d\n", r);
+ abort ();
+ }
+
+ gettimeofday (&tv2, NULL);
+
+ diff = tv2.tv_sec - tv1.tv_sec;
+
for (i = 0; i < N; i++)
{
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -113,7 +90,7 @@ main (int argc, char **argv)
}
}
- sleep ((int) (dtime / 1000.0f) + 1);
+ sleep ((diff + 1) * N);
for (i = 0; i < N; i++)
{
@@ -124,10 +101,6 @@ main (int argc, char **argv)
}
}
- acc_unmap_data (a);
-
- free (a);
- acc_free (d_a);
acc_shutdown (acc_device_nvidia);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c
index 99b62f1132a..025cd8a4816 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c
@@ -11,45 +11,13 @@
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
CUstream stream;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float dtime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
@@ -64,20 +32,6 @@ main (int argc, char **argv)
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
-
- dtime = 200.0;
-
- dticks = (unsigned long) (dtime * clkrate);
-
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
if (r != CUDA_SUCCESS)
{
@@ -88,7 +42,7 @@ main (int argc, char **argv)
if (!acc_set_cuda_stream (0, stream))
abort ();
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -101,7 +55,12 @@ main (int argc, char **argv)
abort ();
}
- sleep ((int) (dtime / 1000.f) + 1);
+ r = cuCtxSynchronize ();
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxSynchronize () failed: %d\n", r);
+ abort ();
+ }
if (acc_async_test_all () != 1)
{
@@ -109,11 +68,6 @@ main (int argc, char **argv)
abort ();
}
- acc_unmap_data (a);
-
- free (a);
- acc_free (d_a);
-
acc_shutdown (acc_device_nvidia);
exit (0);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c
index 5b4b3fdde31..21e0f8c4484 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c
@@ -2,6 +2,7 @@
/* { dg-additional-options "-lcuda" } */
/* { dg-require-effective-target openacc_cuda } */
+#include <sys/time.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
@@ -11,47 +12,15 @@
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
- const int N = 10;
+ const int N = 6;
int i;
CUstream streams[N];
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float dtime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
@@ -66,20 +35,6 @@ main (int argc, char **argv)
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
-
- dtime = 200.0;
-
- dticks = (unsigned long) (dtime * clkrate);
-
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
for (i = 0; i < N; i++)
{
streams[i] = (CUstream) acc_get_cuda_stream (i);
@@ -99,13 +54,12 @@ main (int argc, char **argv)
for (i = 0; i < N; i++)
{
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
abort ();
}
-
}
if (acc_async_test_all () != 0)
@@ -114,7 +68,12 @@ main (int argc, char **argv)
abort ();
}
- sleep ((int) (dtime / 1000.0f) + 1);
+ r = cuCtxSynchronize ();
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuCtxSynchronize failed: %d\n", r);
+ abort ();
+ }
if (acc_async_test_all () != 1)
{
@@ -122,11 +81,6 @@ main (int argc, char **argv)
abort ();
}
- acc_unmap_data (a);
-
- free (a);
- acc_free (d_a);
-
acc_shutdown (acc_device_nvidia);
exit (0);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c
index 939f255f26c..13953df65b9 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c
@@ -6,77 +6,53 @@
#include <stdlib.h>
#include <openacc.h>
#include <cuda.h>
-#include "timer.h"
+#include <sys/time.h>
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
CUstream stream;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float atime, dtime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
+ struct timeval tv1, tv2;
+ time_t t1, t2;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
+ r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
abort ();
}
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
+ r = cuModuleGetFunction (&delay, module, "delay");
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
abort ();
}
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
+ gettimeofday (&tv1, NULL);
- r = cuModuleLoad (&module, "subr.ptx");
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0);
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
abort ();
}
- r = cuModuleGetFunction (&delay, module, "delay");
+ r = cuCtxSynchronize ();
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ fprintf (stderr, "cuCtxSynchronize failed: %d\n", r);
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
-
- dtime = 200.0;
-
- dticks = (unsigned long) (dtime * clkrate);
-
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
+ gettimeofday (&tv2, NULL);
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
stream = (CUstream) acc_get_cuda_stream (0);
if (stream != NULL)
@@ -92,11 +68,9 @@ main (int argc, char **argv)
if (!acc_set_cuda_stream (0, stream))
abort ();
- init_timers (1);
+ gettimeofday (&tv1, NULL);
- start_timer (0);
-
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -104,38 +78,31 @@ main (int argc, char **argv)
}
acc_wait (0);
- /* Test unseen async-argument. */
- acc_wait (1);
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
+
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
- if (atime < dtime)
+ if (((abs (t2 - t1) / t1) * 100.0) > 1.0)
{
- fprintf (stderr, "actual time < delay time\n");
+ fprintf (stderr, "too long 1\n");
abort ();
}
- start_timer (0);
+ gettimeofday (&tv1, NULL);
acc_wait (0);
- /* Test unseen async-argument. */
- acc_wait (1);
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
+
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
- if (0.010 < atime)
+ if (t2 > 1000)
{
- fprintf (stderr, "actual time too long\n");
+ fprintf (stderr, "too long 2\n");
abort ();
}
- acc_unmap_data (a);
-
- fini_timers ();
-
- free (a);
- acc_free (d_a);
-
acc_shutdown (acc_device_nvidia);
exit (0);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c
index 804ee3938ac..96c36758a3d 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c
@@ -7,78 +7,55 @@
#include <stdlib.h>
#include <openacc.h>
#include <cuda.h>
-#include "timer.h"
+#include <sys/time.h>
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
- int N;
+ const int N = 2;
int i;
CUstream stream;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float atime, dtime, hitime, lotime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
+ struct timeval tv1, tv2;
+ time_t t1, t2;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
+ r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
abort ();
}
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
+ r = cuModuleGetFunction (&delay, module, "delay");
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
abort ();
}
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
+ gettimeofday (&tv1, NULL);
- r = cuModuleLoad (&module, "subr.ptx");
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0);
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
abort ();
}
- r = cuModuleGetFunction (&delay, module, "delay");
+ r = cuCtxSynchronize ();
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ fprintf (stderr, "cuCtxSynchronize failed: %d\n", r);
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
-
- dtime = 200.0;
-
- dticks = (unsigned long) (dtime * clkrate);
-
- N = nprocs;
-
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
+ gettimeofday (&tv2, NULL);
- acc_map_data (a, d_a, nbytes);
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
stream = (CUstream) acc_get_cuda_stream (0);
if (stream != NULL)
@@ -94,16 +71,11 @@ main (int argc, char **argv)
if (!acc_set_cuda_stream (0, stream))
abort ();
- init_timers (1);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
- start_timer (0);
+ gettimeofday (&tv1, NULL);
for (i = 0; i < N; i++)
{
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -113,27 +85,18 @@ main (int argc, char **argv)
acc_wait (0);
}
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
- hitime = dtime * N;
- hitime += hitime * 0.02;
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
- lotime = dtime * N;
- lotime -= lotime * 0.02;
+ t1 *= N;
- if (atime > hitime || atime < lotime)
+ if (((abs (t2 - t1) / t1) * 100.0) > 1.0)
{
- fprintf (stderr, "actual time < delay time\n");
+ fprintf (stderr, "too long\n");
abort ();
}
- acc_unmap_data (a);
-
- fini_timers ();
-
- free (a);
- acc_free (d_a);
-
acc_shutdown (acc_device_nvidia);
exit (0);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c
index f9045266f1c..0ec97dd364b 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c
@@ -7,78 +7,55 @@
#include <unistd.h>
#include <openacc.h>
#include <cuda.h>
-#include "timer.h"
+#include <sys/time.h>
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
- int N;
+ const int N = 2;
int i;
CUstream *streams;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float atime, dtime, hitime, lotime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
+ struct timeval tv1, tv2;
+ time_t t1, t2;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
+ r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
abort ();
}
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
+ r = cuModuleGetFunction (&delay, module, "delay");
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
abort ();
}
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
+ gettimeofday (&tv1, NULL);
- r = cuModuleLoad (&module, "subr.ptx");
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0);
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
abort ();
}
- r = cuModuleGetFunction (&delay, module, "delay");
+ r = cuCtxSynchronize ();
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ fprintf (stderr, "cuCtxSynchronize failed: %d\n", r);
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
-
- dtime = 200.0;
-
- dticks = (unsigned long) (dtime * clkrate);
-
- N = nprocs;
+ gettimeofday (&tv2, NULL);
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
streams = (CUstream *) malloc (N * sizeof (void *));
@@ -99,16 +76,11 @@ main (int argc, char **argv)
abort ();
}
- init_timers (1);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
- start_timer (0);
+ gettimeofday (&tv1, NULL);
for (i = 0; i < N; i++)
{
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -118,27 +90,19 @@ main (int argc, char **argv)
acc_wait (i);
}
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
- hitime = dtime * N;
- hitime += hitime * 0.02;
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
- lotime = dtime * N;
- lotime -= lotime * 0.02;
+ t1 *= N;
- if (atime > hitime || atime < lotime)
+ if (((abs (t2 - t1) / t1) * 100.0) > 1.0)
{
- fprintf (stderr, "actual time < delay time\n");
+ fprintf (stderr, "too long\n");
abort ();
}
- acc_unmap_data (a);
-
- fini_timers ();
-
free (streams);
- free (a);
- acc_free (d_a);
acc_shutdown (acc_device_nvidia);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c
index d8cba4db538..fb191c6625b 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c
@@ -7,77 +7,53 @@
#include <unistd.h>
#include <openacc.h>
#include <cuda.h>
-#include "timer.h"
+#include <sys/time.h>
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
CUstream stream;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float atime, dtime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
+ struct timeval tv1, tv2;
+ time_t t1, t2;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
+ r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
abort ();
}
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
+ r = cuModuleGetFunction (&delay, module, "delay");
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
abort ();
}
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
+ gettimeofday (&tv1, NULL);
- r = cuModuleLoad (&module, "subr.ptx");
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0);
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
abort ();
}
- r = cuModuleGetFunction (&delay, module, "delay");
+ r = cuCtxSynchronize ();
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ fprintf (stderr, "cuCtxSynchronize failed: %d\n", r);
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
-
- dtime = 200.0;
-
- dticks = (unsigned long) (dtime * clkrate);
-
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
+ gettimeofday (&tv2, NULL);
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
stream = (CUstream) acc_get_cuda_stream (0);
if (stream != NULL)
@@ -93,11 +69,9 @@ main (int argc, char **argv)
if (!acc_set_cuda_stream (0, stream))
abort ();
- init_timers (1);
+ gettimeofday (&tv1, NULL);
- start_timer (0);
-
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -106,33 +80,30 @@ main (int argc, char **argv)
acc_wait_all ();
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
+
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
- if (atime < dtime)
+ if (t2 > (t1 + (t1 * 0.10)))
{
- fprintf (stderr, "actual time < delay time\n");
+ fprintf (stderr, "too long 1\n");
abort ();
}
- start_timer (0);
+ gettimeofday (&tv1, NULL);
acc_wait_all ();
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
+
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
- if (0.010 < atime)
+ if (t2 > 1000)
{
- fprintf (stderr, "actual time too long\n");
+ fprintf (stderr, "too long 2\n");
abort ();
}
- acc_unmap_data (a);
-
- fini_timers ();
-
- free (a);
- acc_free (d_a);
-
acc_shutdown (acc_device_nvidia);
exit (0);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c
index b805d5f9b81..af8aa119f74 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c
@@ -7,80 +7,55 @@
#include <unistd.h>
#include <openacc.h>
#include <cuda.h>
-#include "timer.h"
+#include <sys/time.h>
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
- int N;
+ const int N = 2;
int i;
CUstream stream;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float atime, dtime, hitime, lotime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
-
- devnum = 2;
+ struct timeval tv1, tv2;
+ time_t t1, t2;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
+ r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
abort ();
}
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
+ r = cuModuleGetFunction (&delay, module, "delay");
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
abort ();
}
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
+ gettimeofday (&tv1, NULL);
- r = cuModuleLoad (&module, "subr.ptx");
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0);
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuModuleLoad failed: %d\n", r);
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
abort ();
}
- r = cuModuleGetFunction (&delay, module, "delay");
+ r = cuCtxSynchronize ();
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
+ fprintf (stderr, "cuCtxSynchronize failed: %d\n", r);
abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
-
- dtime = 200.0;
-
- dticks = (unsigned long) (dtime * clkrate);
-
- N = nprocs;
-
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
+ gettimeofday (&tv2, NULL);
- acc_map_data (a, d_a, nbytes);
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
if (r != CUDA_SUCCESS)
@@ -106,16 +81,11 @@ main (int argc, char **argv)
if (!acc_set_cuda_stream (0, stream))
abort ();
- init_timers (1);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
- start_timer (0);
+ gettimeofday (&tv1, NULL);
for (i = 0; i < N; i++)
{
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -157,7 +127,7 @@ main (int argc, char **argv)
acc_wait (1);
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
if (acc_async_test (0) != 1)
abort ();
@@ -165,25 +135,16 @@ main (int argc, char **argv)
if (acc_async_test (1) != 1)
abort ();
- hitime = dtime * N;
- hitime += hitime * 0.02;
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
- lotime = dtime * N;
- lotime -= lotime * 0.02;
+ t1 *= N;
- if (atime > hitime || atime < lotime)
+ if (((abs (t2 - t1) / t1) * 100.0) > 1.0)
{
- fprintf (stderr, "actual time < delay time\n");
+ fprintf (stderr, "too long\n");
abort ();
}
- acc_unmap_data (a);
-
- fini_timers ();
-
- free (a);
- acc_free (d_a);
-
acc_shutdown (acc_device_nvidia);
exit (0);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c
index 958672c45e5..902d2574b45 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c
@@ -7,78 +7,55 @@
#include <unistd.h>
#include <openacc.h>
#include <cuda.h>
-#include "timer.h"
+#include <sys/time.h>
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay;
CUmodule module;
CUresult r;
- int N;
+ const int N = 2;
int i;
CUstream *streams, stream;
- unsigned long *a, *d_a, dticks;
- int nbytes;
- float atime, dtime;
- void *kargs[2];
- int clkrate;
- int devnum, nprocs;
+ struct timeval tv1, tv2;
+ time_t t1, t2;
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
+ r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
+ fprintf (stderr, "cuModuleLoad failed: %d\n", r);
abort ();
}
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
+ r = cuModuleGetFunction (&delay, module, "delay");
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
+ fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
abort ();
}
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
+ gettimeofday (&tv1, NULL);
- r = cuModuleLoad (&module, "subr.ptx");
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0);
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuModuleLoad failed: %d\n", r);
- abort ();
+ fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
+ abort ();
}
- r = cuModuleGetFunction (&delay, module, "delay");
+ r = cuCtxSynchronize ();
if (r != CUDA_SUCCESS)
{
- fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
- abort ();
+ fprintf (stderr, "cuCtxSynchronize failed: %d\n", r);
+ abort ();
}
- nbytes = nprocs * sizeof (unsigned long);
-
- dtime = 500.0;
-
- dticks = (unsigned long) (dtime * clkrate);
+ gettimeofday (&tv2, NULL);
- N = nprocs;
-
- a = (unsigned long *) malloc (nbytes);
- d_a = (unsigned long *) acc_malloc (nbytes);
-
- acc_map_data (a, d_a, nbytes);
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
streams = (CUstream *) malloc (N * sizeof (void *));
@@ -99,11 +76,6 @@ main (int argc, char **argv)
abort ();
}
- init_timers (1);
-
- kargs[0] = (void *) &d_a;
- kargs[1] = (void *) &dticks;
-
stream = (CUstream) acc_get_cuda_stream (N);
if (stream != NULL)
abort ();
@@ -118,11 +90,11 @@ main (int argc, char **argv)
if (!acc_set_cuda_stream (N, stream))
abort ();
- start_timer (0);
+ gettimeofday (&tv1, NULL);
for (i = 0; i < N; i++)
{
- r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
+ r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], NULL, 0);
if (r != CUDA_SUCCESS)
{
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
@@ -130,6 +102,10 @@ main (int argc, char **argv)
}
}
+ gettimeofday (&tv2, NULL);
+
+ t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
+
acc_wait_all_async (N);
for (i = 0; i <= N; i++)
@@ -146,15 +122,13 @@ main (int argc, char **argv)
abort ();
}
- atime = stop_timer (0);
-
- if (atime < dtime)
+ if ((t1 * N) < t2)
{
- fprintf (stderr, "actual time < delay time\n");
+ fprintf (stderr, "too long 1\n");
abort ();
}
- start_timer (0);
+ gettimeofday (&tv1, NULL);
stream = (CUstream) acc_get_cuda_stream (N + 1);
if (stream != NULL)
@@ -174,35 +148,33 @@ main (int argc, char **argv)
acc_wait (N + 1);
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
+
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
- if (0.10 < atime)
+ if (t1 > 1000)
{
- fprintf (stderr, "actual time too long\n");
+ fprintf (stderr, "too long 2\n");
abort ();
}
- start_timer (0);
+ gettimeofday (&tv1, NULL);
acc_wait_all_async (N);
acc_wait (N);
- atime = stop_timer (0);
+ gettimeofday (&tv2, NULL);
- if (0.10 < atime)
+ t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec);
+
+ if (t1 > 1000)
{
- fprintf (stderr, "actual time too long\n");
+ fprintf (stderr, "too long 3\n");
abort ();
}
- acc_unmap_data (a);
-
- fini_timers ();
-
free (streams);
- free (a);
- acc_free (d_a);
acc_shutdown (acc_device_nvidia);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c
index a36f8e69a18..054ffbf18ae 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c
@@ -11,46 +11,18 @@
int
main (int argc, char **argv)
{
- CUdevice dev;
CUfunction delay2;
CUmodule module;
CUresult r;
- int N;
+ const int N = 32;
int i;
CUstream *streams;
- unsigned long **a, **d_a, *tid, ticks;
+ unsigned long **a, **d_a, *tid;
int nbytes;
- void *kargs[3];
- int clkrate;
- int devnum, nprocs;
+ void *kargs[2];
acc_init (acc_device_nvidia);
- devnum = acc_get_device_num (acc_device_nvidia);
-
- r = cuDeviceGet (&dev, devnum);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGet failed: %d\n", r);
- abort ();
- }
-
- r =
- cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
- dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
- r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
- abort ();
- }
-
r = cuModuleLoad (&module, "subr.ptx");
if (r != CUDA_SUCCESS)
{
@@ -67,10 +39,6 @@ main (int argc, char **argv)
nbytes = sizeof (int);
- ticks = (unsigned long) (200.0 * clkrate);
-
- N = nprocs;
-
streams = (CUstream *) malloc (N * sizeof (void *));
a = (unsigned long **) malloc (N * sizeof (unsigned long *));
@@ -104,8 +72,7 @@ main (int argc, char **argv)
for (i = 0; i < N; i++)
{
kargs[0] = (void *) &d_a[i];
- kargs[1] = (void *) &ticks;
- kargs[2] = (void *) &tid[i];
+ kargs[1] = (void *) &tid[i];
r = cuLaunchKernel (delay2, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
if (r != CUDA_SUCCESS)
@@ -113,8 +80,6 @@ main (int argc, char **argv)
fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
abort ();
}
-
- ticks = (unsigned long) (50.0 * clkrate);
}
acc_wait_all_async (0);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-93.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-93.c
new file mode 100644
index 00000000000..bc60a16c64f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-93.c
@@ -0,0 +1,19 @@
+/* { dg-do run { target { ! openacc_nvidia_accel_configured } } } */
+
+#include <stdio.h>
+#include <openacc.h>
+
+int
+main (void)
+{
+ fprintf (stderr, "CheCKpOInT\n");
+ acc_init (acc_device_nvidia);
+
+ acc_shutdown (acc_device_nvidia);
+
+ return 0;
+}
+
+/* { dg-output "CheCKpOInT(\n|\r\n|\r).*" } */
+/* { dg-output "device type nvidia not supported" } */
+/* { dg-shouldfail "" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h
index 9db236c8362..a99c08ddd18 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h
@@ -1,46 +1,23 @@
-
-#if ACC_DEVICE_TYPE_nvidia
-
#pragma acc routine nohost
-static int clock (void)
-{
- int thetime;
-
- asm __volatile__ ("mov.u32 %0, %%clock;" : "=r"(thetime));
-
- return thetime;
-}
-
-#endif
-
void
-delay (unsigned long *d_o, unsigned long delay)
+delay ()
{
- int start, ticks;
+ int i, sum;
+ const int N = 500000;
- start = clock ();
-
- ticks = 0;
-
- while (ticks < delay)
- ticks = clock () - start;
-
- return;
+ for (i = 0; i < N; i++)
+ sum = sum + 1;
}
+#pragma acc routine nohost
void
-delay2 (unsigned long *d_o, unsigned long delay, unsigned long tid)
+delay2 (unsigned long *d_o, unsigned long tid)
{
- int start, ticks;
+ int i, sum;
+ const int N = 500000;
- start = clock ();
-
- ticks = 0;
-
- while (ticks < delay)
- ticks = clock () - start;
+ for (i = 0; i < N; i++)
+ sum = sum + 1;
d_o[0] = tid;
-
- return;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx
index 6f748fcaf9e..88b63bfb74b 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx
@@ -1,148 +1,90 @@
-// BEGIN PREAMBLE
- .version 3.1
- .target sm_30
+ .version 3.1
+ .target sm_30
.address_size 64
-// END PREAMBLE
-// BEGIN FUNCTION DEF: clock
-.func (.param.u32 %out_retval)clock
-{
-.reg.u32 %retval;
- .reg.u64 %hr10;
- .reg.u32 %r22;
- .reg.u32 %r23;
- .reg.u32 %r24;
- .local.align 8 .b8 %frame[8];
- // #APP
-// 7 "subr.c" 1
- mov.u32 %r24, %clock;
-// 0 "" 2
- // #NO_APP
- st.local.u32 [%frame], %r24;
- ld.local.u32 %r22, [%frame];
- mov.u32 %r23, %r22;
- mov.u32 %retval, %r23;
- st.param.u32 [%out_retval], %retval;
- ret;
- }
-// END FUNCTION DEF
-// BEGIN GLOBAL FUNCTION DEF: delay
-.visible .entry delay(.param.u64 %in_ar1, .param.u64 %in_ar2)
-{
- .reg.u64 %ar1;
- .reg.u64 %ar2;
- .reg.u64 %hr10;
- .reg.u64 %r22;
- .reg.u32 %r23;
- .reg.u64 %r24;
- .reg.u64 %r25;
- .reg.u32 %r26;
- .reg.u32 %r27;
- .reg.u32 %r28;
- .reg.u32 %r29;
- .reg.u32 %r30;
- .reg.u64 %r31;
- .reg.pred %r32;
- .local.align 8 .b8 %frame[24];
- ld.param.u64 %ar1, [%in_ar1];
- ld.param.u64 %ar2, [%in_ar2];
- mov.u64 %r24, %ar1;
- st.u64 [%frame+8], %r24;
- mov.u64 %r25, %ar2;
- st.local.u64 [%frame+16], %r25;
+ .visible .entry delay
{
- .param.u32 %retval_in;
- {
- call (%retval_in), clock;
- }
- ld.param.u32 %r26, [%retval_in];
-}
- st.local.u32 [%frame+4], %r26;
- mov.u32 %r27, 0;
- st.local.u32 [%frame], %r27;
- bra $L4;
-$L5:
- {
- .param.u32 %retval_in;
- {
- call (%retval_in), clock;
- }
- ld.param.u32 %r28, [%retval_in];
-}
- mov.u32 %r23, %r28;
- ld.local.u32 %r30, [%frame+4];
- sub.u32 %r29, %r23, %r30;
- st.local.u32 [%frame], %r29;
-$L4:
- ld.local.s32 %r22, [%frame];
- ld.local.u64 %r31, [%frame+16];
- setp.lo.u64 %r32,%r22,%r31;
- @%r32 bra $L5;
+ .reg .u64 %hr10;
+ .reg .u32 %r22;
+ .reg .u32 %r23;
+ .reg .u32 %r24;
+ .reg .u32 %r25;
+ .reg .u32 %r26;
+ .reg .u32 %r27;
+ .reg .u32 %r28;
+ .reg .u32 %r29;
+ .reg .pred %r30;
+ .reg .u64 %frame;
+ .local .align 8 .b8 %farray[16];
+ cvta.local.u64 %frame,%farray;
+ mov.u32 %r22,500000;
+ st.u32 [%frame+8],%r22;
+ mov.u32 %r23,0;
+ st.u32 [%frame],%r23;
+ bra $L2;
+ $L3:
+ ld.u32 %r25,[%frame+4];
+ add.u32 %r24,%r25,1;
+ st.u32 [%frame+4],%r24;
+ ld.u32 %r27,[%frame];
+ add.u32 %r26,%r27,1;
+ st.u32 [%frame],%r26;
+ $L2:
+ ld.u32 %r28,[%frame];
+ ld.u32 %r29,[%frame+8];
+ setp.lt.s32 %r30,%r28,%r29;
+ @%r30
+ bra $L3;
ret;
}
-// END FUNCTION DEF
-// BEGIN GLOBAL FUNCTION DEF: delay2
-.visible .entry delay2(.param.u64 %in_ar1, .param.u64 %in_ar2, .param.u64 %in_ar3)
-{
- .reg.u64 %ar1;
- .reg.u64 %ar2;
- .reg.u64 %ar3;
- .reg.u64 %hr10;
- .reg.u64 %r22;
- .reg.u32 %r23;
- .reg.u64 %r24;
- .reg.u64 %r25;
- .reg.u64 %r26;
- .reg.u32 %r27;
- .reg.u32 %r28;
- .reg.u32 %r29;
- .reg.u32 %r30;
- .reg.u32 %r31;
- .reg.u64 %r32;
- .reg.pred %r33;
- .reg.u64 %r34;
- .reg.u64 %r35;
- .local.align 8 .b8 %frame[32];
- ld.param.u64 %ar1, [%in_ar1];
- ld.param.u64 %ar2, [%in_ar2];
- ld.param.u64 %ar3, [%in_ar3];
- mov.u64 %r24, %ar1;
- st.local.u64 [%frame+8], %r24;
- mov.u64 %r25, %ar2;
- st.local.u64 [%frame+16], %r25;
- mov.u64 %r26, %ar3;
- st.local.u64 [%frame+24], %r26;
- {
- .param.u32 %retval_in;
- {
- call (%retval_in), clock;
- }
- ld.param.u32 %r27, [%retval_in];
-}
- st.local.u32 [%frame+4], %r27;
- mov.u32 %r28, 0;
- st.local.u32 [%frame], %r28;
- bra $L8;
-$L9:
- {
- .param.u32 %retval_in;
+
+ .visible .entry delay2 (.param .u64 %in_ar1, .param .u64 %in_ar2)
{
- call (%retval_in), clock;
- }
- ld.param.u32 %r29, [%retval_in];
-}
- mov.u32 %r23, %r29;
- ld.local.u32 %r31, [%frame+4];
- sub.u32 %r30, %r23, %r31;
- st.local.u32 [%frame], %r30;
-$L8:
- ld.local.s32 %r22, [%frame];
- ld.local.u64 %r32, [%frame+16];
- setp.lo.u64 %r33,%r22,%r32;
- @%r33 bra $L9;
- ld.local.u64 %r34, [%frame+8];
- ld.local.u64 %r35, [%frame+24];
- st.u64 [%r34], %r35;
+ .reg .u64 %ar1;
+ .reg .u64 %ar2;
+ .reg .u64 %hr10;
+ .reg .u64 %r22;
+ .reg .u64 %r23;
+ .reg .u32 %r24;
+ .reg .u32 %r25;
+ .reg .u32 %r26;
+ .reg .u32 %r27;
+ .reg .u32 %r28;
+ .reg .u32 %r29;
+ .reg .u32 %r30;
+ .reg .u32 %r31;
+ .reg .pred %r32;
+ .reg .u64 %r33;
+ .reg .u64 %r34;
+ .reg .u64 %frame;
+ .local .align 8 .b8 %farray[32];
+ cvta.local.u64 %frame,%farray;
+ ld.param.u64 %ar1,[%in_ar1];
+ ld.param.u64 %ar2,[%in_ar2];
+ mov.u64 %r22,%ar1;
+ st.u64 [%frame+16],%r22;
+ mov.u64 %r23,%ar2;
+ st.u64 [%frame+24],%r23;
+ mov.u32 %r24,500000;
+ st.u32 [%frame+8],%r24;
+ mov.u32 %r25,0;
+ st.u32 [%frame],%r25;
+ bra $L5;
+ $L6:
+ ld.u32 %r27,[%frame+4];
+ add.u32 %r26,%r27,1;
+ st.u32 [%frame+4],%r26;
+ ld.u32 %r29,[%frame];
+ add.u32 %r28,%r29,1;
+ st.u32 [%frame],%r28;
+ $L5:
+ ld.u32 %r30,[%frame];
+ ld.u32 %r31,[%frame+8];
+ setp.lt.s32 %r32,%r30,%r31;
+ @%r32
+ bra $L6;
+ ld.u64 %r33,[%frame+16];
+ ld.u64 %r34,[%frame+24];
+ st.u64 [%r33],%r34;
ret;
}
-// END FUNCTION DEF
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h
index 53749da5a0d..e69de29bb2d 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h
@@ -1,103 +0,0 @@
-
-#include <stdio.h>
-#include <cuda.h>
-
-static int _Tnum_timers;
-static CUevent *_Tstart_events, *_Tstop_events;
-static CUstream _Tstream;
-
-void
-init_timers (int ntimers)
-{
- int i;
- CUresult r;
-
- _Tnum_timers = ntimers;
-
- _Tstart_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent));
- _Tstop_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent));
-
- r = cuStreamCreate (&_Tstream, CU_STREAM_DEFAULT);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuStreamCreate failed: %d\n", r);
- abort ();
- }
-
- for (i = 0; i < _Tnum_timers; i++)
- {
- r = cuEventCreate (&_Tstart_events[i], CU_EVENT_DEFAULT);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuEventCreate failed: %d\n", r);
- abort ();
- }
-
- r = cuEventCreate (&_Tstop_events[i], CU_EVENT_DEFAULT);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuEventCreate failed: %d\n", r);
- abort ();
- }
- }
-}
-
-void
-fini_timers (void)
-{
- int i;
-
- for (i = 0; i < _Tnum_timers; i++)
- {
- cuEventDestroy (_Tstart_events[i]);
- cuEventDestroy (_Tstop_events[i]);
- }
-
- cuStreamDestroy (_Tstream);
-
- free (_Tstart_events);
- free (_Tstop_events);
-}
-
-void
-start_timer (int timer)
-{
- CUresult r;
-
- r = cuEventRecord (_Tstart_events[timer], _Tstream);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuEventRecord failed: %d\n", r);
- abort ();
- }
-}
-
-float
-stop_timer (int timer)
-{
- CUresult r;
- float etime;
-
- r = cuEventRecord (_Tstop_events[timer], _Tstream);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuEventRecord failed: %d\n", r);
- abort ();
- }
-
- r = cuEventSynchronize (_Tstop_events[timer]);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuEventSynchronize failed: %d\n", r);
- abort ();
- }
-
- r = cuEventElapsedTime (&etime, _Tstart_events[timer], _Tstop_events[timer]);
- if (r != CUDA_SUCCESS)
- {
- fprintf (stderr, "cuEventElapsedTime failed: %d\n", r);
- abort ();
- }
-
- return etime;
-}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90
index 19eb4bd6a45..b5586be0609 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90
@@ -55,7 +55,8 @@ program asyncwait
c(:) = 0.0
d(:) = 0.0
- !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) create (d(1:N))
+ !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) &
+ !$acc& create (d(1:N))
!$acc parallel async (1)
do i = 1, N
@@ -76,7 +77,8 @@ program asyncwait
!$acc end parallel
!$acc wait (1)
- !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) copyout (d(1:N))
+ !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) &
+ !$acc& copyout (d(1:N))
do i = 1, N
if (a(i) .ne. 3.0) STOP 5
@@ -91,7 +93,8 @@ program asyncwait
d(:) = 0.0
e(:) = 0.0
- !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) create (d(1:N)) copyin (e(1:N))
+ !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) &
+ !$acc& create (d(1:N)) copyin (e(1:N))
!$acc parallel async (1)
do i = 1, N
@@ -118,7 +121,8 @@ program asyncwait
!$acc end parallel
!$acc wait (1)
- !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) copyout (d(1:N)) copyout (e(1:N))
+ !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) &
+ !$acc& copyout (d(1:N)) copyout (e(1:N))
!$acc exit data delete (N)
do i = 1, N
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-06-29 14:34 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-29 14:34 [gcc/devel/omp/gcc-12] Adjustments and additions to testcases Kwok Yeung
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).