From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1729) id B71DE3843889; Wed, 29 Jun 2022 14:34:15 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org B71DE3843889 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Kwok Yeung To: gcc-cvs@gcc.gnu.org Subject: [gcc/devel/omp/gcc-12] Adjustments and additions to testcases X-Act-Checkin: gcc X-Git-Author: Julian Brown X-Git-Refname: refs/heads/devel/omp/gcc-12 X-Git-Oldrev: d27d6c9e1e3bc18ba0113757b743b306ea69f825 X-Git-Newrev: 59e263c424125d3f404fa6ab5cdf0fde048e0916 Message-Id: <20220629143415.B71DE3843889@sourceware.org> Date: Wed, 29 Jun 2022 14:34:15 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 29 Jun 2022 14:34:15 -0000 https://gcc.gnu.org/g:59e263c424125d3f404fa6ab5cdf0fde048e0916 commit 59e263c424125d3f404fa6ab5cdf0fde048e0916 Author: Julian Brown Date: Tue Feb 26 13:18:36 2019 -0800 Adjustments and additions to testcases Some additions of redundant "present" clauses dropped. 2018-10-22 Cesar Philippidis gcc/testsuite/ * g++.dg/goacc/loop-1.c: New test. * g++.dg/goacc/loop-2.c: New test. * g++.dg/goacc/loop-3.c: New test. 2018-10-22 James Norris Cesar Philippidis Tom de Vries libgomp/ * testsuite/libgomp.oacc-fortran/data-3.f90: Update parallel regions to denote variables copied in via acc enter data as present. * testsuite/libgomp.oacc-c-c++-common/subr.h: Reimplement. * testsuite/libgomp.oacc-c-c++-common/subr.ptx: Regenerated PTX. * testsuite/libgomp.oacc-c-c++-common/timer.h: Removed. * testsuite/libgomp.oacc-c-c++-common/lib-69.c: Change async checks. * testsuite/libgomp.oacc-c-c++-common/lib-70.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-72.c: Rework kernel i/f and change async checks. 
* testsuite/libgomp.oacc-c-c++-common/lib-73.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-74.c: Rework kernel i/f and timing checks. * testsuite/libgomp.oacc-c-c++-common/lib-75.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-76.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-78.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-81.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-82.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/lib-93.c: New test. Diff: --- gcc/testsuite/ChangeLog.omp | 6 + gcc/testsuite/g++.dg/goacc/loop-1.c | 23 +++ gcc/testsuite/g++.dg/goacc/loop-2.c | 70 +++++++ gcc/testsuite/g++.dg/goacc/loop-3.c | 43 ++++ libgomp/ChangeLog.omp | 25 +++ .../testsuite/libgomp.oacc-c-c++-common/lib-69.c | 55 +---- .../testsuite/libgomp.oacc-c-c++-common/lib-70.c | 79 +++----- .../testsuite/libgomp.oacc-c-c++-common/lib-72.c | 60 +----- .../testsuite/libgomp.oacc-c-c++-common/lib-73.c | 64 +----- .../testsuite/libgomp.oacc-c-c++-common/lib-74.c | 87 +++----- .../testsuite/libgomp.oacc-c-c++-common/lib-75.c | 81 ++------ .../testsuite/libgomp.oacc-c-c++-common/lib-76.c | 80 ++------ .../testsuite/libgomp.oacc-c-c++-common/lib-78.c | 83 +++----- .../testsuite/libgomp.oacc-c-c++-common/lib-79.c | 83 ++------ .../testsuite/libgomp.oacc-c-c++-common/lib-81.c | 102 ++++------ .../testsuite/libgomp.oacc-c-c++-common/lib-82.c | 43 +--- .../testsuite/libgomp.oacc-c-c++-common/lib-93.c | 19 ++ libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h | 45 +---- .../testsuite/libgomp.oacc-c-c++-common/subr.ptx | 222 ++++++++------------- .../testsuite/libgomp.oacc-c-c++-common/timer.h | 103 ---------- libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 | 12 +- 21 files changed, 492 insertions(+), 893 deletions(-) diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp index e22d9abc79b..74fd6f5464e 100644 --- a/gcc/testsuite/ChangeLog.omp +++ b/gcc/testsuite/ChangeLog.omp 
@@ -1,3 +1,9 @@ +2018-10-22 Cesar Philippidis + + * g++.dg/goacc/loop-1.c: New test. + * g++.dg/goacc/loop-2.c: New test. + * g++.dg/goacc/loop-3.c: New test. + 2018-12-13 Cesar Philippidis Nathan Sidwell Julian Brown diff --git a/gcc/testsuite/g++.dg/goacc/loop-1.c b/gcc/testsuite/g++.dg/goacc/loop-1.c new file mode 100644 index 00000000000..51b20b0e2da --- /dev/null +++ b/gcc/testsuite/g++.dg/goacc/loop-1.c @@ -0,0 +1,23 @@ +void +f (int i, float j, int k) +{ +#pragma acc parallel num_gangs (i) num_workers (i) vector_length (i) +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc parallel num_gangs (j) /* { dg-error "'num_gangs' expression must be integral" } */ +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc parallel num_workers (j) /* { dg-error "'num_workers' expression must be integral" } */ +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc parallel vector_length (j) /* { dg-error "'vector_length' expression must be integral" } */ +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; +} diff --git a/gcc/testsuite/g++.dg/goacc/loop-2.c b/gcc/testsuite/g++.dg/goacc/loop-2.c new file mode 100644 index 00000000000..ddfb4804353 --- /dev/null +++ b/gcc/testsuite/g++.dg/goacc/loop-2.c @@ -0,0 +1,70 @@ +void +f (int i, int j, int k) +{ +#pragma acc kernels +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels +#pragma acc loop gang (num: 10) + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels +#pragma acc loop gang (static: 10) + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels +#pragma acc loop gang (static: 5, num: 10) + for (i = 0; i < 20; ++i) + ; + + +#pragma acc kernels +#pragma acc loop gang (static: 5, num: 10, *) /* { dg-error "duplicate operand to clause" } */ + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels +#pragma acc loop gang (static: 5, num: 10, static: *) /* { dg-error "duplicate 'num' argument" } */ + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels +#pragma 
acc loop worker (static: 234) /* { dg-error "expected 'num' before" } */ + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels +#pragma acc loop worker (num: 234) + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels +#pragma acc loop worker (num: 234, num: 12) /* { dg-error "duplicate operand to clause" } */ + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels +#pragma acc loop vector /* { dg-error "gang, worker and vector must occur in this order in a loop nest" } */ + for (i = 0; i < 20; ++i) +#pragma acc loop worker + for (j = 0; j < 25; ++j) + ; + +#pragma acc kernels +#pragma acc loop worker (length: 20) /* { dg-error "expected 'num' before 'length'" } */ + for (i = 0; i < 20; ++i) +#pragma acc loop vector (length: 10) + for (j = 0; j < 25; ++j) + ; + +#pragma acc kernels +#pragma acc loop worker + for (i = 0; i < 20; ++i) +#pragma acc loop vector + for (j = 0; j < 25; ++j) + ; +} diff --git a/gcc/testsuite/g++.dg/goacc/loop-3.c b/gcc/testsuite/g++.dg/goacc/loop-3.c new file mode 100644 index 00000000000..c43b4f33e62 --- /dev/null +++ b/gcc/testsuite/g++.dg/goacc/loop-3.c @@ -0,0 +1,43 @@ +void +f (int i, int j, int k) +{ +#pragma acc kernels num_gangs (10) /* { dg-error "'num_gangs' is not valid" } */ +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels num_workers (10) /* { dg-error "'num_workers' is not valid" } */ +#pragma acc loop worker + for (i = 0; i < 20; ++i) + ; + +#pragma acc kernels vector_length (10) /* { dg-error "'vector_length' is not valid" } */ +#pragma acc loop vector + for (i = 0; i < 20; ++i) + ; + +#pragma acc parallel num_gangs (10) num_workers (20) vector_length (32) +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc parallel num_gangs (i) num_workers (j) vector_length (k) +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc parallel num_gangs (10, i) /* { dg-error "expected '\\)' before ',' token" } */ +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc 
parallel num_workers (10, i) /* { dg-error "expected '\\)' before ',' token" } */ +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; + +#pragma acc parallel vector_length (10, i) /* { dg-error "expected '\\)' before ',' token" } */ +#pragma acc loop gang + for (i = 0; i < 20; ++i) + ; +} diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index a5561fffe4d..75345158736 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,3 +1,28 @@ +2018-10-22 James Norris + Cesar Philippidis + Tom de Vries + + * testsuite/libgomp.oacc-fortran/data-3.f90: Update parallel + regions to denote variables copyied in via acc enter data as + present. + * testsuite/libgomp.oacc-c-c++-common/subr.h: Reimplement. + * testsuite/libgomp.oacc-c-c++-common/subr.ptx: Regenerated PTX. + * testsuite/libgomp.oacc-c-c++-common/timer.h: Removed. + * testsuite/libgomp.oacc-c-c++-common/lib-69.c: Change async checks. + * testsuite/libgomp.oacc-c-c++-common/lib-70.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-72.c: Rework kernel i/f and + change async checks. + * testsuite/libgomp.oacc-c-c++-common/lib-73.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-74.c: Rework kernel i/f and + timing checks. + * testsuite/libgomp.oacc-c-c++-common/lib-75.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-76.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-78.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-79.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-81.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-82.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/lib-93.c: New test. 
+ 2018-12-13 Cesar Philippidis Nathan Sidwell Julian Brown diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c index 00e0ca89f17..0c46f955f1e 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-69.c @@ -10,46 +10,14 @@ int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; CUstream stream; - unsigned long *a, *d_a, dticks; - int nbytes; - float dtime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); - abort (); - } - - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - - r = cuModuleLoad (&module, "subr.ptx"); + r = cuModuleLoad (&module, "./subr.ptx"); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuModuleLoad failed: %d\n", r); @@ -63,20 +31,6 @@ main (int argc, char **argv) abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - stream = (CUstream) acc_get_cuda_stream (0); if (stream != NULL) abort (); @@ -91,7 +45,7 @@ main (int argc, char **argv) if (!acc_set_cuda_stream (0, stream)) abort (); - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, 
NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -119,11 +73,6 @@ main (int argc, char **argv) abort (); } - acc_unmap_data (a); - - free (a); - acc_free (d_a); - acc_shutdown (acc_device_nvidia); exit (0); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c index a2918c08a12..b28d1152cc3 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-70.c @@ -2,6 +2,7 @@ /* { dg-additional-options "-lcuda" } */ /* { dg-require-effective-target openacc_cuda } */ +#include #include #include #include @@ -11,47 +12,17 @@ int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; - const int N = 10; + const int N = 3; int i; CUstream streams[N]; - unsigned long *a, *d_a, dticks; - int nbytes; - float dtime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; + struct timeval tv1, tv2; + time_t diff; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); - abort (); - } - - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { @@ -66,20 +37,6 @@ main (int argc, char **argv) abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); - - kargs[0] = (void *) &d_a; 
- kargs[1] = (void *) &dticks; - for (i = 0; i < N; i++) { streams[i] = (CUstream) acc_get_cuda_stream (i); @@ -97,9 +54,29 @@ main (int argc, char **argv) abort (); } + gettimeofday (&tv1, NULL); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[0], NULL, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + r = cuCtxSynchronize (); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxLaunch failed: %d\n", r); + abort (); + } + + gettimeofday (&tv2, NULL); + + diff = tv2.tv_sec - tv1.tv_sec; + for (i = 0; i < N; i++) { - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -113,7 +90,7 @@ main (int argc, char **argv) } } - sleep ((int) (dtime / 1000.0f) + 1); + sleep ((diff + 1) * N); for (i = 0; i < N; i++) { @@ -124,10 +101,6 @@ main (int argc, char **argv) } } - acc_unmap_data (a); - - free (a); - acc_free (d_a); acc_shutdown (acc_device_nvidia); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c index 99b62f1132a..025cd8a4816 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-72.c @@ -11,45 +11,13 @@ int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; CUstream stream; - unsigned long *a, *d_a, dticks; - int nbytes; - float dtime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); - abort (); - } - - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, 
"cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { @@ -64,20 +32,6 @@ main (int argc, char **argv) abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); if (r != CUDA_SUCCESS) { @@ -88,7 +42,7 @@ main (int argc, char **argv) if (!acc_set_cuda_stream (0, stream)) abort (); - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -101,7 +55,12 @@ main (int argc, char **argv) abort (); } - sleep ((int) (dtime / 1000.f) + 1); + r = cuCtxSynchronize (); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxSynchronize () failed: %d\n", r); + abort (); + } if (acc_async_test_all () != 1) { @@ -109,11 +68,6 @@ main (int argc, char **argv) abort (); } - acc_unmap_data (a); - - free (a); - acc_free (d_a); - acc_shutdown (acc_device_nvidia); exit (0); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c index 5b4b3fdde31..21e0f8c4484 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-73.c @@ -2,6 +2,7 @@ /* { dg-additional-options "-lcuda" } */ /* { dg-require-effective-target openacc_cuda } */ +#include #include #include #include @@ -11,47 +12,15 @@ int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule 
module; CUresult r; - const int N = 10; + const int N = 6; int i; CUstream streams[N]; - unsigned long *a, *d_a, dticks; - int nbytes; - float dtime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); - abort (); - } - - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { @@ -66,20 +35,6 @@ main (int argc, char **argv) abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - for (i = 0; i < N; i++) { streams[i] = (CUstream) acc_get_cuda_stream (i); @@ -99,13 +54,12 @@ main (int argc, char **argv) for (i = 0; i < N; i++) { - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); abort (); } - } if (acc_async_test_all () != 0) @@ -114,7 +68,12 @@ main (int argc, char **argv) abort (); } - sleep ((int) (dtime / 1000.0f) + 1); + r = cuCtxSynchronize (); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxSynchronize failed: %d\n", r); + abort (); + } if (acc_async_test_all () != 1) { @@ -122,11 +81,6 @@ main (int argc, char **argv) abort (); } - acc_unmap_data (a); - - free 
(a); - acc_free (d_a); - acc_shutdown (acc_device_nvidia); exit (0); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c index 939f255f26c..13953df65b9 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-74.c @@ -6,77 +6,53 @@ #include #include #include -#include "timer.h" +#include int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; CUstream stream; - unsigned long *a, *d_a, dticks; - int nbytes; - float atime, dtime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; + struct timeval tv1, tv2; + time_t t1, t2; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); + r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); + fprintf (stderr, "cuModuleLoad failed: %d\n", r); abort (); } - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); + r = cuModuleGetFunction (&delay, module, "delay"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); abort (); } - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } + gettimeofday (&tv1, NULL); - r = cuModuleLoad (&module, "subr.ptx"); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleLoad failed: %d\n", r); + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); abort (); } - r = cuModuleGetFunction (&delay, module, "delay"); + r = cuCtxSynchronize (); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + fprintf (stderr, "cuCtxSynchronize failed: %d\n", r); abort (); } - 
nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); + gettimeofday (&tv2, NULL); - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; + t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); stream = (CUstream) acc_get_cuda_stream (0); if (stream != NULL) @@ -92,11 +68,9 @@ main (int argc, char **argv) if (!acc_set_cuda_stream (0, stream)) abort (); - init_timers (1); + gettimeofday (&tv1, NULL); - start_timer (0); - - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -104,38 +78,31 @@ main (int argc, char **argv) } acc_wait (0); - /* Test unseen async-argument. */ - acc_wait (1); - atime = stop_timer (0); + gettimeofday (&tv2, NULL); + + t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); - if (atime < dtime) + if (((abs (t2 - t1) / t1) * 100.0) > 1.0) { - fprintf (stderr, "actual time < delay time\n"); + fprintf (stderr, "too long 1\n"); abort (); } - start_timer (0); + gettimeofday (&tv1, NULL); acc_wait (0); - /* Test unseen async-argument. 
*/ - acc_wait (1); - atime = stop_timer (0); + gettimeofday (&tv2, NULL); + + t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); - if (0.010 < atime) + if (t2 > 1000) { - fprintf (stderr, "actual time too long\n"); + fprintf (stderr, "too long 2\n"); abort (); } - acc_unmap_data (a); - - fini_timers (); - - free (a); - acc_free (d_a); - acc_shutdown (acc_device_nvidia); exit (0); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c index 804ee3938ac..96c36758a3d 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-75.c @@ -7,78 +7,55 @@ #include #include #include -#include "timer.h" +#include int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; - int N; + const int N = 2; int i; CUstream stream; - unsigned long *a, *d_a, dticks; - int nbytes; - float atime, dtime, hitime, lotime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; + struct timeval tv1, tv2; + time_t t1, t2; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); + r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); + fprintf (stderr, "cuModuleLoad failed: %d\n", r); abort (); } - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); + r = cuModuleGetFunction (&delay, module, "delay"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); abort (); } - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } + gettimeofday (&tv1, NULL); - r = cuModuleLoad (&module, "subr.ptx"); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, 
NULL, 0); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleLoad failed: %d\n", r); + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); abort (); } - r = cuModuleGetFunction (&delay, module, "delay"); + r = cuCtxSynchronize (); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + fprintf (stderr, "cuCtxSynchronize failed: %d\n", r); abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - N = nprocs; - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); + gettimeofday (&tv2, NULL); - acc_map_data (a, d_a, nbytes); + t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); stream = (CUstream) acc_get_cuda_stream (0); if (stream != NULL) @@ -94,16 +71,11 @@ main (int argc, char **argv) if (!acc_set_cuda_stream (0, stream)) abort (); - init_timers (1); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - - start_timer (0); + gettimeofday (&tv1, NULL); for (i = 0; i < N; i++) { - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -113,27 +85,18 @@ main (int argc, char **argv) acc_wait (0); } - atime = stop_timer (0); + gettimeofday (&tv2, NULL); - hitime = dtime * N; - hitime += hitime * 0.02; + t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); - lotime = dtime * N; - lotime -= lotime * 0.02; + t1 *= N; - if (atime > hitime || atime < lotime) + if (((abs (t2 - t1) / t1) * 100.0) > 1.0) { - fprintf (stderr, "actual time < delay time\n"); + fprintf (stderr, "too long\n"); abort (); } - acc_unmap_data (a); - - fini_timers (); - - free (a); - acc_free (d_a); - acc_shutdown (acc_device_nvidia); exit (0); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c index f9045266f1c..0ec97dd364b 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-76.c @@ -7,78 +7,55 @@ #include #include #include -#include "timer.h" +#include int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; - int N; + const int N = 2; int i; CUstream *streams; - unsigned long *a, *d_a, dticks; - int nbytes; - float atime, dtime, hitime, lotime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; + struct timeval tv1, tv2; + time_t t1, t2; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); + r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); + fprintf (stderr, "cuModuleLoad failed: %d\n", r); abort (); } - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); + r = cuModuleGetFunction (&delay, module, "delay"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); abort (); } - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } + gettimeofday (&tv1, NULL); - r = cuModuleLoad (&module, "subr.ptx"); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleLoad failed: %d\n", r); + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); abort (); } - r = cuModuleGetFunction (&delay, module, "delay"); + r = cuCtxSynchronize (); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + fprintf (stderr, "cuCtxSynchronize failed: %d\n", r); abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned 
long) (dtime * clkrate); - - N = nprocs; + gettimeofday (&tv2, NULL); - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); + t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); streams = (CUstream *) malloc (N * sizeof (void *)); @@ -99,16 +76,11 @@ main (int argc, char **argv) abort (); } - init_timers (1); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - - start_timer (0); + gettimeofday (&tv1, NULL); for (i = 0; i < N; i++) { - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -118,27 +90,19 @@ main (int argc, char **argv) acc_wait (i); } - atime = stop_timer (0); + gettimeofday (&tv2, NULL); - hitime = dtime * N; - hitime += hitime * 0.02; + t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); - lotime = dtime * N; - lotime -= lotime * 0.02; + t1 *= N; - if (atime > hitime || atime < lotime) + if (((abs (t2 - t1) / t1) * 100.0) > 1.0) { - fprintf (stderr, "actual time < delay time\n"); + fprintf (stderr, "too long\n"); abort (); } - acc_unmap_data (a); - - fini_timers (); - free (streams); - free (a); - acc_free (d_a); acc_shutdown (acc_device_nvidia); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c index d8cba4db538..fb191c6625b 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-78.c @@ -7,77 +7,53 @@ #include #include #include -#include "timer.h" +#include int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; CUstream stream; - unsigned long *a, *d_a, dticks; - int nbytes; - float atime, dtime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; + struct timeval tv1, tv2; + 
time_t t1, t2; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); + r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); + fprintf (stderr, "cuModuleLoad failed: %d\n", r); abort (); } - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); + r = cuModuleGetFunction (&delay, module, "delay"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); abort (); } - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } + gettimeofday (&tv1, NULL); - r = cuModuleLoad (&module, "subr.ptx"); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleLoad failed: %d\n", r); + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); abort (); } - r = cuModuleGetFunction (&delay, module, "delay"); + r = cuCtxSynchronize (); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + fprintf (stderr, "cuCtxSynchronize failed: %d\n", r); abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); + gettimeofday (&tv2, NULL); - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; + t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); stream = (CUstream) acc_get_cuda_stream (0); if (stream != NULL) @@ -93,11 +69,9 @@ main (int argc, char **argv) if (!acc_set_cuda_stream (0, stream)) abort (); - init_timers (1); + gettimeofday (&tv1, NULL); - start_timer (0); - - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 
0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -106,33 +80,30 @@ main (int argc, char **argv) acc_wait_all (); - atime = stop_timer (0); + gettimeofday (&tv2, NULL); + + t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); - if (atime < dtime) + if (t2 > (t1 + (t1 * 0.10))) { - fprintf (stderr, "actual time < delay time\n"); + fprintf (stderr, "too long 1\n"); abort (); } - start_timer (0); + gettimeofday (&tv1, NULL); acc_wait_all (); - atime = stop_timer (0); + gettimeofday (&tv2, NULL); + + t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); - if (0.010 < atime) + if (t2 > 1000) { - fprintf (stderr, "actual time too long\n"); + fprintf (stderr, "too long 2\n"); abort (); } - acc_unmap_data (a); - - fini_timers (); - - free (a); - acc_free (d_a); - acc_shutdown (acc_device_nvidia); exit (0); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c index b805d5f9b81..af8aa119f74 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-79.c @@ -7,80 +7,55 @@ #include #include #include -#include "timer.h" +#include int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; - int N; + const int N = 2; int i; CUstream stream; - unsigned long *a, *d_a, dticks; - int nbytes; - float atime, dtime, hitime, lotime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; - - devnum = 2; + struct timeval tv1, tv2; + time_t t1, t2; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); + r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); + fprintf (stderr, "cuModuleLoad failed: %d\n", r); abort (); } - r = - cuDeviceGetAttribute (&nprocs, 
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); + r = cuModuleGetFunction (&delay, module, "delay"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); abort (); } - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } + gettimeofday (&tv1, NULL); - r = cuModuleLoad (&module, "subr.ptx"); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleLoad failed: %d\n", r); + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); abort (); } - r = cuModuleGetFunction (&delay, module, "delay"); + r = cuCtxSynchronize (); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + fprintf (stderr, "cuCtxSynchronize failed: %d\n", r); abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - N = nprocs; - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); + gettimeofday (&tv2, NULL); - acc_map_data (a, d_a, nbytes); + t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); if (r != CUDA_SUCCESS) @@ -106,16 +81,11 @@ main (int argc, char **argv) if (!acc_set_cuda_stream (0, stream)) abort (); - init_timers (1); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - - start_timer (0); + gettimeofday (&tv1, NULL); for (i = 0; i < N; i++) { - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -157,7 +127,7 @@ main (int argc, char **argv) acc_wait (1); - atime = stop_timer (0); + gettimeofday (&tv2, NULL); if (acc_async_test (0) != 1) abort 
(); @@ -165,25 +135,16 @@ main (int argc, char **argv) if (acc_async_test (1) != 1) abort (); - hitime = dtime * N; - hitime += hitime * 0.02; + t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); - lotime = dtime * N; - lotime -= lotime * 0.02; + t1 *= N; - if (atime > hitime || atime < lotime) + if (((abs (t2 - t1) / t1) * 100.0) > 1.0) { - fprintf (stderr, "actual time < delay time\n"); + fprintf (stderr, "too long\n"); abort (); } - acc_unmap_data (a); - - fini_timers (); - - free (a); - acc_free (d_a); - acc_shutdown (acc_device_nvidia); exit (0); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c index 958672c45e5..902d2574b45 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-81.c @@ -7,78 +7,55 @@ #include #include #include -#include "timer.h" +#include int main (int argc, char **argv) { - CUdevice dev; CUfunction delay; CUmodule module; CUresult r; - int N; + const int N = 2; int i; CUstream *streams, stream; - unsigned long *a, *d_a, dticks; - int nbytes; - float atime, dtime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; + struct timeval tv1, tv2; + time_t t1, t2; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); + r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); + fprintf (stderr, "cuModuleLoad failed: %d\n", r); abort (); } - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); + r = cuModuleGetFunction (&delay, module, "delay"); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); abort (); } - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, 
"cuDeviceGetAttribute failed: %d\n", r); - abort (); - } + gettimeofday (&tv1, NULL); - r = cuModuleLoad (&module, "subr.ptx"); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, NULL, NULL, 0); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleLoad failed: %d\n", r); - abort (); + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); } - r = cuModuleGetFunction (&delay, module, "delay"); + r = cuCtxSynchronize (); if (r != CUDA_SUCCESS) { - fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); - abort (); + fprintf (stderr, "cuCtxSynchronize failed: %d\n", r); + abort (); } - nbytes = nprocs * sizeof (unsigned long); - - dtime = 500.0; - - dticks = (unsigned long) (dtime * clkrate); + gettimeofday (&tv2, NULL); - N = nprocs; - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); + t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); streams = (CUstream *) malloc (N * sizeof (void *)); @@ -99,11 +76,6 @@ main (int argc, char **argv) abort (); } - init_timers (1); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - stream = (CUstream) acc_get_cuda_stream (N); if (stream != NULL) abort (); @@ -118,11 +90,11 @@ main (int argc, char **argv) if (!acc_set_cuda_stream (N, stream)) abort (); - start_timer (0); + gettimeofday (&tv1, NULL); for (i = 0; i < N; i++) { - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], NULL, 0); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuLaunchKernel failed: %d\n", r); @@ -130,6 +102,10 @@ main (int argc, char **argv) } } + gettimeofday (&tv2, NULL); + + t2 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); + acc_wait_all_async (N); for (i = 0; i <= N; i++) @@ -146,15 +122,13 @@ main (int argc, char **argv) abort (); } - atime = stop_timer (0); - - if (atime < dtime) + if ((t1 * N) < t2) { - fprintf (stderr, "actual time < 
delay time\n"); + fprintf (stderr, "too long 1\n"); abort (); } - start_timer (0); + gettimeofday (&tv1, NULL); stream = (CUstream) acc_get_cuda_stream (N + 1); if (stream != NULL) @@ -174,35 +148,33 @@ main (int argc, char **argv) acc_wait (N + 1); - atime = stop_timer (0); + gettimeofday (&tv2, NULL); + + t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); - if (0.10 < atime) + if (t1 > 1000) { - fprintf (stderr, "actual time too long\n"); + fprintf (stderr, "too long 2\n"); abort (); } - start_timer (0); + gettimeofday (&tv1, NULL); acc_wait_all_async (N); acc_wait (N); - atime = stop_timer (0); + gettimeofday (&tv2, NULL); - if (0.10 < atime) + t1 = ((tv2.tv_sec - tv1.tv_sec) * 1000000) + (tv2.tv_usec - tv1.tv_usec); + + if (t1 > 1000) { - fprintf (stderr, "actual time too long\n"); + fprintf (stderr, "too long 3\n"); abort (); } - acc_unmap_data (a); - - fini_timers (); - free (streams); - free (a); - acc_free (d_a); acc_shutdown (acc_device_nvidia); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c index a36f8e69a18..054ffbf18ae 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-82.c @@ -11,46 +11,18 @@ int main (int argc, char **argv) { - CUdevice dev; CUfunction delay2; CUmodule module; CUresult r; - int N; + const int N = 32; int i; CUstream *streams; - unsigned long **a, **d_a, *tid, ticks; + unsigned long **a, **d_a, *tid; int nbytes; - void *kargs[3]; - int clkrate; - int devnum, nprocs; + void *kargs[2]; acc_init (acc_device_nvidia); - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); - abort (); - } - - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - 
abort (); - } - - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - r = cuModuleLoad (&module, "subr.ptx"); if (r != CUDA_SUCCESS) { @@ -67,10 +39,6 @@ main (int argc, char **argv) nbytes = sizeof (int); - ticks = (unsigned long) (200.0 * clkrate); - - N = nprocs; - streams = (CUstream *) malloc (N * sizeof (void *)); a = (unsigned long **) malloc (N * sizeof (unsigned long *)); @@ -104,8 +72,7 @@ main (int argc, char **argv) for (i = 0; i < N; i++) { kargs[0] = (void *) &d_a[i]; - kargs[1] = (void *) &ticks; - kargs[2] = (void *) &tid[i]; + kargs[1] = (void *) &tid[i]; r = cuLaunchKernel (delay2, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); if (r != CUDA_SUCCESS) @@ -113,8 +80,6 @@ main (int argc, char **argv) fprintf (stderr, "cuLaunchKernel failed: %d\n", r); abort (); } - - ticks = (unsigned long) (50.0 * clkrate); } acc_wait_all_async (0); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-93.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-93.c new file mode 100644 index 00000000000..bc60a16c64f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-93.c @@ -0,0 +1,19 @@ +/* { dg-do run { target { ! 
openacc_nvidia_accel_configured } } } */ + +#include +#include + +int +main (void) +{ + fprintf (stderr, "CheCKpOInT\n"); + acc_init (acc_device_nvidia); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-output "CheCKpOInT(\n|\r\n|\r).*" } */ +/* { dg-output "device type nvidia not supported" } */ +/* { dg-shouldfail "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h index 9db236c8362..a99c08ddd18 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.h @@ -1,46 +1,23 @@ - -#if ACC_DEVICE_TYPE_nvidia - #pragma acc routine nohost -static int clock (void) -{ - int thetime; - - asm __volatile__ ("mov.u32 %0, %%clock;" : "=r"(thetime)); - - return thetime; -} - -#endif - void -delay (unsigned long *d_o, unsigned long delay) +delay () { - int start, ticks; + int i, sum; + const int N = 500000; - start = clock (); - - ticks = 0; - - while (ticks < delay) - ticks = clock () - start; - - return; + for (i = 0; i < N; i++) + sum = sum + 1; } +#pragma acc routine nohost void -delay2 (unsigned long *d_o, unsigned long delay, unsigned long tid) +delay2 (unsigned long *d_o, unsigned long tid) { - int start, ticks; + int i, sum; + const int N = 500000; - start = clock (); - - ticks = 0; - - while (ticks < delay) - ticks = clock () - start; + for (i = 0; i < N; i++) + sum = sum + 1; d_o[0] = tid; - - return; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx index 6f748fcaf9e..88b63bfb74b 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/subr.ptx @@ -1,148 +1,90 @@ -// BEGIN PREAMBLE - .version 3.1 - .target sm_30 + .version 3.1 + .target sm_30 .address_size 64 -// END PREAMBLE -// BEGIN FUNCTION DEF: clock -.func (.param.u32 %out_retval)clock -{ -.reg.u32 %retval; - .reg.u64 %hr10; - .reg.u32 
%r22; - .reg.u32 %r23; - .reg.u32 %r24; - .local.align 8 .b8 %frame[8]; - // #APP -// 7 "subr.c" 1 - mov.u32 %r24, %clock; -// 0 "" 2 - // #NO_APP - st.local.u32 [%frame], %r24; - ld.local.u32 %r22, [%frame]; - mov.u32 %r23, %r22; - mov.u32 %retval, %r23; - st.param.u32 [%out_retval], %retval; - ret; - } -// END FUNCTION DEF -// BEGIN GLOBAL FUNCTION DEF: delay -.visible .entry delay(.param.u64 %in_ar1, .param.u64 %in_ar2) -{ - .reg.u64 %ar1; - .reg.u64 %ar2; - .reg.u64 %hr10; - .reg.u64 %r22; - .reg.u32 %r23; - .reg.u64 %r24; - .reg.u64 %r25; - .reg.u32 %r26; - .reg.u32 %r27; - .reg.u32 %r28; - .reg.u32 %r29; - .reg.u32 %r30; - .reg.u64 %r31; - .reg.pred %r32; - .local.align 8 .b8 %frame[24]; - ld.param.u64 %ar1, [%in_ar1]; - ld.param.u64 %ar2, [%in_ar2]; - mov.u64 %r24, %ar1; - st.u64 [%frame+8], %r24; - mov.u64 %r25, %ar2; - st.local.u64 [%frame+16], %r25; + .visible .entry delay { - .param.u32 %retval_in; - { - call (%retval_in), clock; - } - ld.param.u32 %r26, [%retval_in]; -} - st.local.u32 [%frame+4], %r26; - mov.u32 %r27, 0; - st.local.u32 [%frame], %r27; - bra $L4; -$L5: - { - .param.u32 %retval_in; - { - call (%retval_in), clock; - } - ld.param.u32 %r28, [%retval_in]; -} - mov.u32 %r23, %r28; - ld.local.u32 %r30, [%frame+4]; - sub.u32 %r29, %r23, %r30; - st.local.u32 [%frame], %r29; -$L4: - ld.local.s32 %r22, [%frame]; - ld.local.u64 %r31, [%frame+16]; - setp.lo.u64 %r32,%r22,%r31; - @%r32 bra $L5; + .reg .u64 %hr10; + .reg .u32 %r22; + .reg .u32 %r23; + .reg .u32 %r24; + .reg .u32 %r25; + .reg .u32 %r26; + .reg .u32 %r27; + .reg .u32 %r28; + .reg .u32 %r29; + .reg .pred %r30; + .reg .u64 %frame; + .local .align 8 .b8 %farray[16]; + cvta.local.u64 %frame,%farray; + mov.u32 %r22,500000; + st.u32 [%frame+8],%r22; + mov.u32 %r23,0; + st.u32 [%frame],%r23; + bra $L2; + $L3: + ld.u32 %r25,[%frame+4]; + add.u32 %r24,%r25,1; + st.u32 [%frame+4],%r24; + ld.u32 %r27,[%frame]; + add.u32 %r26,%r27,1; + st.u32 [%frame],%r26; + $L2: + ld.u32 %r28,[%frame]; + ld.u32 
%r29,[%frame+8]; + setp.lt.s32 %r30,%r28,%r29; + @%r30 + bra $L3; ret; } -// END FUNCTION DEF -// BEGIN GLOBAL FUNCTION DEF: delay2 -.visible .entry delay2(.param.u64 %in_ar1, .param.u64 %in_ar2, .param.u64 %in_ar3) -{ - .reg.u64 %ar1; - .reg.u64 %ar2; - .reg.u64 %ar3; - .reg.u64 %hr10; - .reg.u64 %r22; - .reg.u32 %r23; - .reg.u64 %r24; - .reg.u64 %r25; - .reg.u64 %r26; - .reg.u32 %r27; - .reg.u32 %r28; - .reg.u32 %r29; - .reg.u32 %r30; - .reg.u32 %r31; - .reg.u64 %r32; - .reg.pred %r33; - .reg.u64 %r34; - .reg.u64 %r35; - .local.align 8 .b8 %frame[32]; - ld.param.u64 %ar1, [%in_ar1]; - ld.param.u64 %ar2, [%in_ar2]; - ld.param.u64 %ar3, [%in_ar3]; - mov.u64 %r24, %ar1; - st.local.u64 [%frame+8], %r24; - mov.u64 %r25, %ar2; - st.local.u64 [%frame+16], %r25; - mov.u64 %r26, %ar3; - st.local.u64 [%frame+24], %r26; - { - .param.u32 %retval_in; - { - call (%retval_in), clock; - } - ld.param.u32 %r27, [%retval_in]; -} - st.local.u32 [%frame+4], %r27; - mov.u32 %r28, 0; - st.local.u32 [%frame], %r28; - bra $L8; -$L9: - { - .param.u32 %retval_in; + + .visible .entry delay2 (.param .u64 %in_ar1, .param .u64 %in_ar2) { - call (%retval_in), clock; - } - ld.param.u32 %r29, [%retval_in]; -} - mov.u32 %r23, %r29; - ld.local.u32 %r31, [%frame+4]; - sub.u32 %r30, %r23, %r31; - st.local.u32 [%frame], %r30; -$L8: - ld.local.s32 %r22, [%frame]; - ld.local.u64 %r32, [%frame+16]; - setp.lo.u64 %r33,%r22,%r32; - @%r33 bra $L9; - ld.local.u64 %r34, [%frame+8]; - ld.local.u64 %r35, [%frame+24]; - st.u64 [%r34], %r35; + .reg .u64 %ar1; + .reg .u64 %ar2; + .reg .u64 %hr10; + .reg .u64 %r22; + .reg .u64 %r23; + .reg .u32 %r24; + .reg .u32 %r25; + .reg .u32 %r26; + .reg .u32 %r27; + .reg .u32 %r28; + .reg .u32 %r29; + .reg .u32 %r30; + .reg .u32 %r31; + .reg .pred %r32; + .reg .u64 %r33; + .reg .u64 %r34; + .reg .u64 %frame; + .local .align 8 .b8 %farray[32]; + cvta.local.u64 %frame,%farray; + ld.param.u64 %ar1,[%in_ar1]; + ld.param.u64 %ar2,[%in_ar2]; + mov.u64 %r22,%ar1; + st.u64 
[%frame+16],%r22; + mov.u64 %r23,%ar2; + st.u64 [%frame+24],%r23; + mov.u32 %r24,500000; + st.u32 [%frame+8],%r24; + mov.u32 %r25,0; + st.u32 [%frame],%r25; + bra $L5; + $L6: + ld.u32 %r27,[%frame+4]; + add.u32 %r26,%r27,1; + st.u32 [%frame+4],%r26; + ld.u32 %r29,[%frame]; + add.u32 %r28,%r29,1; + st.u32 [%frame],%r28; + $L5: + ld.u32 %r30,[%frame]; + ld.u32 %r31,[%frame+8]; + setp.lt.s32 %r32,%r30,%r31; + @%r32 + bra $L6; + ld.u64 %r33,[%frame+16]; + ld.u64 %r34,[%frame+24]; + st.u64 [%r33],%r34; ret; } -// END FUNCTION DEF diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h index 53749da5a0d..e69de29bb2d 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/timer.h @@ -1,103 +0,0 @@ - -#include -#include - -static int _Tnum_timers; -static CUevent *_Tstart_events, *_Tstop_events; -static CUstream _Tstream; - -void -init_timers (int ntimers) -{ - int i; - CUresult r; - - _Tnum_timers = ntimers; - - _Tstart_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent)); - _Tstop_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent)); - - r = cuStreamCreate (&_Tstream, CU_STREAM_DEFAULT); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuStreamCreate failed: %d\n", r); - abort (); - } - - for (i = 0; i < _Tnum_timers; i++) - { - r = cuEventCreate (&_Tstart_events[i], CU_EVENT_DEFAULT); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuEventCreate failed: %d\n", r); - abort (); - } - - r = cuEventCreate (&_Tstop_events[i], CU_EVENT_DEFAULT); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuEventCreate failed: %d\n", r); - abort (); - } - } -} - -void -fini_timers (void) -{ - int i; - - for (i = 0; i < _Tnum_timers; i++) - { - cuEventDestroy (_Tstart_events[i]); - cuEventDestroy (_Tstop_events[i]); - } - - cuStreamDestroy (_Tstream); - - free (_Tstart_events); - free (_Tstop_events); -} - -void -start_timer (int timer) -{ - CUresult 
r; - - r = cuEventRecord (_Tstart_events[timer], _Tstream); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuEventRecord failed: %d\n", r); - abort (); - } -} - -float -stop_timer (int timer) -{ - CUresult r; - float etime; - - r = cuEventRecord (_Tstop_events[timer], _Tstream); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuEventRecord failed: %d\n", r); - abort (); - } - - r = cuEventSynchronize (_Tstop_events[timer]); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuEventSynchronize failed: %d\n", r); - abort (); - } - - r = cuEventElapsedTime (&etime, _Tstart_events[timer], _Tstop_events[timer]); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuEventElapsedTime failed: %d\n", r); - abort (); - } - - return etime; -} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 index 19eb4bd6a45..b5586be0609 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/data-3.f90 @@ -55,7 +55,8 @@ program asyncwait c(:) = 0.0 d(:) = 0.0 - !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) create (d(1:N)) + !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) & + !$acc& create (d(1:N)) !$acc parallel async (1) do i = 1, N @@ -76,7 +77,8 @@ program asyncwait !$acc end parallel !$acc wait (1) - !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) copyout (d(1:N)) + !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) & + !$acc& copyout (d(1:N)) do i = 1, N if (a(i) .ne. 
3.0) STOP 5 @@ -91,7 +93,8 @@ program asyncwait d(:) = 0.0 e(:) = 0.0 - !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) create (d(1:N)) copyin (e(1:N)) + !$acc enter data copyin (a(1:N)) create (b(1:N)) create (c(1:N)) & + !$acc& create (d(1:N)) copyin (e(1:N)) !$acc parallel async (1) do i = 1, N @@ -118,7 +121,8 @@ program asyncwait !$acc end parallel !$acc wait (1) - !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) copyout (d(1:N)) copyout (e(1:N)) + !$acc exit data copyout (a(1:N)) copyout (b(1:N)) copyout (c(1:N)) & + !$acc& copyout (d(1:N)) copyout (e(1:N)) !$acc exit data delete (N) do i = 1, N