From 18c107c58d42314128e485bb79892672a8feaa6b Mon Sep 17 00:00:00 2001 From: Julian Brown Date: Mon, 13 Oct 2014 04:40:51 -0700 Subject: [PATCH 1/3] Tests for libgomp OpenACC support. --- libgomp/testsuite/Makefile.in | 4 + libgomp/testsuite/lib/libgomp.exp | 30 + libgomp/testsuite/libgomp.oacc-c++/c++.exp | 37 +- libgomp/testsuite/libgomp.oacc-c/abort-2.c | 17 + libgomp/testsuite/libgomp.oacc-c/abort.c | 17 + libgomp/testsuite/libgomp.oacc-c/acc_on_device-1.c | 25 +- libgomp/testsuite/libgomp.oacc-c/c.exp | 50 +- libgomp/testsuite/libgomp.oacc-c/clauses-1.c | 623 ++++++++++++++++++ libgomp/testsuite/libgomp.oacc-c/clauses-2.c | 67 ++ libgomp/testsuite/libgomp.oacc-c/context-1.c | 213 ++++++ libgomp/testsuite/libgomp.oacc-c/context-2.c | 223 +++++++ libgomp/testsuite/libgomp.oacc-c/context-3.c | 200 ++++++ libgomp/testsuite/libgomp.oacc-c/context-4.c | 213 ++++++ libgomp/testsuite/libgomp.oacc-c/data-1.c | 112 ++-- libgomp/testsuite/libgomp.oacc-c/deviceptr-1.c | 32 + libgomp/testsuite/libgomp.oacc-c/goacc_kernels.c | 3 +- libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c | 3 +- libgomp/testsuite/libgomp.oacc-c/if-1.c | 547 ++++++++++++++++ libgomp/testsuite/libgomp.oacc-c/kernels-1.c | 22 +- libgomp/testsuite/libgomp.oacc-c/lib-1.c | 19 +- libgomp/testsuite/libgomp.oacc-c/lib-10.c | 58 ++ libgomp/testsuite/libgomp.oacc-c/lib-11.c | 22 + libgomp/testsuite/libgomp.oacc-c/lib-12.c | 37 ++ libgomp/testsuite/libgomp.oacc-c/lib-13.c | 60 ++ libgomp/testsuite/libgomp.oacc-c/lib-14.c | 61 ++ libgomp/testsuite/libgomp.oacc-c/lib-15.c | 33 + libgomp/testsuite/libgomp.oacc-c/lib-16.c | 29 + libgomp/testsuite/libgomp.oacc-c/lib-17.c | 31 + libgomp/testsuite/libgomp.oacc-c/lib-18.c | 34 + libgomp/testsuite/libgomp.oacc-c/lib-19.c | 60 ++ libgomp/testsuite/libgomp.oacc-c/lib-2.c | 26 + libgomp/testsuite/libgomp.oacc-c/lib-20.c | 29 + libgomp/testsuite/libgomp.oacc-c/lib-21.c | 29 + libgomp/testsuite/libgomp.oacc-c/lib-22.c | 29 + libgomp/testsuite/libgomp.oacc-c/lib-23.c | 39 
++ libgomp/testsuite/libgomp.oacc-c/lib-24.c | 55 ++ libgomp/testsuite/libgomp.oacc-c/lib-25.c | 30 + libgomp/testsuite/libgomp.oacc-c/lib-26.c | 26 + libgomp/testsuite/libgomp.oacc-c/lib-27.c | 26 + libgomp/testsuite/libgomp.oacc-c/lib-28.c | 26 + libgomp/testsuite/libgomp.oacc-c/lib-29.c | 26 + libgomp/testsuite/libgomp.oacc-c/lib-3.c | 15 + libgomp/testsuite/libgomp.oacc-c/lib-30.c | 26 + libgomp/testsuite/libgomp.oacc-c/lib-31.c | 27 + libgomp/testsuite/libgomp.oacc-c/lib-32.c | 38 ++ libgomp/testsuite/libgomp.oacc-c/lib-33.c | 31 + libgomp/testsuite/libgomp.oacc-c/lib-34.c | 33 + libgomp/testsuite/libgomp.oacc-c/lib-35.c | 26 + libgomp/testsuite/libgomp.oacc-c/lib-36.c | 26 + libgomp/testsuite/libgomp.oacc-c/lib-37.c | 40 ++ libgomp/testsuite/libgomp.oacc-c/lib-38.c | 67 ++ libgomp/testsuite/libgomp.oacc-c/lib-39.c | 41 ++ libgomp/testsuite/libgomp.oacc-c/lib-4.c | 13 + libgomp/testsuite/libgomp.oacc-c/lib-40.c | 42 ++ libgomp/testsuite/libgomp.oacc-c/lib-41.c | 43 ++ libgomp/testsuite/libgomp.oacc-c/lib-42.c | 35 + libgomp/testsuite/libgomp.oacc-c/lib-43.c | 45 ++ libgomp/testsuite/libgomp.oacc-c/lib-44.c | 45 ++ libgomp/testsuite/libgomp.oacc-c/lib-45.c | 50 ++ libgomp/testsuite/libgomp.oacc-c/lib-46.c | 42 ++ libgomp/testsuite/libgomp.oacc-c/lib-47.c | 43 ++ libgomp/testsuite/libgomp.oacc-c/lib-48.c | 43 ++ libgomp/testsuite/libgomp.oacc-c/lib-49.c | 48 ++ libgomp/testsuite/libgomp.oacc-c/lib-5.c | 40 ++ libgomp/testsuite/libgomp.oacc-c/lib-50.c | 30 + libgomp/testsuite/libgomp.oacc-c/lib-51.c | 41 ++ libgomp/testsuite/libgomp.oacc-c/lib-52.c | 28 + libgomp/testsuite/libgomp.oacc-c/lib-53.c | 28 + libgomp/testsuite/libgomp.oacc-c/lib-54.c | 28 + libgomp/testsuite/libgomp.oacc-c/lib-55.c | 48 ++ libgomp/testsuite/libgomp.oacc-c/lib-56.c | 33 + libgomp/testsuite/libgomp.oacc-c/lib-57.c | 28 + libgomp/testsuite/libgomp.oacc-c/lib-58.c | 28 + libgomp/testsuite/libgomp.oacc-c/lib-59.c | 55 ++ libgomp/testsuite/libgomp.oacc-c/lib-6.c | 39 ++ 
libgomp/testsuite/libgomp.oacc-c/lib-60.c | 54 ++ libgomp/testsuite/libgomp.oacc-c/lib-61.c | 70 ++ libgomp/testsuite/libgomp.oacc-c/lib-62.c | 49 ++ libgomp/testsuite/libgomp.oacc-c/lib-63.c | 43 ++ libgomp/testsuite/libgomp.oacc-c/lib-64.c | 43 ++ libgomp/testsuite/libgomp.oacc-c/lib-65.c | 43 ++ libgomp/testsuite/libgomp.oacc-c/lib-66.c | 47 ++ libgomp/testsuite/libgomp.oacc-c/lib-67.c | 43 ++ libgomp/testsuite/libgomp.oacc-c/lib-68.c | 43 ++ libgomp/testsuite/libgomp.oacc-c/lib-69.c | 124 ++++ libgomp/testsuite/libgomp.oacc-c/lib-7.c | 18 + libgomp/testsuite/libgomp.oacc-c/lib-70.c | 136 ++++ libgomp/testsuite/libgomp.oacc-c/lib-71.c | 119 ++++ libgomp/testsuite/libgomp.oacc-c/lib-72.c | 121 ++++ libgomp/testsuite/libgomp.oacc-c/lib-73.c | 134 ++++ libgomp/testsuite/libgomp.oacc-c/lib-74.c | 139 ++++ libgomp/testsuite/libgomp.oacc-c/lib-75.c | 141 ++++ libgomp/testsuite/libgomp.oacc-c/lib-76.c | 147 +++++ libgomp/testsuite/libgomp.oacc-c/lib-77.c | 135 ++++ libgomp/testsuite/libgomp.oacc-c/lib-78.c | 140 ++++ libgomp/testsuite/libgomp.oacc-c/lib-79.c | 167 +++++ libgomp/testsuite/libgomp.oacc-c/lib-80.c | 132 ++++ libgomp/testsuite/libgomp.oacc-c/lib-81.c | 211 ++++++ libgomp/testsuite/libgomp.oacc-c/lib-82.c | 144 +++++ libgomp/testsuite/libgomp.oacc-c/lib-83.c | 58 ++ libgomp/testsuite/libgomp.oacc-c/lib-84.c | 66 ++ libgomp/testsuite/libgomp.oacc-c/lib-85.c | 52 ++ libgomp/testsuite/libgomp.oacc-c/lib-86.c | 42 ++ libgomp/testsuite/libgomp.oacc-c/lib-87.c | 42 ++ libgomp/testsuite/libgomp.oacc-c/lib-88.c | 111 ++++ libgomp/testsuite/libgomp.oacc-c/lib-89.c | 118 ++++ libgomp/testsuite/libgomp.oacc-c/lib-9.c | 70 ++ libgomp/testsuite/libgomp.oacc-c/lib-90.c | 137 ++++ libgomp/testsuite/libgomp.oacc-c/lib-91.c | 84 +++ libgomp/testsuite/libgomp.oacc-c/lib-92.c | 112 ++++ libgomp/testsuite/libgomp.oacc-c/nested-1.c | 680 ++++++++++++++++++++ libgomp/testsuite/libgomp.oacc-c/nested-2.c | 35 + libgomp/testsuite/libgomp.oacc-c/offset-1.c | 97 +++ 
libgomp/testsuite/libgomp.oacc-c/parallel-1.c | 76 ++- libgomp/testsuite/libgomp.oacc-c/pointer-align-1.c | 35 + libgomp/testsuite/libgomp.oacc-c/present-1.c | 48 ++ libgomp/testsuite/libgomp.oacc-c/present-2.c | 48 ++ libgomp/testsuite/libgomp.oacc-c/subr.cu | 64 ++ libgomp/testsuite/libgomp.oacc-c/subr.ptx | 148 +++++ libgomp/testsuite/libgomp.oacc-c/timer.h | 103 +++ libgomp/testsuite/libgomp.oacc-c/update-1.c | 280 ++++++++ libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90 | 10 + libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90 | 13 + .../libgomp.oacc-fortran/acc_on_device-1-1.f90 | 17 +- .../libgomp.oacc-fortran/acc_on_device-1-2.f | 17 +- .../libgomp.oacc-fortran/acc_on_device-1-3.f | 17 +- libgomp/testsuite/libgomp.oacc-fortran/fortran.exp | 42 +- libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 | 10 + libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 | 82 +++ libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90 | 82 +++ libgomp/testsuite/libgomp.oacc-fortran/lib-2.f | 10 + libgomp/testsuite/libgomp.oacc-fortran/lib-3.f | 10 + libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90 | 35 + libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 | 31 + libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90 | 35 + libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 | 31 + libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 | 83 +++ libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90 | 83 +++ libgomp/testsuite/libgomp.oacc-fortran/map-1.f90 | 97 +++ .../libgomp.oacc-fortran/pointer-align-1.f90 | 21 + libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90 | 229 +++++++ .../testsuite/libgomp.oacc-fortran/subarrays-1.f90 | 97 +++ .../testsuite/libgomp.oacc-fortran/subarrays-2.f90 | 100 +++ 143 files changed, 10476 insertions(+), 93 deletions(-) create mode 100644 libgomp/testsuite/libgomp.oacc-c/abort-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/abort.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/clauses-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/clauses-2.c create 
mode 100644 libgomp/testsuite/libgomp.oacc-c/context-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/context-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/context-3.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/context-4.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/deviceptr-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/if-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-10.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-11.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-12.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-13.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-14.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-15.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-16.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-17.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-18.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-19.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-20.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-21.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-22.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-23.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-24.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-25.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-26.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-27.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-28.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-29.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-3.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-30.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-31.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-32.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-33.c create mode 100644 
libgomp/testsuite/libgomp.oacc-c/lib-34.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-35.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-36.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-37.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-38.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-39.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-4.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-40.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-41.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-42.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-43.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-44.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-45.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-46.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-47.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-48.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-49.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-5.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-50.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-51.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-52.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-53.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-54.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-55.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-56.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-57.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-58.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-59.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-6.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-60.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-61.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-62.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-63.c create 
mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-64.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-65.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-66.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-67.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-68.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-69.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-7.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-70.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-71.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-72.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-73.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-74.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-75.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-76.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-77.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-78.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-79.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-80.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-81.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-82.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-83.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-84.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-85.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-86.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-87.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-88.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-89.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-9.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-90.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-91.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/lib-92.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/nested-1.c create mode 100644 
libgomp/testsuite/libgomp.oacc-c/nested-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/offset-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/pointer-align-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/present-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/present-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c/subr.cu create mode 100644 libgomp/testsuite/libgomp.oacc-c/subr.ptx create mode 100644 libgomp/testsuite/libgomp.oacc-c/timer.h create mode 100644 libgomp/testsuite/libgomp.oacc-c/update-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/map-1.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90 diff --git a/libgomp/testsuite/Makefile.in b/libgomp/testsuite/Makefile.in index 5273eaa..77b365e 100644 --- a/libgomp/testsuite/Makefile.in +++ b/libgomp/testsuite/Makefile.in @@ -129,6 +129,10 @@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PERL = @PERL@ +PLUGIN_NVPTX = @PLUGIN_NVPTX@ +PLUGIN_NVPTX_CPPFLAGS = @PLUGIN_NVPTX_CPPFLAGS@ 
+PLUGIN_NVPTX_LDFLAGS = @PLUGIN_NVPTX_LDFLAGS@ +PLUGIN_NVPTX_LIBS = @PLUGIN_NVPTX_LIBS@ RANLIB = @RANLIB@ SECTION_LDFLAGS = @SECTION_LDFLAGS@ SED = @SED@ diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp index 094e5ed..78a14cb 100644 --- a/libgomp/testsuite/lib/libgomp.exp +++ b/libgomp/testsuite/lib/libgomp.exp @@ -139,6 +139,8 @@ proc libgomp_init { args } { lappend ALWAYS_CFLAGS "additional_flags=-B${blddir}/.libs" lappend ALWAYS_CFLAGS "additional_flags=-I${blddir}" lappend ALWAYS_CFLAGS "ldflags=-L${blddir}/.libs" + # The top-level include directory, for libgomp-constants.h. + lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/../../include" } lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/.." @@ -239,3 +241,31 @@ proc libgomp_option_proc { option } { return 0 } } + +# Return 1 if at least one nvidia board is present. + +proc check_effective_target_openacc_nvidia_accel_present { } { + return [check_runtime openacc_nvidia_accel_present { + #include <openacc.h> + int main () { + return !(acc_get_num_devices (acc_device_nvidia) > 0); + } + } "" ] +} + +# Return 1 if at least one nvidia board is present, and the nvidia device type +# is selected by default by means of setting the environment variable +# ACC_DEVICE_TYPE. + +proc check_effective_target_openacc_nvidia_accel_selected { } { + if { ![check_effective_target_openacc_nvidia_accel_present] } { + return 0; + } + if { ![info exists ::env(ACC_DEVICE_TYPE)] } { + return 0; + } + if { $::env(ACC_DEVICE_TYPE) == "nvidia" } { + return 1; + } + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c++/c++.exp b/libgomp/testsuite/libgomp.oacc-c++/c++.exp index ae8a1d5..164d7d2 100644 --- a/libgomp/testsuite/libgomp.oacc-c++/c++.exp +++ b/libgomp/testsuite/libgomp.oacc-c++/c++.exp @@ -17,7 +17,8 @@ if [info exists lang_include_flags] then { dg-init # Turn on OpenACC. -lappend ALWAYS_CFLAGS "additional_flags=-fopenacc" +# XXX (TEMPORARY): Remove the -flto once that's properly integrated. 
+lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto" set blddir [lookfor_file [get_multilibs] libgomp] @@ -61,8 +62,38 @@ if { $lang_test_file_found } { set libstdcxx_includes "" } - # Main loop. - dg-runtest $tests "" $libstdcxx_includes + # Todo: get list of accelerators from configure options --enable-accelerator. + set accels { "nvidia" "host_nonshm" } + + # Run on host (or fallback) accelerator. + lappend accels "host" + + # Test OpenACC with available accelerators. + foreach accel $accels { + set tagopt "-DACC_DEVICE_TYPE_$accel=1" + + # Todo: Determine shared memory or not using run-time test. + switch $accel { + host { + set acc_mem_shared 1 + } + host_nonshm { + set acc_mem_shared 0 + } + nvidia { + set acc_mem_shared 0 + } + default { + set acc_mem_shared 0 + } + } + set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared" + + # Todo: Verify that this works for both local and remote testing. + setenv ACC_DEVICE_TYPE $accel + + dg-runtest $tests "$tagopt" $libstdcxx_includes + } } # All done. 
diff --git a/libgomp/testsuite/libgomp.oacc-c/abort-2.c b/libgomp/testsuite/libgomp.oacc-c/abort-2.c new file mode 100644 index 0000000..debb81e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/abort-2.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ + +#include <stdlib.h> + +int +main (int argc, char **argv) +{ + +#pragma acc parallel + { + if (argc != 1) + abort (); + } + + return 0; +} + diff --git a/libgomp/testsuite/libgomp.oacc-c/abort.c b/libgomp/testsuite/libgomp.oacc-c/abort.c new file mode 100644 index 0000000..f88b9e3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/abort.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ +/* { dg-shouldfail "" { *-*-* } { "*" } { "" } } */ + +#include <stdlib.h> + +int +main (void) +{ + +#pragma acc parallel + { + abort (); + } + + return 0; +} + diff --git a/libgomp/testsuite/libgomp.oacc-c/acc_on_device-1.c b/libgomp/testsuite/libgomp.oacc-c/acc_on_device-1.c index f216587..81ea476 100644 --- a/libgomp/testsuite/libgomp.oacc-c/acc_on_device-1.c +++ b/libgomp/testsuite/libgomp.oacc-c/acc_on_device-1.c @@ -1,7 +1,6 @@ /* Disable the acc_on_device builtin; we want to test the libgomp library function. */ -/* TODO: Remove -DACC_DEVICE_TYPE_host once that is set by the test harness. 
*/ -/* { dg-additional-options "-fno-builtin-acc_on_device -DACC_DEVICE_TYPE_host" } */ +/* { dg-additional-options "-fno-builtin-acc_on_device" } */ #include #include @@ -16,8 +15,12 @@ main (int argc, char *argv[]) abort (); if (!acc_on_device (acc_device_host)) abort (); + if (acc_on_device (acc_device_host_nonshm)) + abort (); if (acc_on_device (acc_device_not_host)) abort (); + if (acc_on_device (acc_device_nvidia)) + abort (); } @@ -29,8 +32,12 @@ main (int argc, char *argv[]) abort (); if (!acc_on_device (acc_device_host)) abort (); + if (acc_on_device (acc_device_host_nonshm)) + abort (); if (acc_on_device (acc_device_not_host)) abort (); + if (acc_on_device (acc_device_nvidia)) + abort (); } @@ -44,8 +51,22 @@ main (int argc, char *argv[]) abort (); if (acc_on_device (acc_device_host)) abort (); +#if ACC_DEVICE_TYPE_host_nonshm + if (!acc_on_device (acc_device_host_nonshm)) + abort (); +#else + if (acc_on_device (acc_device_host_nonshm)) + abort (); +#endif if (!acc_on_device (acc_device_not_host)) abort (); +#if ACC_DEVICE_TYPE_nvidia + if (!acc_on_device (acc_device_nvidia)) + abort (); +#else + if (acc_on_device (acc_device_nvidia)) + abort (); +#endif } #endif diff --git a/libgomp/testsuite/libgomp.oacc-c/c.exp b/libgomp/testsuite/libgomp.oacc-c/c.exp index 13a478e..553c225 100644 --- a/libgomp/testsuite/libgomp.oacc-c/c.exp +++ b/libgomp/testsuite/libgomp.oacc-c/c.exp @@ -23,17 +23,61 @@ if ![info exists DEFAULT_CFLAGS] then { dg-init # Turn on OpenACC. -lappend ALWAYS_CFLAGS "additional_flags=-fopenacc" +# XXX (TEMPORARY): Remove the -flto once that's properly integrated. +lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto" # Gather a list of all tests. set tests [lsort [find $srcdir/$subdir *.c]] set ld_library_path $always_ld_library_path append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST] +append ld_library_path ":/opt/nvidia/cuda-5.5/lib64" set_ld_library_path_env_vars -# Main loop. 
-dg-runtest $tests "" $DEFAULT_CFLAGS +# Todo: get list of accelerators from configure options --enable-accelerator. +set accels { "nvidia" "host_nonshm" } + +# Run on host (or fallback) accelerator. +lappend accels "host" + +# Test OpenACC with available accelerators. +set SAVE_ALWAYS_CFLAGS "$ALWAYS_CFLAGS" +foreach accel $accels { + set ALWAYS_CFLAGS "$SAVE_ALWAYS_CFLAGS" + set tagopt "-DACC_DEVICE_TYPE_$accel=1" + + # Todo: Determine shared memory or not using run-time test. + switch $accel { + host { + set acc_mem_shared 1 + } + host_nonshm { + set acc_mem_shared 0 + } + nvidia { + # Copy ptx file (TEMPORARY) + remote_download host $srcdir/libgomp.oacc-c/subr.ptx + + # Where cuda.h lives + # Todo: get that from configure option --with-cuda-driver. + lappend ALWAYS_CFLAGS "additional_flags=-I/opt/nvidia/cuda-5.5/include" + lappend ALWAYS_CFLAGS "additional_flags=-L/opt/nvidia/cuda-5.5/lib64" + + # Where timer.h lives + lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}" + set acc_mem_shared 0 + } + default { + set acc_mem_shared 0 + } + } + set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared" + + # Todo: Verify that this works for both local and remote testing. + setenv ACC_DEVICE_TYPE $accel + + dg-runtest $tests "$tagopt" $DEFAULT_CFLAGS +} # All done. 
dg-finish diff --git a/libgomp/testsuite/libgomp.oacc-c/clauses-1.c b/libgomp/testsuite/libgomp.oacc-c/clauses-1.c new file mode 100644 index 0000000..51c0cf5 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/clauses-1.c @@ -0,0 +1,623 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + int N = 8; + float *a, *b, *c, *d; + int i; + + a = (float *) malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + c = (float *) malloc (N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + } + +#pragma acc parallel copyin (a[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (b[i] != 3.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 1.0; + } + +#pragma acc parallel copyin (a[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (b[i] != 5.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + + d = (float *) acc_copyin (&a[0], N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc parallel present_or_copyin (a[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + +#pragma acc parallel copyin 
(a[0:N]) present_or_copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 2.0; + } + + d = (float *) acc_copyin (&b[0], N * sizeof (float)); + +#pragma acc parallel copyin (a[0:N]) present_or_copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 2.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + acc_free (d); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 4.0; + } + +#pragma acc parallel copy (a[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = a[ii] + 2; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 6.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 7.0; + } + +#pragma acc parallel present_or_copy (a[0:N]) present_or_copy (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = b[ii] + 2; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 9.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 7.0; + } + + d = (float *) acc_copyin (&a[0], N * sizeof (float)); + d = (float *) acc_copyin (&b[0], N * sizeof (float)); + +#pragma acc parallel 
present_or_copy (a[0:N]) present_or_copy (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = b[ii] + 2; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 7.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + d = (float *) acc_deviceptr (&a[0]); + acc_unmap_data (&a[0]); + acc_free (d); + + d = (float *) acc_deviceptr (&b[0]); + acc_unmap_data (&b[0]); + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 7.0; + } + +#pragma acc parallel copyin (a[0:N]) create (c[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 3.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&c[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 8.0; + } + +#pragma acc parallel copyin (a[0:N]) present_or_create (c[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 4.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&c[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 5.0; + } + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc parallel copyin (a[0:N]) present_or_create (c[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + 
abort (); + + if (b[i] != 2.0) + abort (); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (acc_is_present (b, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort (); + + d = (float *) acc_deviceptr (c); + + acc_unmap_data (c); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 8.0; + } + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc parallel copyin (a[0:N]) present (c[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 4.0) + abort (); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (acc_is_present (b, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort (); + + acc_unmap_data (c); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 8.0; + } + + acc_copyin (a, N * sizeof (float)); + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (b, d, N * sizeof (float)); + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc parallel present (a[0:N]) present (c[0:N]) present (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + if (!acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (b, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort (); + + acc_copyout (b, N * sizeof (float)); + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 4.0) + abort (); + } + + d = (float *) acc_deviceptr (a); + + acc_unmap_data (a); + + acc_free (d); + + d = (float *) acc_deviceptr (c); + + acc_unmap_data (c); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 6.0; + } + + d = (float *) 
acc_malloc (N * sizeof (float)); + +#pragma acc parallel copyin (a[0:N]) deviceptr (d) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + d[ii] = a[ii]; + b[ii] = d[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 3.0) + abort (); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (acc_is_present (b, (N * sizeof (float)))) + abort (); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + + d = (float *) acc_copyin (&a[0], N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc parallel pcopyin (a[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + +#pragma acc parallel copyin (a[0:N]) pcopyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 7.0; + } + +#pragma acc parallel copyin (a[0:N]) pcreate (c[0:N]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 5.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&c[0], (N * sizeof (float)))) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/clauses-2.c b/libgomp/testsuite/libgomp.oacc-c/clauses-2.c new file 
mode 100644 index 0000000..8dc45cb --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/clauses-2.c @@ -0,0 +1,67 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + int N = 8; + float *a, *b, *c, *d; + int i; + + a = (float *) malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + c = (float *) malloc (N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 5.0; + } + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc parallel copyin (a[0:N]) present_or_create (c[0:N+1]) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + abort (); + + if (b[i] != 2.0) + abort (); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (acc_is_present (b, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort (); + + d = (float *) acc_deviceptr (c); + + acc_unmap_data (c); + + acc_free (d); + + return 0; +} +/* { dg-shouldfail "libgomp: \[\h+,\d+\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/context-1.c b/libgomp/testsuite/libgomp.oacc-c/context-1.c new file mode 100644 index 0000000..dabc706 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/context-1.c @@ -0,0 +1,213 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include +#include +#include +#include +#include +#include + +void +saxpy (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + { + y[i] = a * x[i] + y[i]; + } +} + +void +context_check (CUcontext ctx1) +{ + CUcontext ctx2, ctx3; + CUresult r; + + r = cuCtxGetCurrent (&ctx2); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", 
r); + exit (EXIT_FAILURE); + } + + if (ctx1 != ctx2) + { + fprintf (stderr, "new context established\n"); + exit (EXIT_FAILURE); + } + + ctx3 = (CUcontext) acc_get_current_cuda_context (); + + if (ctx1 != ctx3) + { + fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n"); + exit (EXIT_FAILURE); + } + + return; +} + +int +main (int argc, char **argv) +{ + cublasStatus_t s; + cudaError_t e; + cublasHandle_t h; + CUcontext pctx, ctx; + CUresult r; + int dev; + int i; + const int N = 256; + float *h_X, *h_Y1, *h_Y2; + float *d_X,*d_Y; + float alpha = 2.0f; + float error_norm; + float ref_norm; + + /* Test 1 - cuBLAS creates, OpenACC shares. */ + + s = cublasCreate (&h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasCreate failed: %d\n", s); + exit (EXIT_FAILURE); + } + + r = cuCtxGetCurrent (&pctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + e = cudaGetDevice (&dev); + if (e != cudaSuccess) + { + fprintf (stderr, "cudaGetDevice failed: %d\n", e); + exit (EXIT_FAILURE); + } + + acc_set_device_num (dev, acc_device_nvidia); + + h_X = (float *) malloc (N * sizeof (float)); + if (!h_X) + { + fprintf (stderr, "malloc failed: for h_X\n"); + exit (EXIT_FAILURE); + } + + h_Y1 = (float *) malloc (N * sizeof (float)); + if (!h_Y1) + { + fprintf (stderr, "malloc failed: for h_Y1\n"); + exit (EXIT_FAILURE); + } + + h_Y2 = (float *) malloc (N * sizeof (float)); + if (!h_Y2) + { + fprintf (stderr, "malloc failed: for h_Y2\n"); + exit (EXIT_FAILURE); + } + + for (i = 0; i < N; i++) + { + h_X[i] = rand () / (float) RAND_MAX; + h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX; + } + + d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float)); + if (d_X == NULL) + { + fprintf (stderr, "copyin error h_X\n"); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float)); + if (d_Y == NULL) + { + fprintf (stderr, "copyin error h_Y1\n"); 
+ exit (EXIT_FAILURE); + } + + context_check (pctx); + + s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasSaxpy failed: %d\n", s); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float)); + + context_check (pctx); + + saxpy (N, alpha, h_X, h_Y2); + + error_norm = 0; + ref_norm = 0; + + for (i = 0; i < N; ++i) + { + float diff; + + diff = h_Y1[i] - h_Y2[i]; + error_norm += diff * diff; + ref_norm += h_Y2[i] * h_Y2[i]; + } + + error_norm = (float) sqrt ((double) error_norm); + ref_norm = (float) sqrt ((double) ref_norm); + + if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f)) + { + fprintf (stderr, "math error\n"); + exit (EXIT_FAILURE); + } + + free (h_X); + free (h_Y1); + free (h_Y2); + + acc_free (d_X); + acc_free (d_Y); + + context_check (pctx); + + s = cublasDestroy (h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasDestroy failed: %d\n", s); + exit (EXIT_FAILURE); + } + + acc_shutdown (acc_device_nvidia); + + r = cuCtxGetCurrent (&ctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + if (!ctx) + { + fprintf (stderr, "Expected context\n"); + exit (EXIT_FAILURE); + } + + if (pctx != ctx) + { + fprintf (stderr, "Unexpected new context\n"); + exit (EXIT_FAILURE); + } + + return EXIT_SUCCESS; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/context-2.c b/libgomp/testsuite/libgomp.oacc-c/context-2.c new file mode 100644 index 0000000..16464d5 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/context-2.c @@ -0,0 +1,223 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include +#include +#include +#include +#include +#include + +void +saxpy (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + { + y[i] = a * x[i] + y[i]; + } +} + +void 
+context_check (CUcontext ctx1) +{ + CUcontext ctx2, ctx3; + CUresult r; + + r = cuCtxGetCurrent (&ctx2); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + if (ctx1 != ctx2) + { + fprintf (stderr, "new context established\n"); + exit (EXIT_FAILURE); + } + + ctx3 = (CUcontext) acc_get_current_cuda_context (); + + if (ctx1 != ctx3) + { + fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n"); + exit (EXIT_FAILURE); + } + + return; +} + +int +main (int argc, char **argv) +{ + cublasStatus_t s; + cudaError_t e; + cublasHandle_t h; + CUcontext pctx, ctx; + CUresult r; + int dev; + int i; + const int N = 256; + float *h_X, *h_Y1, *h_Y2; + float *d_X,*d_Y; + float alpha = 2.0f; + float error_norm; + float ref_norm; + + /* Test 2 - cuBLAS creates, OpenACC shares. */ + + s = cublasCreate (&h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasCreate failed: %d\n", s); + exit (EXIT_FAILURE); + } + + r = cuCtxGetCurrent (&pctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + e = cudaGetDevice (&dev); + if (e != cudaSuccess) + { + fprintf (stderr, "cudaGetDevice failed: %d\n", e); + exit (EXIT_FAILURE); + } + + acc_set_device_num (dev, acc_device_nvidia); + + h_X = (float *) malloc (N * sizeof (float)); + if (h_X == 0) + { + fprintf (stderr, "malloc failed: for h_X\n"); + exit (EXIT_FAILURE); + } + + h_Y1 = (float *) malloc (N * sizeof (float)); + if (h_Y1 == 0) + { + fprintf (stderr, "malloc failed: for h_Y1\n"); + exit (EXIT_FAILURE); + } + + h_Y2 = (float *) malloc (N * sizeof (float)); + if (h_Y2 == 0) + { + fprintf (stderr, "malloc failed: for h_Y2\n"); + exit (EXIT_FAILURE); + } + + for (i = 0; i < N; i++) + { + h_X[i] = rand () / (float) RAND_MAX; + h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX; + } + + d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float)); + if (d_X == NULL) + { + fprintf (stderr, 
"copyin error h_X\n"); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float)); + if (d_Y == NULL) + { + fprintf (stderr, "copyin error h_Y1\n"); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasSaxpy failed: %d\n", s); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float)); + + context_check (pctx); + +#pragma acc parallel copyin (h_X[0:N]), copy (h_Y2[0:N]) copyin (alpha) + { + int i; + + for (i = 0; i < N; i++) + { + h_Y2[i] = alpha * h_X[i] + h_Y2[i]; + } + } + + context_check (pctx); + + error_norm = 0; + ref_norm = 0; + + for (i = 0; i < N; ++i) + { + float diff; + + diff = h_Y1[i] - h_Y2[i]; + error_norm += diff * diff; + ref_norm += h_Y2[i] * h_Y2[i]; + } + + error_norm = (float) sqrt ((double) error_norm); + ref_norm = (float) sqrt ((double) ref_norm); + + if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f)) + { + fprintf (stderr, "math error\n"); + exit (EXIT_FAILURE); + } + + free (h_X); + free (h_Y1); + free (h_Y2); + + acc_free (d_X); + acc_free (d_Y); + + context_check (pctx); + + s = cublasDestroy (h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasDestroy failed: %d\n", s); + exit (EXIT_FAILURE); + } + + acc_shutdown (acc_device_nvidia); + + r = cuCtxGetCurrent (&ctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + if (!ctx) + { + fprintf (stderr, "Expected context\n"); + exit (EXIT_FAILURE); + } + + if (pctx != ctx) + { + fprintf (stderr, "Unexpected new context\n"); + exit (EXIT_FAILURE); + } + + return EXIT_SUCCESS; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/context-3.c b/libgomp/testsuite/libgomp.oacc-c/context-3.c new file mode 100644 index 0000000..ccd276c --- /dev/null +++ 
b/libgomp/testsuite/libgomp.oacc-c/context-3.c @@ -0,0 +1,200 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include +#include +#include +#include +#include +#include + +void +saxpy (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + { + y[i] = a * x[i] + y[i]; + } +} + +void +context_check (CUcontext ctx1) +{ + CUcontext ctx2, ctx3; + CUresult r; + + r = cuCtxGetCurrent (&ctx2); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + if (ctx1 != ctx2) + { + fprintf (stderr, "new context established\n"); + exit (EXIT_FAILURE); + } + + ctx3 = (CUcontext) acc_get_current_cuda_context (); + + if (ctx1 != ctx3) + { + fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n"); + exit (EXIT_FAILURE); + } + + return; +} + +int +main (int argc, char **argv) +{ + cublasStatus_t s; + cublasHandle_t h; + CUcontext pctx; + CUresult r; + int i; + const int N = 256; + float *h_X, *h_Y1, *h_Y2; + float *d_X,*d_Y; + float alpha = 2.0f; + float error_norm; + float ref_norm; + + /* Test 3 - OpenACC creates, cuBLAS shares. 
*/ + + acc_set_device_num (0, acc_device_nvidia); + + r = cuCtxGetCurrent (&pctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + h_X = (float *) malloc (N * sizeof (float)); + if (h_X == 0) + { + fprintf (stderr, "malloc failed: for h_X\n"); + exit (EXIT_FAILURE); + } + + h_Y1 = (float *) malloc (N * sizeof (float)); + if (h_Y1 == 0) + { + fprintf (stderr, "malloc failed: for h_Y1\n"); + exit (EXIT_FAILURE); + } + + h_Y2 = (float *) malloc (N * sizeof (float)); + if (h_Y2 == 0) + { + fprintf (stderr, "malloc failed: for h_Y2\n"); + exit (EXIT_FAILURE); + } + + for (i = 0; i < N; i++) + { + h_X[i] = rand () / (float) RAND_MAX; + h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX; + } + + d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float)); + if (d_X == NULL) + { + fprintf (stderr, "copyin error h_X\n"); + exit (EXIT_FAILURE); + } + + d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float)); + if (d_Y == NULL) + { + fprintf (stderr, "copyin error h_Y1\n"); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + s = cublasCreate (&h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasCreate failed: %d\n", s); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasSaxpy failed: %d\n", s); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float)); + + context_check (pctx); + + saxpy (N, alpha, h_X, h_Y2); + + error_norm = 0; + ref_norm = 0; + + for (i = 0; i < N; ++i) + { + float diff; + + diff = h_Y1[i] - h_Y2[i]; + error_norm += diff * diff; + ref_norm += h_Y2[i] * h_Y2[i]; + } + + error_norm = (float) sqrt ((double) error_norm); + ref_norm = (float) sqrt ((double) ref_norm); + + if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f)) + { + fprintf (stderr, "math error\n"); + exit (EXIT_FAILURE); + } + 
+ free (h_X); + free (h_Y1); + free (h_Y2); + + acc_free (d_X); + acc_free (d_Y); + + context_check (pctx); + + s = cublasDestroy (h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasDestroy failed: %d\n", s); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + acc_shutdown (acc_device_nvidia); + + r = cuCtxGetCurrent (&pctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + if (pctx) + { + fprintf (stderr, "Unexpected context\n"); + exit (EXIT_FAILURE); + } + + return EXIT_SUCCESS; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/context-4.c b/libgomp/testsuite/libgomp.oacc-c/context-4.c new file mode 100644 index 0000000..71365e8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/context-4.c @@ -0,0 +1,213 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include +#include +#include +#include +#include +#include + +void +saxpy (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + { + y[i] = a * x[i] + y[i]; + } +} + +void +context_check (CUcontext ctx1) +{ + CUcontext ctx2, ctx3; + CUresult r; + + r = cuCtxGetCurrent (&ctx2); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + if (ctx1 != ctx2) + { + fprintf (stderr, "new context established\n"); + exit (EXIT_FAILURE); + } + + ctx3 = (CUcontext) acc_get_current_cuda_context (); + + if (ctx1 != ctx3) + { + fprintf (stderr, "acc_get_current_cuda_context returned wrong value\n"); + exit (EXIT_FAILURE); + } + + return; +} + +int +main (int argc, char **argv) +{ + cublasStatus_t s; + cublasHandle_t h; + CUcontext pctx; + CUresult r; + int i; + const int N = 256; + float *h_X, *h_Y1, *h_Y2; + float *d_X,*d_Y; + float alpha = 2.0f; + float error_norm; + float ref_norm; + + /* Test 4 - OpenACC creates, cuBLAS shares. 
*/ + + acc_set_device_num (0, acc_device_nvidia); + + r = cuCtxGetCurrent (&pctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + h_X = (float *) malloc (N * sizeof (float)); + if (h_X == 0) + { + fprintf (stderr, "malloc failed: for h_X\n"); + exit (EXIT_FAILURE); + } + + h_Y1 = (float *) malloc (N * sizeof (float)); + if (h_Y1 == 0) + { + fprintf (stderr, "malloc failed: for h_Y1\n"); + exit (EXIT_FAILURE); + } + + h_Y2 = (float *) malloc (N * sizeof (float)); + if (h_Y2 == 0) + { + fprintf (stderr, "malloc failed: for h_Y2\n"); + exit (EXIT_FAILURE); + } + + for (i = 0; i < N; i++) + { + h_X[i] = rand () / (float) RAND_MAX; + h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX; + } + +#pragma acc parallel copyin (h_X[0:N]), copy (h_Y2[0:N]) copy (alpha) + { + int i; + + for (i = 0; i < N; i++) + { + h_Y2[i] = alpha * h_X[i] + h_Y2[i]; + } + } + + r = cuCtxGetCurrent (&pctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float)); + if (d_X == NULL) + { + fprintf (stderr, "copyin error h_Y1\n"); + exit (EXIT_FAILURE); + } + + d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float)); + if (d_Y == NULL) + { + fprintf (stderr, "copyin error h_Y1\n"); + exit (EXIT_FAILURE); + } + + s = cublasCreate (&h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasCreate failed: %d\n", s); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasSaxpy failed: %d\n", s); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float)); + + context_check (pctx); + + error_norm = 0; + ref_norm = 0; + + for (i = 0; i < N; ++i) + { + float diff; + + diff = h_Y1[i] - h_Y2[i]; + error_norm += diff * diff; + ref_norm += h_Y2[i] * 
h_Y2[i]; + } + + error_norm = (float) sqrt ((double) error_norm); + ref_norm = (float) sqrt ((double) ref_norm); + + if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f)) + { + fprintf (stderr, "math error\n"); + exit (EXIT_FAILURE); + } + + free (h_X); + free (h_Y1); + free (h_Y2); + + acc_free (d_X); + acc_free (d_Y); + + context_check (pctx); + + s = cublasDestroy (h); + if (s != CUBLAS_STATUS_SUCCESS) + { + fprintf (stderr, "cublasDestroy failed: %d\n", s); + exit (EXIT_FAILURE); + } + + context_check (pctx); + + acc_shutdown (acc_device_nvidia); + + r = cuCtxGetCurrent (&pctx); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r); + exit (EXIT_FAILURE); + } + + if (pctx) + { + fprintf (stderr, "Unexpected context\n"); + exit (EXIT_FAILURE); + } + + return EXIT_SUCCESS; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/data-1.c b/libgomp/testsuite/libgomp.oacc-c/data-1.c index 8f9a17a..e7564cc 100644 --- a/libgomp/testsuite/libgomp.oacc-c/data-1.c +++ b/libgomp/testsuite/libgomp.oacc-c/data-1.c @@ -1,19 +1,30 @@ /* { dg-do run } */ -extern void abort (); +#include +#include int i; +int +is_mapped (void *p, size_t n) +{ +#if ACC_MEM_SHARED + return 1; +#else + return acc_is_present (p, n); +#endif +} + int main(void) { int j; -#if 0 i = -1; j = -2; #pragma acc data copyin (i, j) { - // TODO: check that variables have been mapped. + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); if (i != -1 || j != -2) abort (); i = 2; @@ -28,37 +39,30 @@ int main(void) j = -2; #pragma acc data copyout (i, j) { - // TODO: check that variables have been mapped. - if (i != -1 || j != -2) - abort (); - i = 2; - j = 1; - if (i != 2 || j != 1) + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) abort (); - } - if (i != -1 || j != -2) - abort (); - - i = -1; - j = -2; -#pragma acc data copy (i, j) - { - // TODO: check that variables have been mapped. 
if (i != -1 || j != -2) abort (); i = 2; j = 1; if (i != 2 || j != 1) abort (); + +#pragma acc parallel present (i, j) + { + i = 4; + j = 2; + } } - if (i != -1 || j != -2) + if (i != 4 || j != 2) abort (); i = -1; j = -2; #pragma acc data create (i, j) { - // TODO: check that variables have been mapped. + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); if (i != -1 || j != -2) abort (); i = 2; @@ -66,15 +70,15 @@ int main(void) if (i != 2 || j != 1) abort (); } - if (i != -1 || j != -2) + if (i != 2 || j != 1) abort (); -#endif i = -1; j = -2; #pragma acc data present_or_copyin (i, j) { - // TODO: check that variables have been mapped. + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); if (i != -1 || j != -2) abort (); i = 2; @@ -85,28 +89,34 @@ int main(void) if (i != 2 || j != 1) abort (); -#if 0 i = -1; j = -2; #pragma acc data present_or_copyout (i, j) { - // TODO: check that variables have been mapped. + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); if (i != -1 || j != -2) abort (); i = 2; j = 1; if (i != 2 || j != 1) abort (); + +#pragma acc parallel present (i, j) + { + i = 4; + j = 2; + } } - if (i != -1 || j != -2) + if (i != 4 || j != 2) abort (); -#endif i = -1; j = -2; #pragma acc data present_or_copy (i, j) { - // TODO: check that variables have been mapped. + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); if (i != -1 || j != -2) abort (); i = 2; @@ -114,47 +124,56 @@ int main(void) if (i != 2 || j != 1) abort (); } +#if ACC_MEM_SHARED + if (i != 2 || j != 1) + abort (); +#else if (i != -1 || j != -2) abort (); +#endif -#if 0 i = -1; j = -2; #pragma acc data present_or_create (i, j) { - // TODO: check that variables have been mapped. 
+ if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); i = 2; j = 1; if (i != 2 || j != 1) abort (); } - if (i != -1 || j != -2) + + if (i != 2 || j != 1) abort (); -#endif -#if 0 i = -1; j = -2; -#pragma acc data present (i, j) +#pragma acc data copyin (i, j) { - // TODO: check that variables have been mapped. - if (i != -1 || j != -2) - abort (); - i = 2; - j = 1; - if (i != 2 || j != 1) - abort (); +#pragma acc data present (i, j) + { + if (!is_mapped (&i, sizeof (i)) || !is_mapped (&j, sizeof (j))) + abort (); + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + } } - if (i != -1 || j != -2) + if (i != 2 || j != 1) abort (); -#endif -#if 0 i = -1; j = -2; #pragma acc data { - // TODO: check that variables have been mapped. +#if !ACC_MEM_SHARED + if (is_mapped (&i, sizeof (i)) || is_mapped (&j, sizeof (j))) + abort (); +#endif if (i != -1 || j != -2) abort (); i = 2; @@ -162,9 +181,8 @@ int main(void) if (i != 2 || j != 1) abort (); } - if (i != -1 || j != -2) + if (i != 2 || j != 1) abort (); -#endif return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c/deviceptr-1.c b/libgomp/testsuite/libgomp.oacc-c/deviceptr-1.c new file mode 100644 index 0000000..e271a37 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/deviceptr-1.c @@ -0,0 +1,32 @@ +/* { dg-do run } */ + +#include + +int main (void) +{ + void *a, *a_1, *a_2; + +#define A (void *) 0x123 + a = A; + +#pragma acc data copyout (a_1, a_2) +#pragma acc kernels deviceptr (a) + { + a_1 = a; + a_2 = &a; + } + + if (a != A) + abort (); + if (a_1 != a) + abort (); +#if ACC_MEM_SHARED + if (a_2 != &a) + abort (); +#else + if (a_2 == &a) + abort (); +#endif + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/goacc_kernels.c b/libgomp/testsuite/libgomp.oacc-c/goacc_kernels.c index b41e558..683fefa 100644 --- a/libgomp/testsuite/libgomp.oacc-c/goacc_kernels.c +++ b/libgomp/testsuite/libgomp.oacc-c/goacc_kernels.c @@ -1,4 +1,5 @@ /* { dg-do 
run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_DEVICE_TYPE_host=1" } } */ #include "libgomp_g.h" @@ -19,7 +20,7 @@ int main(void) i = -1; GOACC_kernels (0, f, (const void *) 0, 0, (void *) 0, (void *) 0, (void *) 0, - 1, 1, 1); + 1, 1, 1, -2, -1); if (i != 42) abort (); diff --git a/libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c b/libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c index 4ab1e9b..232ce8a 100644 --- a/libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c +++ b/libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c @@ -1,4 +1,5 @@ /* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_DEVICE_TYPE_host=1" } } */ #include "libgomp_g.h" @@ -19,7 +20,7 @@ int main(void) i = -1; GOACC_parallel (0, f, (const void *) 0, 0, (void *) 0, (void *) 0, (void *) 0, - 1, 1, 1); + 1, 1, 1, -2, -1); if (i != 42) abort (); diff --git a/libgomp/testsuite/libgomp.oacc-c/if-1.c b/libgomp/testsuite/libgomp.oacc-c/if-1.c new file mode 100644 index 0000000..e289f40 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/if-1.c @@ -0,0 +1,547 @@ +/* { dg-do run } */ +/* { dg-additional-options "-fno-builtin-acc_on_device" } */ + +#include +#include +#include + +#define N 32 + +int +main(int argc, char **argv) +{ + float *a, *b, *d_a, *d_b, exp, exp2; + int i; + const int one = 1; + const int zero = 0; + int n; + + a = (float *) malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + d_a = (float *) acc_malloc (N * sizeof (float)); + d_b = (float *) acc_malloc (N * sizeof (float)); + + for (i = 0; i < N; i++) + a[i] = 4.0; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(1) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 5.0; +#else + exp = 4.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 16.0; + +#pragma acc parallel if(0) + { + int ii; + + for (ii = 
0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 17.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 8.0; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(one) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 9.0; +#else + exp = 8.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 22.0; + +#pragma acc parallel if(zero) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 23.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 16.0; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(true) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 17.0; +#else + exp = 16.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 76.0; + +#pragma acc parallel if(false) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 77.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 22.0; + + n = 1; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(n) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 23.0; +#else + exp = 22.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 18.0; + + n = 0; + +#pragma acc parallel if(n) + { + int ii; + + for (ii 
= 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 19.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 49.0; + + n = 1; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(n + n) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 50.0; +#else + exp = 49.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 38.0; + + n = 0; + +#pragma acc parallel if(n + n) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 39.0) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 91.0; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(-2) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 92.0; +#else + exp = 91.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 43.0; + +#pragma acc parallel copyin(a[0:N]) copyout(b[0:N]) if(one == 1) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + +#if ACC_MEM_SHARED + exp = 44.0; +#else + exp = 43.0; +#endif + + for (i = 0; i < N; i++) + { + if (b[i] != exp) + abort(); + } + + for (i = 0; i < N; i++) + a[i] = 87.0; + +#pragma acc parallel if(one == 0) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + if (acc_on_device (acc_device_host)) + b[ii] = a[ii] + 1; + else + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 88.0) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 9.0; + } + +#if 
ACC_MEM_SHARED + exp = 0.0; + exp2 = 0.0; +#else + acc_map_data (a, d_a, N * sizeof (float)); + acc_map_data (b, d_b, N * sizeof (float)); + exp = 3.0; + exp2 = 9.0; +#endif + +#pragma acc update device(a[0:N], b[0:N]) if(1) + + for (i = 0; i < N; i++) + { + a[i] = 0.0; + b[i] = 0.0; + } + +#pragma acc update host(a[0:N], b[0:N]) if(1) + + for (i = 0; i < N; i++) + { + if (a[i] != exp) + abort(); + + if (b[i] != exp2) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 12.0; + } + +#pragma acc update device(a[0:N], b[0:N]) if(0) + + for (i = 0; i < N; i++) + { + a[i] = 0.0; + b[i] = 0.0; + } + +#pragma acc update host(a[0:N], b[0:N]) if(1) + + for (i = 0; i < N; i++) + { + if (a[i] != exp) + abort(); + + if (b[i] != exp2) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 26.0; + b[i] = 21.0; + } + +#pragma acc update device(a[0:N], b[0:N]) if(1) + + for (i = 0; i < N; i++) + { + a[i] = 0.0; + b[i] = 0.0; + } + +#pragma acc update host(a[0:N], b[0:N]) if(0) + + for (i = 0; i < N; i++) + { + if (a[i] != 0.0) + abort(); + + if (b[i] != 0.0) + abort(); + } + +#if !ACC_MEM_SHARED + acc_unmap_data (a); + acc_unmap_data (b); +#endif + + acc_free (d_a); + acc_free (d_b); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 0.0; + } + +#pragma acc data copyin(a[0:N]) copyout(b[0:N]) if(1) +{ +#pragma acc parallel present(a[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + b[ii] = a[ii]; + } + } +} + + for (i = 0; i < N; i++) + { + if (b[i] != 4.0) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 8.0; + b[i] = 1.0; + } + +#pragma acc data copyin(a[0:N]) copyout(b[0:N]) if(0) +{ +#if !ACC_MEM_SHARED + if (acc_is_present (a, N * sizeof (float))) + abort (); +#endif + +#if !ACC_MEM_SHARED + if (acc_is_present (b, N * sizeof (float))) + abort (); +#endif +} + + for (i = 0; i < N; i++) + { + a[i] = 18.0; + b[i] = 21.0; + } + +#pragma acc data copyin(a[0:N]) if(1) +{ +#if !ACC_MEM_SHARED + if (!acc_is_present (a, N * sizeof (float))) + abort 
(); +#endif + +#pragma acc data copyout(b[0:N]) if(0) + { +#if !ACC_MEM_SHARED + if (acc_is_present (b, N * sizeof (float))) + abort (); +#endif + +#pragma acc data copyout(b[0:N]) if(1) + { +#pragma acc parallel present(a[0:N]) present(b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + b[ii] = a[ii]; + } + } + } + +#if !ACC_MEM_SHARED + if (acc_is_present (b, N * sizeof (float))) + abort (); +#endif + } +} + + for (i = 0; i < N; i++) + { + if (b[i] != 18.0) + abort (); + } + +#ifdef XXX_TODO_ENTER_END_DATA +#endif + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/kernels-1.c b/libgomp/testsuite/libgomp.oacc-c/kernels-1.c index 8550662..3acfdf5 100644 --- a/libgomp/testsuite/libgomp.oacc-c/kernels-1.c +++ b/libgomp/testsuite/libgomp.oacc-c/kernels-1.c @@ -1,10 +1,10 @@ /* { dg-do run } */ -extern void abort (); +#include int i; -int main(void) +int main (void) { int j, v; @@ -83,8 +83,15 @@ int main(void) abort (); v = 1; } - if (v != 1 || i != -1 || j != -2) + if (v != 1) + abort (); +#if ACC_MEM_SHARED + if (i != 2 || j != 1) abort (); +#else + if (i != -1 || j != -2) + abort (); +#endif i = -1; j = -2; @@ -127,8 +134,15 @@ int main(void) abort (); v = 1; } - if (v != 1 || i != -1 || j != -2) + if (v != 1) + abort (); +#if ACC_MEM_SHARED + if (i != 2 || j != 1) abort (); +#else + if (i != -1 || j != -2) + abort (); +#endif #if 0 i = -1; diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-1.c b/libgomp/testsuite/libgomp.oacc-c/lib-1.c index 8ad1b19..17129d8 100644 --- a/libgomp/testsuite/libgomp.oacc-c/lib-1.c +++ b/libgomp/testsuite/libgomp.oacc-c/lib-1.c @@ -1,7 +1,24 @@ +/* { dg-do run } */ + #include int -main (void) +main (int argc, char **argv) { + acc_device_t devtype = acc_device_host; + +#if ACC_DEVICE_TYPE_nvidia + devtype = acc_device_nvidia; + + if (acc_get_num_devices (devtype) == 0) + return 0; +#endif + + acc_init (devtype); + + acc_init (devtype); + return 0; } + +/* { dg-shouldfail "libgomp: device already active" } */ diff 
--git a/libgomp/testsuite/libgomp.oacc-c/lib-10.c b/libgomp/testsuite/libgomp.oacc-c/lib-10.c new file mode 100644 index 0000000..cf1af8c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-10.c @@ -0,0 +1,58 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + void *d; + acc_device_t devtype = acc_device_host; + +#if ACC_DEVICE_TYPE_nvidia + devtype = acc_device_nvidia; + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; +#endif + + acc_init (devtype); + + d = acc_malloc (0); + if (d != NULL) + abort (); + + acc_free (0); + + acc_shutdown (devtype); + + acc_set_device_type (devtype); + + d = acc_malloc (0); + if (d != NULL) + abort (); + + acc_shutdown (devtype); + + acc_init (devtype); + + d = acc_malloc (1024); + if (d == NULL) + abort (); + + acc_free (d); + + acc_shutdown (devtype); + + acc_set_device_type (devtype); + + d = acc_malloc (1024); + if (d == NULL) + abort (); + + acc_free (d); + + acc_shutdown (devtype); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-11.c b/libgomp/testsuite/libgomp.oacc-c/lib-11.c new file mode 100644 index 0000000..b4583ae --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-11.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 512; + void *d; + + d = acc_malloc (N); + if (d == NULL) + abort (); + + acc_free ((void *)((uintptr_t) d + (uintptr_t) (N >> 1))); + + return 0; +} + +/* { dg-shouldfail "libgomp: mem free failed 1" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-12.c b/libgomp/testsuite/libgomp.oacc-c/lib-12.c new file mode 100644 index 0000000..b46f590 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-12.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char 
*) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + memset (h, 0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-13.c b/libgomp/testsuite/libgomp.oacc-c/lib-13.c new file mode 100644 index 0000000..7098ef3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-13.c @@ -0,0 +1,60 @@ +/* { dg-do run } */ + +#include +#include + +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + + if (acc_is_present (h, 1) != 1) + abort (); + + if (acc_is_present (h, N + 1) != 0) + abort (); + + if (acc_is_present (h + 1, N) != 0) + abort (); + + if (acc_is_present (h - 1, N) != 0) + abort (); + + if (acc_is_present (h - 1, N - 1) != 0) + abort (); + + if (acc_is_present (h + N, 0) != 0) + abort (); + + if (acc_is_present (h + N, N) != 0) + abort (); + + if (acc_is_present (0, N) != 0) + abort (); + + if (acc_is_present (h, 0) != 0) + abort (); + + acc_free (d); + + if (acc_is_present (h, 1) != 0) + abort (); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-14.c b/libgomp/testsuite/libgomp.oacc-c/lib-14.c new file mode 100644 index 0000000..a9632f7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-14.c @@ -0,0 +1,61 @@ +/* { dg-do run } */ + +#include +#include + +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + + if (acc_is_present (h, 1) != 1) + abort (); + + if (acc_is_present (h + N - 1, 1) != 1) + abort (); + + if (acc_is_present (h - 1, 1) != 0) + abort (); + + if (acc_is_present (h + N, 1) != 0) + abort (); + + for (i = 0; i 
< N; i++) + { + if (acc_is_present (h + i, 1) != 1) + abort (); + } + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, N - i) != 1) + abort (); + } + + acc_free (d); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, N - i) != 0) + abort (); + } + + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-15.c b/libgomp/testsuite/libgomp.oacc-c/lib-15.c new file mode 100644 index 0000000..4f6a731 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-15.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-16.c b/libgomp/testsuite/libgomp.oacc-c/lib-16.c new file mode 100644 index 0000000..9d277ac --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-16.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + (void) acc_copyin (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+256\] already mapped to \[\h+,\+256\]" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-17.c b/libgomp/testsuite/libgomp.oacc-c/lib-17.c new file mode 100644 index 0000000..5ff894c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-17.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + 
+ acc_copyout (h, N); + + acc_copyout (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-18.c b/libgomp/testsuite/libgomp.oacc-c/lib-18.c new file mode 100644 index 0000000..2bc3263 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-18.c @@ -0,0 +1,34 @@ +/* { dg-do run } */ + +#include +#include + +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + + acc_free (d); + + acc_copyout (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-19.c b/libgomp/testsuite/libgomp.oacc-c/lib-19.c new file mode 100644 index 0000000..3581616 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-19.c @@ -0,0 +1,60 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h[N]; + + for (i = 0; i < N; i++) + { + int j; + unsigned char *p; + + h[i] = (unsigned char *) malloc (N); + p = h[i]; + + for (j = 0; j < N; j++) + { + p[j] = i; + } + + (void) acc_copyin (p, N); + } + + for (i = 0; i < N; i++) + { + memset (h[i], 0, i); + } + + for (i = 0; i < N; i++) + { + int j; + unsigned char *p; + + acc_copyout (h[i], N); + + p = h[i]; + + for (j = 0; j < N; j++) + { + if (p[j] != i) + abort (); + } + } + + for (i = 0; i < N; i++) + { + free (h[i]); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-2.c b/libgomp/testsuite/libgomp.oacc-c/lib-2.c new file mode 100644 index 0000000..9a4501f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-2.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include + +int +main (int argc, char **argv) 
+{ + acc_device_t devtype = acc_device_host; + +#if ACC_DEVICE_TYPE_nvidia + devtype = acc_device_nvidia; + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; +#endif + + acc_init (devtype); + + acc_shutdown (devtype); + + acc_shutdown (devtype); + + return 0; +} + +/* { dg-shouldfail "libgomp: no device initialized" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-20.c b/libgomp/testsuite/libgomp.oacc-c/lib-20.c new file mode 100644 index 0000000..b379a8f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-20.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + acc_copyout (h, N + 1); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+257\]" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-21.c b/libgomp/testsuite/libgomp.oacc-c/lib-21.c new file mode 100644 index 0000000..3a67400 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-21.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N); + + acc_copyout (h, 0); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-22.c b/libgomp/testsuite/libgomp.oacc-c/lib-22.c new file mode 100644 index 0000000..2b86da8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-22.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + (void) acc_copyin (h, N);
+ + acc_copyout (h + 1, N - 1); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+255\]" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-23.c b/libgomp/testsuite/libgomp.oacc-c/lib-23.c new file mode 100644 index 0000000..38f236d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-23.c @@ -0,0 +1,39 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h1, *h2; + + h1 = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h1[i] = 0xab; + } + + (void) acc_copyin (h1, N); + + h2 = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h2[i] = 0xde; + } + + (void) acc_copyin (h2, N); + + acc_copyout (h1, N + N); + + free (h1); + free (h2); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+512\]" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-24.c b/libgomp/testsuite/libgomp.oacc-c/lib-24.c new file mode 100644 index 0000000..d7de8e3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-24.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 1) + abort (); + } + + acc_delete (h, N); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) + abort (); + } + + d = acc_create (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 1) + abort (); + } + + acc_delete (h, N); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-25.c b/libgomp/testsuite/libgomp.oacc-c/lib-25.c new file mode 100644 index 0000000..1145828 --- /dev/null +++ 
b/libgomp/testsuite/libgomp.oacc-c/lib-25.c @@ -0,0 +1,30 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (h, N); + if (!d) + abort (); + + d = acc_create (h, N); + if (!d) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] already mapped to \[\h+,256\]" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-26.c b/libgomp/testsuite/libgomp.oacc-c/lib-26.c new file mode 100644 index 0000000..a23f56e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-26.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (h, 0); + if (!d) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-27.c b/libgomp/testsuite/libgomp.oacc-c/lib-27.c new file mode 100644 index 0000000..074fddb --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-27.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (0, N); + if (!d) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\)\] is a bad range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-28.c b/libgomp/testsuite/libgomp.oacc-c/lib-28.c new file mode 100644 index 0000000..027f7cc --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-28.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = 
acc_create (h, N); + if (!d) + abort (); + + acc_delete (0, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-29.c b/libgomp/testsuite/libgomp.oacc-c/lib-29.c new file mode 100644 index 0000000..a66de0f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-29.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (h, N); + if (!d) + abort (); + + acc_delete (h, 0); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-3.c b/libgomp/testsuite/libgomp.oacc-c/lib-3.c new file mode 100644 index 0000000..e823a41 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-3.c @@ -0,0 +1,15 @@ +/* { dg-do run } */ + +#include + +int +main (int argc, char **argv) +{ + acc_init (acc_device_host); + + acc_shutdown (acc_device_not_host); + + return 0; +} + +/* { dg-shouldfail "libgomp: device 4(4) is initialized" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-30.c b/libgomp/testsuite/libgomp.oacc-c/lib-30.c new file mode 100644 index 0000000..ce2bdb4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-30.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_create (h, N); + if (!d) + abort (); + + acc_delete (h, N - 2); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] surrounds2 \[\h+,\+254\]" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-31.c b/libgomp/testsuite/libgomp.oacc-c/lib-31.c new file mode 100644 index 0000000..25ce5a9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-31.c @@ -0,0 +1,27 @@ +/* { dg-do run } */ + +#include 
+#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_present_or_create (h, N); + if (!d) + abort (); + + if (acc_is_present (h, 1) != 1) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-32.c b/libgomp/testsuite/libgomp.oacc-c/lib-32.c new file mode 100644 index 0000000..e3f87a8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-32.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d1, *d2; + + h = (unsigned char *) malloc (N); + + d1 = acc_present_or_create (h, N); + if (!d1) + abort (); + + d2 = acc_present_or_create (h, N); + if (!d2) + abort (); + + if (d1 != d2) + abort (); + + d2 = acc_pcreate (h, N); + if (!d2) + abort (); + + if (d1 != d2) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-33.c b/libgomp/testsuite/libgomp.oacc-c/lib-33.c new file mode 100644 index 0000000..4abaa02 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-33.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d1, *d2; + + h = (unsigned char *) malloc (N); + + d1 = acc_present_or_create (h, N); + if (!d1) + abort (); + + d2 = acc_present_or_create (h, N - 2); + if (!d2) + abort (); + + if (d1 != d2) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-34.c b/libgomp/testsuite/libgomp.oacc-c/lib-34.c new file mode 100644 index 0000000..32d5d51 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-34.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d1, *d2; + + h = (unsigned char *) 
malloc (N); + + d1 = acc_present_or_create (h, N); + if (!d1) + abort (); + + d2 = acc_present_or_create (h + 2, N); + if (!d2) + abort (); + + if (d1 != d2) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+256\] not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-35.c b/libgomp/testsuite/libgomp.oacc-c/lib-35.c new file mode 100644 index 0000000..ca8edab --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-35.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_present_or_create (0, N); + if (!d) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\),+256\] is a bad range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-36.c b/libgomp/testsuite/libgomp.oacc-c/lib-36.c new file mode 100644 index 0000000..cb29397 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-36.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_present_or_create (h, 0); + if (!d) + abort (); + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-37.c b/libgomp/testsuite/libgomp.oacc-c/lib-37.c new file mode 100644 index 0000000..5a7d533 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-37.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_present_or_copyin 
(h, N); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-38.c b/libgomp/testsuite/libgomp.oacc-c/lib-38.c new file mode 100644 index 0000000..1e16a1d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-38.c @@ -0,0 +1,67 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d1, *d2; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d1 = acc_present_or_copyin (h, N); + if (!d1) + abort (); + + for (i = 0; i < N; i++) + { + h[i] = 0xab; + } + + d2 = acc_present_or_copyin (h, N); + if (!d2) + abort (); + + if (d1 != d2) + abort (); + + memset (&h[0], 0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + d2 = acc_pcopyin (h, N); + if (!d2) + abort (); + + if (d1 != d2) + abort (); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-39.c b/libgomp/testsuite/libgomp.oacc-c/lib-39.c new file mode 100644 index 0000000..db1e0b3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-39.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_present_or_copyin (0, N); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\),+256\] is a bad range" } */ diff --git 
a/libgomp/testsuite/libgomp.oacc-c/lib-4.c b/libgomp/testsuite/libgomp.oacc-c/lib-4.c new file mode 100644 index 0000000..060275b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-4.c @@ -0,0 +1,13 @@ +/* { dg-do run } */ + +#include + +int +main (int argc, char **argv) +{ + acc_init ((acc_device_t) 99); + + return 0; +} + +/* { dg-shouldfail "libgomp: device 99 is out of range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-40.c b/libgomp/testsuite/libgomp.oacc-c/lib-40.c new file mode 100644 index 0000000..cb6c422 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-40.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ + +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_present_or_copyin (h, 0); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+0\] is a bad range" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-41.c b/libgomp/testsuite/libgomp.oacc-c/lib-41.c new file mode 100644 index 0000000..01c5f3c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-41.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + h[i] = 0xab; + } + + acc_update_device (h, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != 0xab) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-42.c b/libgomp/testsuite/libgomp.oacc-c/lib-42.c new file mode 100644 index 0000000..d577fe3 --- /dev/null 
+++ b/libgomp/testsuite/libgomp.oacc-c/lib-42.c @@ -0,0 +1,35 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + acc_update_device (h, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != 0xab) + abort (); + } + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,256\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-43.c b/libgomp/testsuite/libgomp.oacc-c/lib-43.c new file mode 100644 index 0000000..ceeb155 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-43.c @@ -0,0 +1,45 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + h[i] = 0xab; + } + + acc_update_device (0, N); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != 0xab) + abort (); + } + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-44.c b/libgomp/testsuite/libgomp.oacc-c/lib-44.c new file mode 100644 index 0000000..0cabb0d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-44.c @@ -0,0 +1,45 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + h[i] = 0xab; + } + + acc_update_device (h, 0); + + acc_copyout (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != 0xab) + abort (); + } + + free (h); + + return 0; +} + +/* { 
dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-45.c b/libgomp/testsuite/libgomp.oacc-c/lib-45.c new file mode 100644 index 0000000..f9a6294 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-45.c @@ -0,0 +1,50 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + for (i = 0; i < N; i++) + { + h[i] = 0xab; + } + + acc_update_device (h, N - 2); + + acc_copyout (h, N); + + for (i = 0; i < N - 2; i++) + { + if (h[i] != 0xab) + abort (); + } + + for (i = N - 2; i < N; i++) + { + if (h[i] != i) + abort (); + } + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-46.c b/libgomp/testsuite/libgomp.oacc-c/lib-46.c new file mode 100644 index 0000000..b195725 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-46.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_update_self (h, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_delete (h, N); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-47.c b/libgomp/testsuite/libgomp.oacc-c/lib-47.c new file mode 100644 index 0000000..a7ff904 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-47.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int 
i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_update_self (0, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\(nil\),256\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-48.c b/libgomp/testsuite/libgomp.oacc-c/lib-48.c new file mode 100644 index 0000000..01d3c6c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-48.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_update_self (h, 0); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_delete (h, N); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,0\] is not mapped" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-49.c b/libgomp/testsuite/libgomp.oacc-c/lib-49.c new file mode 100644 index 0000000..a33324c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-49.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_copyin (h, N); + if (!d) + abort (); + + memset (&h[0], 0, N); + + acc_update_self (h, N - 2); + + for (i = 0; i < N - 2; i++) + { + if (h[i] != i) + abort (); + } + + for (i = N - 2; i < N; i++) + { + if (h[i] != 0) + abort (); + } + + acc_delete (h, N); + + free (h); + + return 0; +} diff --git 
a/libgomp/testsuite/libgomp.oacc-c/lib-5.c b/libgomp/testsuite/libgomp.oacc-c/lib-5.c new file mode 100644 index 0000000..961a62c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-5.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + if (acc_get_device_type () == acc_device_default) + abort (); + + acc_init (acc_device_default); + + if (acc_get_device_type () == acc_device_default) + abort (); + + acc_shutdown (acc_device_default); + + if (acc_get_num_devices (acc_device_nvidia) != 0) + { + acc_init (acc_device_nvidia); + + if (acc_get_device_type () != acc_device_nvidia) + abort (); + + acc_shutdown (acc_device_nvidia); + + acc_init (acc_device_default); + + acc_set_device_type (acc_device_nvidia); + + if (acc_get_device_type () != acc_device_nvidia) + abort (); + + acc_shutdown (acc_device_nvidia); + } + + return 0; + +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-50.c b/libgomp/testsuite/libgomp.oacc-c/lib-50.c new file mode 100644 index 0000000..e8294e1 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-50.c @@ -0,0 +1,30 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, d, N); + + if (acc_is_present (h, N) != 1) + abort (); + + acc_unmap_data (h); + + acc_free (d); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-51.c b/libgomp/testsuite/libgomp.oacc-c/lib-51.c new file mode 100644 index 0000000..29d28f2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-51.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h[N]; + void *d[N]; + + for (i = 0; i < N; i++) + 
{ + h[i] = (unsigned char *) malloc (N); + d[i] = acc_malloc (N); + + acc_map_data (h[i], d[i], N); + } + + for (i = 0; i < N; i++) + { + if (acc_is_present (h[i], N) != 1) + abort (); + } + + for (i = 0; i < N; i++) + { + acc_unmap_data (h[i]); + + if (acc_is_present (h[i], N) != 0) + abort (); + + acc_free (d[i]); + free (h[i]); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-52.c b/libgomp/testsuite/libgomp.oacc-c/lib-52.c new file mode 100644 index 0000000..780db31 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-52.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (0, d, N); + + acc_unmap_data (h); + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[(nil),+256\]->\[\h+,\+256\] is a bad map" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-53.c b/libgomp/testsuite/libgomp.oacc-c/lib-53.c new file mode 100644 index 0000000..657adde --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-53.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, 0, N); + + acc_unmap_data (h); + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+256\]->\[(nil),\+256\] is a bad map" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-54.c b/libgomp/testsuite/libgomp.oacc-c/lib-54.c new file mode 100644 index 0000000..1f3df80 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-54.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, d, 
0); + + acc_unmap_data (h); + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \[\h+,\+0\]->\[\h+,\+0\] is a bad map" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-55.c b/libgomp/testsuite/libgomp.oacc-c/lib-55.c new file mode 100644 index 0000000..286653f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-55.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + int i; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + for (i = 0; i < N; i++) + { + acc_map_data ((void *)((uintptr_t) h + (uintptr_t) i), + (void *)((uintptr_t) d + (uintptr_t) i), 1); + } + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 1) /* probe each individually mapped byte, not just h[1] */ + abort (); + } + + for (i = 0; i < N; i++) + { + acc_unmap_data (h + i); + } + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) /* no byte may remain mapped after the unmap loop */ + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-56.c b/libgomp/testsuite/libgomp.oacc-c/lib-56.c new file mode 100644 index 0000000..e3f5a80 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-56.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, d, N >> 1); + + if (acc_is_present (h, 1) != 1) + abort (); + + if (acc_is_present (h + (N >> 1), 1) != 0) + abort (); + + acc_unmap_data (h); + + acc_free (d); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-57.c b/libgomp/testsuite/libgomp.oacc-c/lib-57.c new file mode 100644 index 0000000..f9043a4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-57.c @@ -0,0 
+1,28 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, d, N); + + acc_unmap_data (d); + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \h+ is not a mapped block" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-58.c b/libgomp/testsuite/libgomp.oacc-c/lib-58.c new file mode 100644 index 0000000..9d6e27d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-58.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, d, N); + + acc_unmap_data (0); + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: \(nil\) is not a mapped block" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-59.c b/libgomp/testsuite/libgomp.oacc-c/lib-59.c new file mode 100644 index 0000000..2f087ae --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-59.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + d = acc_malloc (N); + + acc_map_data (h, d, N); + + for (i = 0; i < N; i++) + { + if (acc_hostptr ((void *)((uintptr_t) d + (uintptr_t) i)) != + (void *)((uintptr_t) h + (uintptr_t) i)) + abort (); + } + + for (i = 0; i < N; i++) + { + if (acc_deviceptr ((void *)((uintptr_t) h + (uintptr_t) i)) != + (void *)((uintptr_t) d + (uintptr_t) i)) + abort (); + } + + acc_unmap_data (h); + + for (i = 0; i < N; i++) + { + if (acc_hostptr ((void *)((uintptr_t) d + (uintptr_t) i)) != 0) + abort (); + } + + for (i = 0; i < N; i++) + { + if (acc_deviceptr (h + 
i) != 0) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-6.c b/libgomp/testsuite/libgomp.oacc-c/lib-6.c new file mode 100644 index 0000000..afdd480 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-6.c @@ -0,0 +1,39 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + int devnum; + + if (acc_get_device_type () == acc_device_default) + abort (); + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + acc_set_device_type (acc_device_nvidia); + + if (acc_get_device_type () != acc_device_nvidia) + abort (); + + acc_shutdown (acc_device_nvidia); + + acc_set_device_type (acc_device_nvidia); + + if (acc_get_device_type () != acc_device_nvidia) + abort (); + + devnum = acc_get_num_devices (acc_device_host); + if (devnum != 1) + abort (); + + acc_shutdown (acc_device_nvidia); + + if (acc_get_device_type () == acc_device_default) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-60.c b/libgomp/testsuite/libgomp.oacc-c/lib-60.c new file mode 100644 index 0000000..ccae728 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-60.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, h, N); + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) + abort (); + } + + memset (&h[0], 0, N); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + for (i = 0; i < N; i++) + { + if (acc_is_present (h + i, 1) != 0) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-61.c 
b/libgomp/testsuite/libgomp.oacc-c/lib-61.c new file mode 100644 index 0000000..ce66ced --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-61.c @@ -0,0 +1,70 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h[N]; + void *d[N]; + + for (i = 0; i < N; i++) + { + int j; + unsigned char *p; + + h[i] = (unsigned char *) malloc (N); + + p = h[i]; + + for (j = 0; j < N; j++) + { + p[j] = i; + } + + d[i] = acc_malloc (N); + + acc_memcpy_to_device (d[i], h[i], N); + + for (j = 0; j < N; j++) + { + if (acc_is_present (h[i] + j, 1) != 0) + abort (); + } + } + + for (i = 0; i < N; i++) + { + int j; + unsigned char *p; + + memset (h[i], 0, N); + + acc_memcpy_from_device (h[i], d[i], N); + + p = h[i]; + + for (j = 0; j < N; j++) + { + if (p[j] != i) + abort (); + } + + for (j = 0; j < N; j++) + { + if (acc_is_present (h[i] + j, 1) != 0) + abort (); + } + + acc_free (d[i]); + + free (h[i]); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-62.c b/libgomp/testsuite/libgomp.oacc-c/lib-62.c new file mode 100644 index 0000000..e6178e2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-62.c @@ -0,0 +1,49 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + acc_init (acc_device_nvidia); + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, h, N); + + memset (&h[0], 0, N); + + acc_memcpy_to_device (d, h, N << 1); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-shouldfail "libgomp: invalid size" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-63.c 
b/libgomp/testsuite/libgomp.oacc-c/lib-63.c new file mode 100644 index 0000000..ca237ec --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-63.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (0, h, N); + + memset (&h[0], 0, N); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: invalid device address" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-64.c b/libgomp/testsuite/libgomp.oacc-c/lib-64.c new file mode 100644 index 0000000..850fd2e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-64.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, 0, N); + + memset (&h[0], 0, N); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: invalid host address" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-65.c b/libgomp/testsuite/libgomp.oacc-c/lib-65.c new file mode 100644 index 0000000..26c8cef --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-65.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, d, N); + + memset 
(&h[0], 0, N); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: invalid host or device address" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-66.c b/libgomp/testsuite/libgomp.oacc-c/lib-66.c new file mode 100644 index 0000000..360c05b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-66.c @@ -0,0 +1,47 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + acc_init (acc_device_nvidia); + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, h, N); + + memset (&h[0], 0, N); + + acc_memcpy_to_device (d, h, 0); + + acc_memcpy_from_device (h, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + acc_shutdown (acc_device_nvidia); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-67.c b/libgomp/testsuite/libgomp.oacc-c/lib-67.c new file mode 100644 index 0000000..01b8b2d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-67.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, h, N); + + memset (&h[0], 0, N); + + acc_memcpy_from_device (0, d, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: invalid host address" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-68.c b/libgomp/testsuite/libgomp.oacc-c/lib-68.c new file mode 100644 index 0000000..3ff5bd7 --- /dev/null +++ 
b/libgomp/testsuite/libgomp.oacc-c/lib-68.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 256; + int i; + unsigned char *h; + void *d; + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_memcpy_to_device (d, h, N); + + memset (&h[0], 0, N); + + acc_memcpy_from_device (h, 0, N); + + for (i = 0; i < N; i++) + { + if (h[i] != i) + abort (); + } + + acc_free (d); + + free (h); + + return 0; +} + +/* { dg-shouldfail "libgomp: invalid device address" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-69.c b/libgomp/testsuite/libgomp.oacc-c/lib-69.c new file mode 100644 index 0000000..5462f12 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-69.c @@ -0,0 +1,124 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, 
"delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + stream = (CUstream) acc_get_cuda_stream (0); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + if (acc_async_test (0) != 0) + { + fprintf (stderr, "asynchronous operation not running\n"); + abort (); + } + + sleep (1); + + if (acc_async_test (0) != 1) + { + fprintf (stderr, "found asynchronous operation still running\n"); + abort (); + } + + acc_unmap_data (a); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-7.c b/libgomp/testsuite/libgomp.oacc-c/lib-7.c new file mode 100644 index 0000000..e78734b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-7.c @@ -0,0 +1,18 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + if (acc_get_num_devices (acc_device_none) != 0) + abort (); + + if (acc_get_num_devices (acc_device_host) == 0) + abort (); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-70.c b/libgomp/testsuite/libgomp.oacc-c/lib-70.c new file mode 100644 index 0000000..912b266 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-70.c @@ -0,0 +1,136 @@ +/* { dg-do run { target 
openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + const int N = 10; + int i; + CUstream streams[N]; + unsigned long *a, *d_a, dticks; + int nbytes; + float dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + for (i = 0; i < N; i++) + { + streams[i] = (CUstream) acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel 
(delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + if (acc_async_test (i) != 0) + { + fprintf (stderr, "asynchronous operation not running\n"); + abort (); + } + } + + sleep ((int) (dtime / 1000.0f) + 1); + + for (i = 0; i < N; i++) + { + if (acc_async_test (i) != 1) + { + fprintf (stderr, "found asynchronous operation still running\n"); + abort (); + } + } + + acc_unmap_data (a); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-71.c b/libgomp/testsuite/libgomp.oacc-c/lib-71.c new file mode 100644 index 0000000..a045379 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-71.c @@ -0,0 +1,119 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, 
"delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + acc_set_cuda_stream (0, stream); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + if (acc_async_test (1) != 0) + { + fprintf (stderr, "asynchronous operation not running\n"); + abort (); + } + + sleep ((int) (dtime / 1000.0f) + 1); + + if (acc_async_test (1) != 1) + { + fprintf (stderr, "found asynchronous operation still running\n"); + abort (); + } + + acc_unmap_data (a); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-shouldfail "libgomp: unknown async \d" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-72.c b/libgomp/testsuite/libgomp.oacc-c/lib-72.c new file mode 100644 index 0000000..e383ba0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-72.c @@ -0,0 +1,121 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf 
(stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + if (acc_async_test_all () != 0) + { + fprintf (stderr, "asynchronous operation not running\n"); + abort (); + } + + sleep ((int) (dtime / 1000.f) + 1); + + if (acc_async_test_all () != 1) + { + fprintf (stderr, "found asynchronous operation still running\n"); + abort (); + } + + acc_unmap_data (a); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-73.c b/libgomp/testsuite/libgomp.oacc-c/lib-73.c new file mode 100644 index 0000000..43a8b7e --- /dev/null +++ 
b/libgomp/testsuite/libgomp.oacc-c/lib-73.c @@ -0,0 +1,134 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + const int N = 10; + int i; + CUstream streams[N]; + unsigned long *a, *d_a, dticks; + int nbytes; + float dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + for (i = 0; i < N; i++) + { + streams[i] = (CUstream) acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream 
(i, streams[i])) + abort (); + } + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + } + + if (acc_async_test_all () != 0) + { + fprintf (stderr, "asynchronous operation not running\n"); + abort (); + } + + sleep ((int) (dtime / 1000.0f) + 1); + + if (acc_async_test_all () != 1) /* after the sleep every queued kernel should have finished */ + { + fprintf (stderr, "found asynchronous operation still running\n"); + abort (); + } + + acc_unmap_data (a); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-74.c b/libgomp/testsuite/libgomp.oacc-c/lib-74.c new file mode 100644 index 0000000..0726ee4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-74.c @@ -0,0 +1,139 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad 
failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + stream = (CUstream) acc_get_cuda_stream (0); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + init_timers (1); + + start_timer (0); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + acc_wait (0); + + atime = stop_timer (0); + + if (atime < dtime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + start_timer (0); + + acc_wait (0); + + atime = stop_timer (0); + + if (0.010 < atime) + { + fprintf (stderr, "actual time too long\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-75.c b/libgomp/testsuite/libgomp.oacc-c/lib-75.c new file mode 100644 index 0000000..1942211 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-75.c @@ -0,0 +1,141 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + int N; + int i; + CUstream stream; + 
unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime, hitime, lotime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + N = nprocs; + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + stream = (CUstream) acc_get_cuda_stream (0); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + init_timers (1); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + start_timer (0); + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + acc_wait (0); + } + + atime = stop_timer (0); + + hitime = dtime * N; + hitime += hitime * 0.02; + + lotime = 
dtime * N; + lotime -= lotime * 0.02; + + if (atime > hitime || atime < lotime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-76.c b/libgomp/testsuite/libgomp.oacc-c/lib-76.c new file mode 100644 index 0000000..11d9d62 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-76.c @@ -0,0 +1,147 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + int N; + int i; + CUstream *streams; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime, hitime, lotime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * 
clkrate); + + N = nprocs; + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + streams = (CUstream *) malloc (N * sizeof (void *)); + + for (i = 0; i < N; i++) + { + streams[i] = (CUstream) acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + init_timers (1); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + start_timer (0); + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + acc_wait (i); + } + + atime = stop_timer (0); + + hitime = dtime * N; + hitime += hitime * 0.02; + + lotime = dtime * N; + lotime -= lotime * 0.02; + + if (atime > hitime || atime < lotime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (streams); + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-77.c b/libgomp/testsuite/libgomp.oacc-c/lib-77.c new file mode 100644 index 0000000..e47212b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-77.c @@ -0,0 +1,135 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = 
acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + acc_set_cuda_stream (0, stream); + + init_timers (1); + + start_timer (0); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + acc_wait (1); + + atime = stop_timer (0); + + if (atime < dtime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + start_timer (0); + + acc_wait (1); + + atime = stop_timer (0); + + if (0.010 < atime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { 
dg-shouldfail "libgomp: unknown async \d" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-78.c b/libgomp/testsuite/libgomp.oacc-c/lib-78.c new file mode 100644 index 0000000..4f58fb2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-78.c @@ -0,0 +1,140 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + stream = (CUstream) acc_get_cuda_stream (0); + if (stream != NULL) + abort (); + + r = 
cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + init_timers (1); + + start_timer (0); + + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + acc_wait_all (); + + atime = stop_timer (0); + + if (atime < dtime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + start_timer (0); + + acc_wait_all (); + + atime = stop_timer (0); + + if (0.010 < atime) + { + fprintf (stderr, "actual time too long\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-79.c b/libgomp/testsuite/libgomp.oacc-c/lib-79.c new file mode 100644 index 0000000..ef3df13 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-79.c @@ -0,0 +1,167 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + int N; + int i; + CUstream stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime, hitime, lotime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + devnum = 2; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = 
cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + N = nprocs; + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (1, stream)) + abort (); + + stream = (CUstream) acc_get_cuda_stream (0); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + init_timers (1); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + start_timer (0); + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + } + + acc_wait_async (0, 1); + + if (acc_async_test (0) != 0) + abort (); + + if (acc_async_test (1) != 0) + abort (); + + acc_wait (1); + + atime = stop_timer (0); + + if (acc_async_test (0) != 1) + abort (); + + if (acc_async_test (1) != 1) + abort (); + + hitime = dtime * N; + hitime += hitime * 0.02; + + lotime = dtime * N; + lotime -= lotime * 0.02; + + if (atime > hitime || atime < lotime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + 
+ acc_unmap_data (a); + + fini_timers (); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-80.c b/libgomp/testsuite/libgomp.oacc-c/lib-80.c new file mode 100644 index 0000000..0b5ec24 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-80.c @@ -0,0 +1,132 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + CUstream stream; + int N; + int i; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 200.0; + + dticks = (unsigned long) (dtime * clkrate); + + N = nprocs; + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + r = cuStreamCreate 
(&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + acc_set_cuda_stream (1, stream); + + init_timers (1); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + start_timer (0); + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + } + + acc_wait_async (1, 1); + + acc_wait (1); + + atime = stop_timer (0); + + if (atime < dtime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-shouldfail "libgomp: identical parameters" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-81.c b/libgomp/testsuite/libgomp.oacc-c/lib-81.c new file mode 100644 index 0000000..d5f18f0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-81.c @@ -0,0 +1,211 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay; + CUmodule module; + CUresult r; + int N; + int i; + CUstream *streams, stream; + unsigned long *a, *d_a, dticks; + int nbytes; + float atime, dtime; + void *kargs[2]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, 
dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay, module, "delay"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = nprocs * sizeof (unsigned long); + + dtime = 500.0; + + dticks = (unsigned long) (dtime * clkrate); + + N = nprocs; + + a = (unsigned long *) malloc (nbytes); + d_a = (unsigned long *) acc_malloc (nbytes); + + acc_map_data (a, d_a, nbytes); + + streams = (CUstream *) malloc (N * sizeof (void *)); + + for (i = 0; i < N; i++) + { + streams[i] = (CUstream) acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + init_timers (1); + + kargs[0] = (void *) &d_a; + kargs[1] = (void *) &dticks; + + stream = (CUstream) acc_get_cuda_stream (N); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (N, stream)) + abort (); + + start_timer (0); + + for (i = 0; i < N; i++) + { + r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + } + + acc_wait_all_async (N); + + for (i = 0; i <= N; i++) + { + if (acc_async_test (i) != 0) + abort (); + } + + acc_wait (N); + + for (i = 0; i <= N; i++) + { + if (acc_async_test (i) != 1) + abort (); + } + + atime = stop_timer (0); + + if (atime < dtime) + { + fprintf (stderr, "actual time < delay time\n"); + abort (); + } + + 
start_timer (0); + + stream = (CUstream) acc_get_cuda_stream (N + 1); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (N + 1, stream)) + abort (); + + acc_wait_all_async (N + 1); + + acc_wait (N + 1); + + atime = stop_timer (0); + + if (0.10 < atime) + { + fprintf (stderr, "actual time too long\n"); + abort (); + } + + start_timer (0); + + acc_wait_all_async (N); + + acc_wait (N); + + atime = stop_timer (0); + + if (0.10 < atime) + { + fprintf (stderr, "actual time too long\n"); + abort (); + } + + acc_unmap_data (a); + + fini_timers (); + + free (streams); + free (a); + acc_free (d_a); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-82.c b/libgomp/testsuite/libgomp.oacc-c/lib-82.c new file mode 100644 index 0000000..be30a7f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-82.c @@ -0,0 +1,144 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + CUdevice dev; + CUfunction delay2; + CUmodule module; + CUresult r; + int N; + int i; + CUstream *streams; + unsigned long **a, **d_a, *tid, ticks; + int nbytes; + void *kargs[3]; + int clkrate; + int devnum, nprocs; + + acc_init (acc_device_nvidia); + + devnum = acc_get_device_num (acc_device_nvidia); + + r = cuDeviceGet (&dev, devnum); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGet failed: %d\n", r); + abort (); + } + + r = + cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + dev); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); + if (r != CUDA_SUCCESS) + 
{ + fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); + abort (); + } + + r = cuModuleLoad (&module, "subr.ptx"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleLoad failed: %d\n", r); + abort (); + } + + r = cuModuleGetFunction (&delay2, module, "delay2"); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); + abort (); + } + + nbytes = sizeof (int); + + ticks = (unsigned long) (200.0 * clkrate); + + N = nprocs; + + streams = (CUstream *) malloc (N * sizeof (void *)); + + a = (unsigned long **) malloc (N * sizeof (unsigned long *)); + d_a = (unsigned long **) malloc (N * sizeof (unsigned long *)); + tid = (unsigned long *) malloc (N * sizeof (unsigned long)); + + for (i = 0; i < N; i++) + { + a[i] = (unsigned long *) malloc (sizeof (unsigned long)); + *a[i] = N; + d_a[i] = (unsigned long *) acc_malloc (nbytes); + tid[i] = i; + + acc_map_data (a[i], d_a[i], nbytes); + + streams[i] = (CUstream) acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + for (i = 0; i < N; i++) + { + kargs[0] = (void *) &d_a[i]; + kargs[1] = (void *) &ticks; + kargs[2] = (void *) &tid[i]; + + r = cuLaunchKernel (delay2, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuLaunchKernel failed: %d\n", r); + abort (); + } + + ticks = (unsigned long) (50.0 * clkrate); + } + + acc_wait_all_async (0); + + for (i = 0; i < N; i++) + { + acc_copyout (a[i], nbytes); + if (*a[i] != i) + abort (); + } + + free (streams); + + for (i = 0; i < N; i++) + { + free (a[i]); + } + + free (a); + free (d_a); + free (tid); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-83.c 
b/libgomp/testsuite/libgomp.oacc-c/lib-83.c new file mode 100644 index 0000000..1c2e52b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-83.c @@ -0,0 +1,58 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include "timer.h" + +int +main (int argc, char **argv) +{ + float atime; + CUstream stream; + CUresult r; + + acc_init (acc_device_nvidia); + + (void) acc_get_device_num (acc_device_nvidia); + + init_timers (1); + + stream = (CUstream) acc_get_cuda_stream (0); + if (stream != NULL) + abort (); + + r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, stream)) + abort (); + + start_timer (0); + + acc_wait_all_async (0); + + acc_wait (0); + + atime = stop_timer (0); + + if (0.010 < atime) + { + fprintf (stderr, "actual time too long\n"); + abort (); + } + + fini_timers (); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-84.c b/libgomp/testsuite/libgomp.oacc-c/lib-84.c new file mode 100644 index 0000000..786b908 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-84.c @@ -0,0 +1,66 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 100; + int i; + CUstream *streams; + CUstream s; + CUresult r; + + acc_init (acc_device_nvidia); + + (void) acc_get_device_num (acc_device_nvidia); + + streams = (CUstream *) malloc (N * sizeof (void *)); + + for (i = 0; i < N; i++) + { + streams[i] = (CUstream) acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", 
r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + for (i = 0; i < N; i++) + { + int j; + int cnt; + + cnt = 0; + + s = streams[i]; + + for (j = 0; j < N; j++) + { + if (s == streams[j]) + cnt++; + } + + if (cnt != 1) + abort (); + } + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-85.c b/libgomp/testsuite/libgomp.oacc-c/lib-85.c new file mode 100644 index 0000000..cf925a7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-85.c @@ -0,0 +1,52 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 100; + int i; + CUstream *streams; + CUstream s; + CUresult r; + + acc_init (acc_device_nvidia); + + (void) acc_get_device_num (acc_device_nvidia); + + streams = (CUstream *) malloc (N * sizeof (void *)); + + for (i = 0; i < N; i++) + { + streams[i] = (CUstream) acc_get_cuda_stream (i); + if (streams[i] != NULL) + abort (); + + r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (i, streams[i])) + abort (); + } + + s = NULL; + + if (acc_set_cuda_stream (N + 1, s) != 0) + abort (); + + acc_shutdown (acc_device_nvidia); + + exit (0); +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-86.c b/libgomp/testsuite/libgomp.oacc-c/lib-86.c new file mode 100644 index 0000000..b8a8ee9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-86.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + if (acc_get_current_cuda_device () != 0) + abort (); + + acc_init (acc_device_host); + + if (acc_get_current_cuda_device () != 0) + 
abort (); + + acc_shutdown (acc_device_host); + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + if (acc_get_current_cuda_device () != 0) + abort (); + + acc_init (acc_device_nvidia); + + if (acc_get_current_cuda_device () == 0) + abort (); + + acc_shutdown (acc_device_nvidia); + + if (acc_get_current_cuda_device () != 0) + abort (); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-87.c b/libgomp/testsuite/libgomp.oacc-c/lib-87.c new file mode 100644 index 0000000..147d443 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-87.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ + +#include +#include +#include + +int +main (int argc, char **argv) +{ + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + if (acc_get_current_cuda_context () != 0) + abort (); + + acc_init (acc_device_host); + + if (acc_get_current_cuda_context () != 0) + abort (); + + acc_shutdown (acc_device_host); + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + if (acc_get_current_cuda_context () != 0) + abort (); + + acc_init (acc_device_nvidia); + + if (acc_get_current_cuda_context () == 0) + abort (); + + acc_shutdown (acc_device_nvidia); + + if (acc_get_current_cuda_context () != 0) + abort (); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-88.c b/libgomp/testsuite/libgomp.oacc-c/lib-88.c new file mode 100644 index 0000000..10f4ad8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-88.c @@ -0,0 +1,111 @@ +/* { dg-do run } */ + +#include +#include +#include +#include +#include +#include + +unsigned char *x; +void *d_x; +const int N = 256; + +static void * +test (void *arg) +{ + int i; + + if (acc_get_current_cuda_context () != NULL) + abort (); + + if (acc_is_present (x, N) != 1) + abort (); + + memset (x, 0, N); + + acc_copyout (x, N); + + for (i = 0; i < N; i++) + { + if (x[i] != i) + abort (); + + x[i] = N - i - 1; + } + + d_x = acc_copyin (x, N); + + 
return 0; +} + +int +main (int argc, char **argv) +{ + const int nthreads = 1; + int i; + pthread_attr_t attr; + pthread_t *tid; + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + acc_init (acc_device_nvidia); + + x = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + x[i] = i; + } + + d_x = acc_copyin (x, N); + + if (acc_is_present (x, N) != 1) + abort (); + + if (pthread_attr_init (&attr) != 0) + perror ("pthread_attr_init failed"); + + tid = (pthread_t *) malloc (nthreads * sizeof (pthread_t)); + + for (i = 0; i < nthreads; i++) + { + if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i)) + != 0) + perror ("pthread_create failed"); + } + + if (pthread_attr_destroy (&attr) != 0) + perror ("pthread_attr_destroy failed"); + + for (i = 0; i < nthreads; i++) + { + void *res; + + if (pthread_join (tid[i], &res) != 0) + perror ("pthread join failed"); + } + + if (acc_is_present (x, N) != 1) + abort (); + + memset (x, 0, N); + + acc_copyout (x, N); + + for (i = 0; i < N; i++) + { + if (x[i] != N - i - 1) + abort (); + } + + if (acc_is_present (x, N) != 0) + abort (); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-89.c b/libgomp/testsuite/libgomp.oacc-c/lib-89.c new file mode 100644 index 0000000..061c409 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-89.c @@ -0,0 +1,118 @@ +/* { dg-do run } */ + +#include +#include +#include +#include +#include +#include +#include + +unsigned char **x; +void **d_x; +const int N = 16; +const int NTHREADS = 32; + +static void * +test (void *arg) +{ + int i; + int tid; + unsigned char *p; + int devnum; + + tid = (int) (long) arg; + + devnum = acc_get_device_num (acc_device_nvidia); + acc_set_device_num (devnum, acc_device_nvidia); + + if (acc_get_current_cuda_context () == NULL) + abort (); + + p = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + p[i] = tid; + } + + x[tid] = p; + + 
d_x[tid] = acc_copyin (p, N); + + return 0; +} + +int +main (int argc, char **argv) +{ + int i; + pthread_attr_t attr; + pthread_t *tid; + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + acc_init (acc_device_nvidia); + + x = (unsigned char **) malloc (NTHREADS * N); + d_x = (void **) malloc (NTHREADS * N); + + if (pthread_attr_init (&attr) != 0) + perror ("pthread_attr_init failed"); + + tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t)); + + for (i = 0; i < NTHREADS; i++) + { + if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i)) + != 0) + perror ("pthread_create failed"); + } + + if (pthread_attr_destroy (&attr) != 0) + perror ("pthread_attr_destroy failed"); + + for (i = 0; i < NTHREADS; i++) + { + void *res; + + if (pthread_join (tid[i], &res) != 0) + perror ("pthread join failed"); + } + + for (i = 0; i < NTHREADS; i++) + { + if (acc_is_present (x[i], N) != 1) + abort (); + } + + for (i = 0; i < NTHREADS; i++) + { + memset (x[i], 0, N); + acc_copyout (x[i], N); + } + + for (i = 0; i < NTHREADS; i++) + { + unsigned char *p; + int j; + + p = x[i]; + + for (j = 0; j < N; j++) + { + if (p[j] != i) + abort (); + } + + if (acc_is_present (x[i], N) != 0) + abort (); + } + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-9.c b/libgomp/testsuite/libgomp.oacc-c/lib-9.c new file mode 100644 index 0000000..a4cf7f2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-9.c @@ -0,0 +1,70 @@ +/* { dg-do run } */ + +#include +#include + +int +main (int argc, char **argv) +{ + int i; + int num_devices; + int devnum; + acc_device_t devtype = acc_device_host; + +#if ACC_DEVICE_TYPE_nvidia + devtype = acc_device_nvidia; +#endif + + num_devices = acc_get_num_devices (devtype); + if (num_devices == 0) + return 0; + + acc_init (devtype); + + for (i = 0; i < num_devices; i++) + { + acc_set_device_num (i, devtype); + devnum = acc_get_device_num (devtype); + 
if (devnum != i) + abort (); + } + + acc_shutdown (devtype); + + num_devices = acc_get_num_devices (devtype); + if (num_devices == 0) + abort (); + + for (i = 0; i < num_devices; i++) + { + acc_set_device_num (i, devtype); + devnum = acc_get_device_num (devtype); + if (devnum != i) + abort (); + } + + acc_shutdown (devtype); + + acc_init (devtype); + + acc_set_device_num (0, devtype); + + devnum = acc_get_device_num (devtype); + if (devnum != 0) + abort (); + + if (num_devices > 1) + { + acc_set_device_num (1, (acc_device_t) 0); + + devnum = acc_get_device_num (devtype); + if (devnum != 1) + abort (); + } + + acc_shutdown (devtype); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-90.c b/libgomp/testsuite/libgomp.oacc-c/lib-90.c new file mode 100644 index 0000000..d17755b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-90.c @@ -0,0 +1,137 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned char **x; +void **d_x; +const int N = 16; +const int NTHREADS = 32; + +static void * +test (void *arg) +{ + int i; + int tid; + unsigned char *p; + int devnum; + + tid = (int) (long) arg; + + devnum = acc_get_device_num (acc_device_nvidia); + acc_set_device_num (devnum, acc_device_nvidia); + + if (acc_get_current_cuda_context () == NULL) + abort (); + + p = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + p[i] = tid; + } + + x[tid] = p; + + d_x[tid] = acc_copyin (p, N); + + acc_wait_all (); + + return 0; +} + +int +main (int argc, char **argv) +{ + int i; + pthread_attr_t attr; + pthread_t *tid; + CUresult r; + CUstream s; + + acc_init (acc_device_nvidia); + + x = (unsigned char **) malloc (NTHREADS * N); + d_x = (void **) malloc (NTHREADS * N); + + if (pthread_attr_init (&attr) != 0) + perror ("pthread_attr_init failed"); + + tid = (pthread_t *) malloc 
(NTHREADS * sizeof (pthread_t)); + + r = cuStreamCreate (&s, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, s)) + abort (); + + for (i = 0; i < NTHREADS; i++) + { + if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i)) + != 0) + perror ("pthread_create failed"); + } + + if (pthread_attr_destroy (&attr) != 0) + perror ("pthread_attr_destroy failed"); + + for (i = 0; i < NTHREADS; i++) + { + void *res; + + if (pthread_join (tid[i], &res) != 0) + perror ("pthread join failed"); + } + + + for (i = 0; i < NTHREADS; i++) + { + if (acc_is_present (x[i], N) != 1) + abort (); + } + + acc_get_cuda_stream (1); + + for (i = 0; i < NTHREADS; i++) + { + memset (x[i], 0, N); + acc_copyout (x[i], N); + } + + acc_wait_all (); + + for (i = 0; i < NTHREADS; i++) + { + unsigned char *p; + int j; + + p = x[i]; + + for (j = 0; j < N; j++) + { + if (p[j] != i) + abort (); + } + + if (acc_is_present (x[i], N) != 0) + abort (); + } + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-91.c b/libgomp/testsuite/libgomp.oacc-c/lib-91.c new file mode 100644 index 0000000..e00ef4f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-91.c @@ -0,0 +1,84 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda" } */ + +#include +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + const int N = 1024 * 1024; + int i; + unsigned char *h; + void *d; + float async, sync; + struct timeval start, stop; + CUresult r; + CUstream s; + + acc_init (acc_device_nvidia); + + h = (unsigned char *) malloc (N); + + for (i = 0; i < N; i++) + { + h[i] = i; + } + + d = acc_malloc (N); + + acc_map_data (h, d, N); + + gettimeofday (&start, NULL); + + for (i = 0; i < 100; i++) + { +#pragma acc update device(h[0:N]) + } + + gettimeofday 
(&stop, NULL); + + sync = (float) (stop.tv_sec - start.tv_sec); + sync += (float) ((stop.tv_usec - start.tv_usec) / 1000000.0); + + gettimeofday (&start, NULL); + + r = cuStreamCreate (&s, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + if (!acc_set_cuda_stream (0, s)) + abort (); + + for (i = 0; i < 100; i++) + { +#pragma acc update device(h[0:N]) async(0) + } + + acc_wait_all (); + + gettimeofday (&stop, NULL); + + async = (float) (stop.tv_sec - start.tv_sec); + async += (float) ((stop.tv_usec - start.tv_usec) / 1000000.0); + + if (async > (sync * 1.5)) + abort (); + + acc_free (d); + + free (h); + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/lib-92.c b/libgomp/testsuite/libgomp.oacc-c/lib-92.c new file mode 100644 index 0000000..18193e0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/lib-92.c @@ -0,0 +1,112 @@ +/* { dg-do run } */ + +#include +#include +#include +#include +#include +#include + +unsigned char **x; +void **d_x; +const int N = 32; +const int NTHREADS = 32; + +static void * +test (void *arg) +{ + int i; + int tid; + unsigned char *p; + int devnum; + + tid = (int) (long) arg; + + devnum = acc_get_device_num (acc_device_nvidia); + acc_set_device_num (devnum, acc_device_nvidia); + + if (acc_get_current_cuda_context () == NULL) + abort (); + + acc_copyout (x[tid], N); + + p = x[tid]; + + for (i = 0; i < N; i++) + { + if (p[i] != i) + abort (); + } + + return 0; +} + +int +main (int argc, char **argv) +{ + int i; + pthread_attr_t attr; + pthread_t *tid; + unsigned char *p; + + if (acc_get_num_devices (acc_device_nvidia) == 0) + return 0; + + acc_init (acc_device_nvidia); + + x = (unsigned char **) malloc (NTHREADS * N); + d_x = (void **) malloc (NTHREADS * N); + + for (i = 0; i < N; i++) + { + int j; + + p = (unsigned char *) malloc (N); + + x[i] = p; + + for (j = 0; j < N; j++) + { + p[j] = j; + 
} + + d_x[i] = acc_copyin (p, N); + } + + if (pthread_attr_init (&attr) != 0) + perror ("pthread_attr_init failed"); + + tid = (pthread_t *) malloc (NTHREADS * sizeof (pthread_t)); + + acc_get_cuda_stream (1); + + for (i = 0; i < NTHREADS; i++) + { + if (pthread_create (&tid[i], &attr, &test, (void *) (unsigned long) (i)) + != 0) + perror ("pthread_create failed"); + } + + if (pthread_attr_destroy (&attr) != 0) + perror ("pthread_attr_destroy failed"); + + for (i = 0; i < NTHREADS; i++) + { + void *res; + + if (pthread_join (tid[i], &res) != 0) + perror ("pthread join failed"); + } + + for (i = 0; i < NTHREADS; i++) + { + if (acc_is_present (x[i], N) != 0) + abort (); + } + + acc_shutdown (acc_device_nvidia); + + return 0; +} + +/* { dg-output "" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/nested-1.c b/libgomp/testsuite/libgomp.oacc-c/nested-1.c new file mode 100644 index 0000000..ededf2b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/nested-1.c @@ -0,0 +1,680 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + int N = 8; + float *a, *b, *c, *d; + int i; + + a = (float *) malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + c = (float *) malloc (N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + } + +#pragma acc data copyin (a[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 3.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 1.0; + } + +#pragma acc data copyin (a[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < 
N; i++) + { + if (b[i] != 5.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + + d = (float *) acc_copyin (&a[0], N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc data present_or_copyin (a[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + +#pragma acc data copyin (a[0:N]) present_or_copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 2.0; + } + + d = (float *) acc_copyin (&b[0], N * sizeof (float)); + +#pragma acc data copyin (a[0:N]) present_or_copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 2.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + acc_free (d); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 4.0; + } + +#pragma acc data copy (a[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = a[ii] + 2; + } + } + } + + 
for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 6.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 7.0; + } + +#pragma acc data present_or_copy (a[0:N]) present_or_copy (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = b[ii] + 2; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 9.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 7.0; + } + + d = (float *) acc_copyin (&a[0], N * sizeof (float)); + d = (float *) acc_copyin (&b[0], N * sizeof (float)); + +#pragma acc data present_or_copy (a[0:N]) present_or_copy (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = a[ii] + 1; + b[ii] = b[ii] + 2; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 7.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + d = (float *) acc_deviceptr (&a[0]); + acc_unmap_data (&a[0]); + acc_free (d); + + d = (float *) acc_deviceptr (&b[0]); + acc_unmap_data (&b[0]); + acc_free (d); + + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 7.0; + } + +#pragma acc data copyin (a[0:N]) create (c[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 3.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + if 
(acc_is_present (&c[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 8.0; + } + +#pragma acc data copyin (a[0:N]) present_or_create (c[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 4.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&c[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 5.0; + } + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc data copyin (a[0:N]) present_or_create (c[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 2.0) + abort (); + + if (b[i] != 2.0) + abort (); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (acc_is_present (b, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort (); + + d = (float *) acc_deviceptr (c); + + acc_unmap_data (c); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 8.0; + } + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc data copyin (a[0:N]) present (c[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 4.0) + abort (); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (acc_is_present (b, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort (); + + 
acc_unmap_data (c); + + if (acc_is_present (c, (N * sizeof (float)))) + abort (); + + acc_free (d); + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort (); + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (b, d, N * sizeof (float)); + + if (!acc_is_present (b, (N * sizeof (float)))) + abort (); + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (a, d, N * sizeof (float)); + + if (!acc_is_present (a, (N * sizeof (float)))) + abort (); + +#pragma acc data present (a[0:N]) present (c[0:N]) present (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + a[ii] = 1.0; + c[ii] = 2.0; + b[ii] = 4.0; + } + } + } + + if (!acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (b, (N * sizeof (float)))) + abort (); + + if (!acc_is_present (c, (N * sizeof (float)))) + abort (); + + acc_copyout (b, N * sizeof (float)); + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 4.0) + abort (); + } + + d = (float *) acc_deviceptr (a); + + acc_unmap_data (a); + + acc_free (d); + + d = (float *) acc_deviceptr (c); + + acc_unmap_data (c); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 6.0; + } + + d = (float *) acc_malloc (N * sizeof (float)); + +#pragma acc parallel copyin (a[0:N]) deviceptr (d) copyout (b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + d[ii] = a[ii]; + b[ii] = d[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 3.0) + abort (); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort (); + + if (acc_is_present (b, (N * sizeof (float)))) + abort (); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + + d = (float *) acc_copyin (&a[0], N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc data pcopyin (a[0:N]) copyout 
(b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + acc_free (d); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + +#pragma acc data copyin (a[0:N]) pcopyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + } + + for (i = 0; i < N; i++) + { + if (b[i] != 6.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 7.0; + } + +#pragma acc data copyin (a[0:N]) pcreate (c[0:N]) copyout (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 5.0) + abort (); + } + + if (acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + if (acc_is_present (&c[0], (N * sizeof (float)))) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/nested-2.c b/libgomp/testsuite/libgomp.oacc-c/nested-2.c new file mode 100644 index 0000000..0579185 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/nested-2.c @@ -0,0 +1,35 @@ +/* { dg-do run } */ + +#include + +int +main (int argc, char *argv[]) +{ +#define N 10 + char a[N]; + + { + int i; + for (i = 0; i < N; ++i) + a[i] = 0; + } + +#pragma acc data copyout (a) + { +#pragma acc parallel /* will result in a "dummy frame" */ present (a) + { + int i; + for (i = 0; i < N; ++i) + a[i] = i; + } + } + + { + int i; + for (i = 0; i < N; ++i) + if (a[i] != i) + abort (); + } + + return 0; +} diff --git 
a/libgomp/testsuite/libgomp.oacc-c/offset-1.c b/libgomp/testsuite/libgomp.oacc-c/offset-1.c new file mode 100644 index 0000000..0bae23a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/offset-1.c @@ -0,0 +1,97 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include +#include +#include + +int +main(int argc, char **argv) +{ + int N = 8; + float *a, *b; + int i; + + a = (float *) malloc(N * sizeof (float)); + b = (float *) malloc(N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 2.0; + b[i] = 5.0; + } + +#pragma acc parallel copyin(a[2:4]) copyout(b[2:4]) + { + b[2] = a[2]; + b[3] = a[3]; + } + + for (i = 2; i < 4; i++) + { + if (a[i] != 2.0) + abort(); + + if (b[i] != 2.0) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 1.0; + } + +#pragma acc parallel copyin(a[0:4]) copyout(b[0:4]) + { + b[0] = a[0]; + b[1] = a[1]; + b[2] = a[2]; + b[3] = a[3]; + } + + for (i = 0; i < 4; i++) + { + if (a[i] != 3.0) + abort(); + + if (b[i] != 3.0) + abort(); + } + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + b[i] = 6.0; + } + +#pragma acc parallel copyin(a[0:4]) copyout(b[4:4]) + { + b[4] = a[0]; + b[5] = a[1]; + b[6] = a[2]; + b[7] = a[3]; + } + + for (i = 0; i < 4; i++) + { + if (a[i] != 9.0) + abort(); + } + + for (i = 4; i < 8; i++) + { + if (b[i] != 9.0) + abort(); + } + + if (acc_is_present (a, (N * sizeof (float)))) + abort(); + + if (acc_is_present (b, (N * sizeof (float)))) + abort(); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/parallel-1.c b/libgomp/testsuite/libgomp.oacc-c/parallel-1.c index 68f7de5..fd9df33 100644 --- a/libgomp/testsuite/libgomp.oacc-c/parallel-1.c +++ b/libgomp/testsuite/libgomp.oacc-c/parallel-1.c @@ -1,6 +1,6 @@ /* { dg-do run } */ -extern void abort (); +#include int i; @@ -8,7 +8,6 @@ int main(void) { int j, v; -#if 0 i = -1; j = -2; v = 0; @@ -22,8 +21,13 @@ int main(void) abort (); v = 1; } +#if ACC_MEM_SHARED + if (v != 1 
|| i != 2 || j != 1) + abort (); +#else if (v != 1 || i != -1 || j != -2) abort (); +#endif i = -1; j = -2; @@ -66,6 +70,10 @@ int main(void) abort (); v = 1; } +#if ACC_MEM_SHARED + if (v != 1 || i != 2 || j != 1) + abort (); +#else if (v != 1 || i != -1 || j != -2) abort (); #endif @@ -83,8 +91,15 @@ int main(void) abort (); v = 1; } + if (v != 1) + abort (); +#if ACC_MEM_SHARED + if (v != 1 || i != 2 || j != 1) + abort (); +#else if (v != 1 || i != -1 || j != -2) abort (); +#endif i = -1; j = -2; @@ -127,43 +142,64 @@ int main(void) abort (); v = 1; } + if (v != 1) + abort (); +#if ACC_MEM_SHARED + if (v != 1 || i != 2 || j != 1) + abort (); +#else if (v != 1 || i != -1 || j != -2) abort (); +#endif -#if 0 i = -1; j = -2; v = 0; -#pragma acc parallel /* copyout */ present_or_copyout (v) present (i, j) + +#pragma acc data copyin (i, j) { - if (i != -1 || j != -2) - abort (); - i = 2; - j = 1; - if (i != 2 || j != 1) - abort (); - v = 1; +#pragma acc parallel /* copyout */ present_or_copyout (v) present (i, j) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } } +#if ACC_MEM_SHARED if (v != 1 || i != 2 || j != 1) abort (); +#else + if (v != 1 || i != -1 || j != -2) + abort (); #endif -#if 0 i = -1; j = -2; v = 0; -#pragma acc parallel /* copyout */ present_or_copyout (v) + +#pragma acc data copyin(i, j) { - if (i != -1 || j != -2) - abort (); - i = 2; - j = 1; - if (i != 2 || j != 1) - abort (); - v = 1; +#pragma acc parallel /* copyout */ present_or_copyout (v) + { + if (i != -1 || j != -2) + abort (); + i = 2; + j = 1; + if (i != 2 || j != 1) + abort (); + v = 1; + } } +#if ACC_MEM_SHARED if (v != 1 || i != 2 || j != 1) abort (); +#else + if (v != 1 || i != -1 || j != -2) + abort (); #endif return 0; diff --git a/libgomp/testsuite/libgomp.oacc-c/pointer-align-1.c b/libgomp/testsuite/libgomp.oacc-c/pointer-align-1.c new file mode 100644 index 0000000..f7d5b9b --- /dev/null +++ 
b/libgomp/testsuite/libgomp.oacc-c/pointer-align-1.c @@ -0,0 +1,35 @@ +/* { dg-do run } */ + +/* PR middle-end/63247 */ + +#include + +int +main(int argc, char **argv) +{ +#define N 4 + short a[N]; + + a[0] = 10; + a[1] = 10; + a[2] = 10; + a[3] = 10; + +#pragma acc parallel copy(a[1:N-1]) + { + a[1] = 51; + a[2] = 52; + a[3] = 53; + } + + if (a[0] != 10) + abort (); + if (a[1] != 51) + abort (); + if (a[2] != 52) + abort (); + if (a[3] != 53) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/present-1.c b/libgomp/testsuite/libgomp.oacc-c/present-1.c new file mode 100644 index 0000000..f331f1f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/present-1.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + int N = 8; + float *a, *b, *c, *d; + int i; + + a = (float *) malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + c = (float *) malloc (N * sizeof (float)); + + d = (float *) acc_malloc (N * sizeof (float)); + acc_map_data (c, d, N * sizeof (float)); + +#pragma acc data present (a[0:N]) present (c[0:N]) present (b[0:N]) + { +#pragma acc parallel + { + int ii; + + for (ii = 0; ii < N; ii++) + { + c[ii] = a[ii]; + b[ii] = c[ii]; + } + } + } + + d = (float *) acc_deviceptr (c); + acc_unmap_data (c); + acc_free (d); + + free (a); + free (b); + free (c); + + return 0; +} +/* { dg-shouldfail "libgomp: present clause: !acc_is_present" } */ diff --git a/libgomp/testsuite/libgomp.oacc-c/present-2.c b/libgomp/testsuite/libgomp.oacc-c/present-2.c new file mode 100644 index 0000000..41efa70 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/present-2.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include + +int +main (int argc, char **argv) +{ + int N = 8; + float *a, *b; + int i; + + a = (float *) 
malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 4.0; + b[i] = 0.0; + } + +#pragma acc data copyin(a[0:N]) copyout(b[0:N]) + { + +#pragma acc parallel present(a[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + { + b[ii] = a[ii]; + } + } + + } + + for (i = 0; i < N; i++) + { + if (a[i] != 4.0) + abort (); + + if (b[i] != 4.0) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/subr.cu b/libgomp/testsuite/libgomp.oacc-c/subr.cu new file mode 100644 index 0000000..e86e0fc --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/subr.cu @@ -0,0 +1,64 @@ + +extern "C" __global__ void +delay (clock_t * d_o, clock_t delay) +{ + clock_t start, ticks; + + start = clock (); + + ticks = 0; + + while (ticks < delay) + ticks = clock () - start; +} + +extern "C" __global__ void +delay2 (unsigned long *d_o, clock_t delay, unsigned long tid) +{ + clock_t start, ticks; + + start = clock (); + + ticks = 0; + + while (ticks < delay) + ticks = clock () - start; + + d_o[0] = tid; +} + +extern "C" __global__ void +sum (clock_t * d_o, int N) +{ + int i; + clock_t sum; + __shared__ clock_t ticks[32]; + + sum = 0; + + for (i = threadIdx.x; i < N; i += blockDim.x) + sum += d_o[i]; + + ticks[threadIdx.x] = sum; + + syncthreads (); + + for (i = 16; i >= 1; i >>= 1) + { + if (threadIdx.x < i) + ticks[threadIdx.x] += ticks[threadIdx.x + i]; + + syncthreads (); + } + + d_o[0] = ticks[0]; +} + +extern "C" __global__ void +mult (int n, float *x, float *y) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + + for (i = 0; i < n; i++) + y[i] = x[i] * x[i]; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/subr.ptx b/libgomp/testsuite/libgomp.oacc-c/subr.ptx new file mode 100644 index 0000000..6f748fc --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/subr.ptx @@ -0,0 +1,148 @@ +// BEGIN PREAMBLE + .version 3.1 + .target sm_30 + .address_size 64 +// END PREAMBLE + +// BEGIN FUNCTION DEF: clock +.func 
(.param.u32 %out_retval)clock +{ +.reg.u32 %retval; + .reg.u64 %hr10; + .reg.u32 %r22; + .reg.u32 %r23; + .reg.u32 %r24; + .local.align 8 .b8 %frame[8]; + // #APP +// 7 "subr.c" 1 + mov.u32 %r24, %clock; +// 0 "" 2 + // #NO_APP + st.local.u32 [%frame], %r24; + ld.local.u32 %r22, [%frame]; + mov.u32 %r23, %r22; + mov.u32 %retval, %r23; + st.param.u32 [%out_retval], %retval; + ret; + } +// END FUNCTION DEF +// BEGIN GLOBAL FUNCTION DEF: delay +.visible .entry delay(.param.u64 %in_ar1, .param.u64 %in_ar2) +{ + .reg.u64 %ar1; + .reg.u64 %ar2; + .reg.u64 %hr10; + .reg.u64 %r22; + .reg.u32 %r23; + .reg.u64 %r24; + .reg.u64 %r25; + .reg.u32 %r26; + .reg.u32 %r27; + .reg.u32 %r28; + .reg.u32 %r29; + .reg.u32 %r30; + .reg.u64 %r31; + .reg.pred %r32; + .local.align 8 .b8 %frame[24]; + ld.param.u64 %ar1, [%in_ar1]; + ld.param.u64 %ar2, [%in_ar2]; + mov.u64 %r24, %ar1; + st.u64 [%frame+8], %r24; + mov.u64 %r25, %ar2; + st.local.u64 [%frame+16], %r25; + { + .param.u32 %retval_in; + { + call (%retval_in), clock; + } + ld.param.u32 %r26, [%retval_in]; +} + st.local.u32 [%frame+4], %r26; + mov.u32 %r27, 0; + st.local.u32 [%frame], %r27; + bra $L4; +$L5: + { + .param.u32 %retval_in; + { + call (%retval_in), clock; + } + ld.param.u32 %r28, [%retval_in]; +} + mov.u32 %r23, %r28; + ld.local.u32 %r30, [%frame+4]; + sub.u32 %r29, %r23, %r30; + st.local.u32 [%frame], %r29; +$L4: + ld.local.s32 %r22, [%frame]; + ld.local.u64 %r31, [%frame+16]; + setp.lo.u64 %r32,%r22,%r31; + @%r32 bra $L5; + ret; + } +// END FUNCTION DEF +// BEGIN GLOBAL FUNCTION DEF: delay2 +.visible .entry delay2(.param.u64 %in_ar1, .param.u64 %in_ar2, .param.u64 %in_ar3) +{ + .reg.u64 %ar1; + .reg.u64 %ar2; + .reg.u64 %ar3; + .reg.u64 %hr10; + .reg.u64 %r22; + .reg.u32 %r23; + .reg.u64 %r24; + .reg.u64 %r25; + .reg.u64 %r26; + .reg.u32 %r27; + .reg.u32 %r28; + .reg.u32 %r29; + .reg.u32 %r30; + .reg.u32 %r31; + .reg.u64 %r32; + .reg.pred %r33; + .reg.u64 %r34; + .reg.u64 %r35; + .local.align 8 .b8 %frame[32]; + 
ld.param.u64 %ar1, [%in_ar1]; + ld.param.u64 %ar2, [%in_ar2]; + ld.param.u64 %ar3, [%in_ar3]; + mov.u64 %r24, %ar1; + st.local.u64 [%frame+8], %r24; + mov.u64 %r25, %ar2; + st.local.u64 [%frame+16], %r25; + mov.u64 %r26, %ar3; + st.local.u64 [%frame+24], %r26; + { + .param.u32 %retval_in; + { + call (%retval_in), clock; + } + ld.param.u32 %r27, [%retval_in]; +} + st.local.u32 [%frame+4], %r27; + mov.u32 %r28, 0; + st.local.u32 [%frame], %r28; + bra $L8; +$L9: + { + .param.u32 %retval_in; + { + call (%retval_in), clock; + } + ld.param.u32 %r29, [%retval_in]; +} + mov.u32 %r23, %r29; + ld.local.u32 %r31, [%frame+4]; + sub.u32 %r30, %r23, %r31; + st.local.u32 [%frame], %r30; +$L8: + ld.local.s32 %r22, [%frame]; + ld.local.u64 %r32, [%frame+16]; + setp.lo.u64 %r33,%r22,%r32; + @%r33 bra $L9; + ld.local.u64 %r34, [%frame+8]; + ld.local.u64 %r35, [%frame+24]; + st.u64 [%r34], %r35; + ret; + } +// END FUNCTION DEF diff --git a/libgomp/testsuite/libgomp.oacc-c/timer.h b/libgomp/testsuite/libgomp.oacc-c/timer.h new file mode 100644 index 0000000..53749da --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/timer.h @@ -0,0 +1,103 @@ + +#include +#include + +static int _Tnum_timers; +static CUevent *_Tstart_events, *_Tstop_events; +static CUstream _Tstream; + +void +init_timers (int ntimers) +{ + int i; + CUresult r; + + _Tnum_timers = ntimers; + + _Tstart_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent)); + _Tstop_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent)); + + r = cuStreamCreate (&_Tstream, CU_STREAM_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuStreamCreate failed: %d\n", r); + abort (); + } + + for (i = 0; i < _Tnum_timers; i++) + { + r = cuEventCreate (&_Tstart_events[i], CU_EVENT_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventCreate failed: %d\n", r); + abort (); + } + + r = cuEventCreate (&_Tstop_events[i], CU_EVENT_DEFAULT); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventCreate failed: %d\n", 
r); + abort (); + } + } +} + +void +fini_timers (void) +{ + int i; + + for (i = 0; i < _Tnum_timers; i++) + { + cuEventDestroy (_Tstart_events[i]); + cuEventDestroy (_Tstop_events[i]); + } + + cuStreamDestroy (_Tstream); + + free (_Tstart_events); + free (_Tstop_events); +} + +void +start_timer (int timer) +{ + CUresult r; + + r = cuEventRecord (_Tstart_events[timer], _Tstream); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventRecord failed: %d\n", r); + abort (); + } +} + +float +stop_timer (int timer) +{ + CUresult r; + float etime; + + r = cuEventRecord (_Tstop_events[timer], _Tstream); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventRecord failed: %d\n", r); + abort (); + } + + r = cuEventSynchronize (_Tstop_events[timer]); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventSynchronize failed: %d\n", r); + abort (); + } + + r = cuEventElapsedTime (&etime, _Tstart_events[timer], _Tstop_events[timer]); + if (r != CUDA_SUCCESS) + { + fprintf (stderr, "cuEventElapsedTime failed: %d\n", r); + abort (); + } + + return etime; +} diff --git a/libgomp/testsuite/libgomp.oacc-c/update-1.c b/libgomp/testsuite/libgomp.oacc-c/update-1.c new file mode 100644 index 0000000..dff139f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c/update-1.c @@ -0,0 +1,280 @@ +/* { dg-do run } */ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include +#include +#include +#include +#include + +int +main (int argc, char **argv) +{ + int N = 8; + float *a, *b, *c; + float *d_a, *d_b, *d_c; + int i; + + a = (float *) malloc (N * sizeof (float)); + b = (float *) malloc (N * sizeof (float)); + c = (float *) malloc (N * sizeof (float)); + + d_a = (float *) acc_malloc (N * sizeof (float)); + d_b = (float *) acc_malloc (N * sizeof (float)); + d_c = (float *) acc_malloc (N * sizeof (float)); + + for (i = 0; i < N; i++) + { + a[i] = 3.0; + b[i] = 0.0; + } + + acc_map_data (a, d_a, N * sizeof (float)); + acc_map_data (b, d_b, N * sizeof (float)); + 
acc_map_data (c, d_c, N * sizeof (float)); + +#pragma acc update device (a[0:N], b[0:N]) + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 3.0) + abort (); + + if (b[i] != 3.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 1.0; + } + +#pragma acc update device (a[0:N], b[0:N]) + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 5.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + b[i] = 1.0; + } + +#pragma acc update device (a[0:N], b[0:N]) + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update self (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 5.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + b[i] = 0.0; + } + +#pragma acc update device (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 6.0) + abort (); + + if (b[i] != 6.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present 
(&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 7.0; + b[i] = 2.0; + } + +#pragma acc update device (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 7.0) + abort (); + + if (b[i] != 7.0) + abort (); + } + + for (i = 0; i < N; i++) + { + a[i] = 9.0; + } + +#pragma acc update device (a[0:N]) + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N]) + + for (i = 0; i < N; i++) + { + if (a[i] != 9.0) + abort (); + + if (b[i] != 9.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + for (i = 0; i < N; i++) + { + a[i] = 5.0; + } + +#pragma acc update device (a[0:N]) + + for (i = 0; i < N; i++) + { + a[i] = 6.0; + } + +#pragma acc update device (a[0:N >> 1]) + +#pragma acc parallel present (a[0:N], b[0:N]) + { + int ii; + + for (ii = 0; ii < N; ii++) + b[ii] = a[ii]; + } + +#pragma acc update host (a[0:N], b[0:N]) + + for (i = 0; i < (N >> 1); i++) + { + if (a[i] != 6.0) + abort (); + + if (b[i] != 6.0) + abort (); + } + + for (i = (N >> 1); i < N; i++) + { + if (a[i] != 5.0) + abort (); + + if (b[i] != 5.0) + abort (); + } + + if (!acc_is_present (&a[0], (N * sizeof (float)))) + abort (); + + if (!acc_is_present (&b[0], (N * sizeof (float)))) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90 new file mode 100644 index 0000000..52b030b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/abort-1.f90 @@ -0,0 +1,10 @@ +! 
{ dg-shouldfail "" { *-*-* } { "*" } { "" } } + +program main + implicit none + + !$acc parallel + call abort + !$acc end parallel + +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90 new file mode 100644 index 0000000..2ba2bcb --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/abort-2.f90 @@ -0,0 +1,13 @@ +program main + implicit none + + integer :: argc + argc = command_argument_count () + + !$acc parallel copyin(argc) + if (argc .ne. 0) then + call abort + end if + !$acc end parallel + +end program main diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 index c4597a6..4488818 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-1.f90 @@ -1,5 +1,4 @@ -! TODO: Remove -DACC_DEVICE_TYPE_host once that is set by the test harness. -! { dg-additional-options "-cpp -DACC_DEVICE_TYPE_host" } +! { dg-additional-options "-cpp" } ! TODO: Have to disable the acc_on_device builtin for we want to test the ! libgomp library function? The command line option ! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not for @@ -12,7 +11,9 @@ implicit none if (.not. acc_on_device (acc_device_none)) call abort if (.not. acc_on_device (acc_device_host)) call abort +if (acc_on_device (acc_device_host_nonshm)) call abort if (acc_on_device (acc_device_not_host)) call abort +if (acc_on_device (acc_device_nvidia)) call abort ! Host via offloading fallback mode. @@ -20,7 +21,9 @@ if (acc_on_device (acc_device_not_host)) call abort !$acc parallel if(.false.) if (.not. acc_on_device (acc_device_none)) call abort if (.not. 
acc_on_device (acc_device_host)) call abort +if (acc_on_device (acc_device_host_nonshm)) call abort if (acc_on_device (acc_device_not_host)) call abort +if (acc_on_device (acc_device_nvidia)) call abort !$acc end parallel @@ -31,7 +34,17 @@ if (acc_on_device (acc_device_not_host)) call abort !$acc parallel if (acc_on_device (acc_device_none)) call abort if (acc_on_device (acc_device_host)) call abort +#if ACC_DEVICE_TYPE_host_nonshm +if (.not. acc_on_device (acc_device_host_nonshm)) call abort +#else +if (acc_on_device (acc_device_host_nonshm)) call abort +#endif if (.not. acc_on_device (acc_device_not_host)) call abort +#if ACC_DEVICE_TYPE_nvidia +if (.not. acc_on_device (acc_device_nvidia)) call abort +#else +if (acc_on_device (acc_device_nvidia)) call abort +#endif !$acc end parallel #endif diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f index 3787e1e..0047a19 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-2.f @@ -1,5 +1,4 @@ -! TODO: Remove -DACC_DEVICE_TYPE_host once that is set by the test harness. -! { dg-additional-options "-cpp -DACC_DEVICE_TYPE_host" } +! { dg-additional-options "-cpp" } ! TODO: Have to disable the acc_on_device builtin for we want to test ! the libgomp library function? The command line option ! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not @@ -12,7 +11,9 @@ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT !Host via offloading fallback mode. @@ -20,7 +21,9 @@ !$ACC PARALLEL IF(.FALSE.) IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT IF (.NOT. 
ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT !$ACC END PARALLEL @@ -31,7 +34,17 @@ !$ACC PARALLEL IF (ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT IF (ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT +#if ACC_DEVICE_TYPE_host_nonshm + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT +#else + IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT +#endif IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT +#if ACC_DEVICE_TYPE_nvidia + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT +#else + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT +#endif !$ACC END PARALLEL #endif diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f index 1ee5926..49d7a72 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_on_device-1-3.f @@ -1,5 +1,4 @@ -! TODO: Remove -DACC_DEVICE_TYPE_host once that is set by the test harness. -! { dg-additional-options "-cpp -DACC_DEVICE_TYPE_host" } +! { dg-additional-options "-cpp" } ! TODO: Have to disable the acc_on_device builtin for we want to test ! the libgomp library function? The command line option ! '-fno-builtin-acc_on_device' is valid for C/C++/ObjC/ObjC++ but not @@ -12,7 +11,9 @@ IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT !Host via offloading fallback mode. @@ -20,7 +21,9 @@ !$ACC PARALLEL IF(.FALSE.) IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT IF (.NOT. 
ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT IF (ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT !$ACC END PARALLEL @@ -31,7 +34,17 @@ !$ACC PARALLEL IF (ACC_ON_DEVICE (ACC_DEVICE_NONE)) CALL ABORT IF (ACC_ON_DEVICE (ACC_DEVICE_HOST)) CALL ABORT +#if ACC_DEVICE_TYPE_host_nonshm + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT +#else + IF (ACC_ON_DEVICE (ACC_DEVICE_HOST_NONSHM)) CALL ABORT +#endif IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NOT_HOST)) CALL ABORT +#if ACC_DEVICE_TYPE_nvidia + IF (.NOT. ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT +#else + IF (ACC_ON_DEVICE (ACC_DEVICE_NVIDIA)) CALL ABORT +#endif !$ACC END PARALLEL #endif diff --git a/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp b/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp index cd0ab26..312f947 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp +++ b/libgomp/testsuite/libgomp.oacc-fortran/fortran.exp @@ -21,7 +21,8 @@ set quadmath_library_path "../libquadmath/.libs" dg-init # Turn on OpenACC. -lappend ALWAYS_CFLAGS "additional_flags=-fopenacc" +# XXX (TEMPORARY): Remove the -flto once that's properly integrated. +lappend ALWAYS_CFLAGS "additional_flags=-fopenacc -flto" if { $blddir != "" } { set lang_source_re {^.*\.[fF](|90|95|03|08)$} @@ -65,10 +66,41 @@ if { $lang_test_file_found } { append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST] set_ld_library_path_env_vars - # For Fortran we're doing torture testing, as Fortran has far more tests - # with arrays etc. that testing just -O0 or -O2 is insufficient, that is - # typically not the case for C/C++. - gfortran-dg-runtest $tests "" "" + # Todo: get list of accelerators from configure options --enable-accelerator. + set accels { "nvidia" "host_nonshm" } + + # Run on host (or fallback) accelerator. + lappend accels "host" + + # Test OpenACC with available accelerators. 
+ foreach accel $accels { + set tagopt "-DACC_DEVICE_TYPE_$accel=1" + + # Todo: Determine shared memory or not using run-time test. + switch $accel { + host { + set acc_mem_shared 1 + } + host_nonshm { + set acc_mem_shared 0 + } + nvidia { + set acc_mem_shared 0 + } + default { + set acc_mem_shared 0 + } + } + set tagopt "$tagopt -DACC_MEM_SHARED=$acc_mem_shared" + + # Todo: Verify that this works for both local and remote testing. + setenv ACC_DEVICE_TYPE $accel + + # For Fortran we're doing torture testing, as Fortran has far more tests + # with arrays etc. that testing just -O0 or -O2 is insufficient, that is + # typically not the case for C/C++. + gfortran-dg-runtest $tests "$tagopt" "" + } } # All done. diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 index 124aa87..51dc452 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-1.f90 @@ -1,3 +1,13 @@ use openacc +if (acc_get_num_devices (acc_device_host) .ne. 1) call abort +call acc_set_device_type (acc_device_host) +if (acc_get_device_type () .ne. acc_device_host) call abort +call acc_set_device_num (0, acc_device_host) +if (acc_get_device_num (acc_device_host) .ne. 0) call abort +call acc_shutdown (acc_device_host) + +call acc_init (acc_device_host) +call acc_shutdown (acc_device_host) + end diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 new file mode 100644 index 0000000..a54d6a7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-10.f90 @@ -0,0 +1,82 @@ +! 
{ dg-do run } + +program main + implicit none + include "openacc_lib.h" + + integer, target :: a_3d_i(10, 10, 10) + complex a_3d_c(10, 10, 10) + real a_3d_r(10, 10, 10) + + integer i, j, k + complex c + real r + integer, parameter :: i_size = sizeof (i) + integer, parameter :: c_size = sizeof (c) + integer, parameter :: r_size = sizeof (r) + + if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + + call acc_init (acc_device_nvidia) + + call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r) + + call acc_copyin (a_3d_i) + call acc_copyin (a_3d_c) + call acc_copyin (a_3d_r) + + if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort + + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort ! probe the whole complex element + if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) 
call abort ! probe the whole real element + end do + end do + end do + + call acc_shutdown (acc_device_nvidia) + +contains + + subroutine set3d (clear, a_i, a_c, a_r) + logical clear + integer, dimension (:,:,:), intent (inout) :: a_i + complex, dimension (:,:,:), intent (inout) :: a_c + real, dimension (:,:,:), intent (inout) :: a_r + + integer i, j, k + integer lb1, ub1, lb2, ub2, lb3, ub3 + + lb1 = lbound (a_i, 1) + ub1 = ubound (a_i, 1) + + lb2 = lbound (a_i, 2) + ub2 = ubound (a_i, 2) + + lb3 = lbound (a_i, 3) + ub3 = ubound (a_i, 3) + + do i = lb1, ub1 + do j = lb2, ub2 + do k = lb3, ub3 + if (clear) then + a_i(i, j, k) = 0 + a_c(i, j, k) = cmplx (0.0, 0.0) + a_r(i, j, k) = 0.0 + else + a_i(i, j, k) = i + a_c(i, j, k) = cmplx (i, j) + a_r(i, j, k) = i + end if + end do + end do + end do + + end subroutine + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90 new file mode 100644 index 0000000..a54d6a7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-11.f90 @@ -0,0 +1,82 @@ +! { dg-do run } + +program main + implicit none + include "openacc_lib.h" + + integer, target :: a_3d_i(10, 10, 10) + complex a_3d_c(10, 10, 10) + real a_3d_r(10, 10, 10) + + integer i, j, k + complex c + real r + integer, parameter :: i_size = sizeof (i) + integer, parameter :: c_size = sizeof (c) + integer, parameter :: r_size = sizeof (r) + + if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + + call acc_init (acc_device_nvidia) + + call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r) + + call acc_copyin (a_3d_i) + call acc_copyin (a_3d_c) + call acc_copyin (a_3d_r) + + if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort + + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) 
call abort ! probe the whole complex element + if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort ! probe the whole real element + end do + end do + end do + + call acc_shutdown (acc_device_nvidia) + +contains + + subroutine set3d (clear, a_i, a_c, a_r) + logical clear + integer, dimension (:,:,:), intent (inout) :: a_i + complex, dimension (:,:,:), intent (inout) :: a_c + real, dimension (:,:,:), intent (inout) :: a_r + + integer i, j, k + integer lb1, ub1, lb2, ub2, lb3, ub3 + + lb1 = lbound (a_i, 1) + ub1 = ubound (a_i, 1) + + lb2 = lbound (a_i, 2) + ub2 = ubound (a_i, 2) + + lb3 = lbound (a_i, 3) + ub3 = ubound (a_i, 3) + + do i = lb1, ub1 + do j = lb2, ub2 + do k = lb3, ub3 + if (clear) then + a_i(i, j, k) = 0 + a_c(i, j, k) = cmplx (0.0, 0.0) + a_r(i, j, k) = 0.0 + else + a_i(i, j, k) = i + a_c(i, j, k) = cmplx (i, j) + a_r(i, j, k) = i + end if + end do + end do + end do + + end subroutine + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f b/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f index 64beb9e..a9d70b2 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-2.f @@ -1,3 +1,13 @@ USE OPENACC + IF (ACC_GET_NUM_DEVICES (ACC_DEVICE_HOST) .NE. 1) CALL ABORT + CALL ACC_SET_DEVICE_TYPE (ACC_DEVICE_HOST) + IF (ACC_GET_DEVICE_TYPE () .NE. ACC_DEVICE_HOST) CALL ABORT + CALL ACC_SET_DEVICE_NUM (0, ACC_DEVICE_HOST) + IF (ACC_GET_DEVICE_NUM (ACC_DEVICE_HOST) .NE. 0) CALL ABORT + CALL ACC_SHUTDOWN (ACC_DEVICE_HOST) + + CALL ACC_INIT (ACC_DEVICE_HOST) + CALL ACC_SHUTDOWN (ACC_DEVICE_HOST) + END diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f b/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f index 3f9940b..56d2cd2 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-3.f @@ -1,3 +1,13 @@ INCLUDE "openacc_lib.h" + IF (ACC_GET_NUM_DEVICES (ACC_DEVICE_HOST) .NE. 1) CALL ABORT + CALL ACC_SET_DEVICE_TYPE (ACC_DEVICE_HOST) + IF (ACC_GET_DEVICE_TYPE () .NE. 
ACC_DEVICE_HOST) CALL ABORT + CALL ACC_SET_DEVICE_NUM (0, ACC_DEVICE_HOST) + IF (ACC_GET_DEVICE_NUM (ACC_DEVICE_HOST) .NE. 0) CALL ABORT + CALL ACC_SHUTDOWN (ACC_DEVICE_HOST) + + CALL ACC_INIT (ACC_DEVICE_HOST) + CALL ACC_SHUTDOWN (ACC_DEVICE_HOST) + END diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90 new file mode 100644 index 0000000..3a2b661 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-4.f90 @@ -0,0 +1,35 @@ +! { dg-do run } + +program main + use openacc + implicit none + + integer n + + if (acc_get_num_devices (acc_device_host) .ne. 1) call abort + + if (acc_get_num_devices (acc_device_none) .ne. 0) call abort + + call acc_init (acc_device_host) + + if (acc_get_device_type () .ne. acc_device_host) call abort + + call acc_set_device_type (acc_device_host) + + if (acc_get_device_type () .ne. acc_device_host) call abort + + n = 0 + + call acc_set_device_num (n, acc_device_host) + + if (acc_get_device_num (acc_device_host) .ne. 0) call abort + + if (.NOT. acc_async_test (n) ) call abort + + call acc_wait (n) + + call acc_wait_all () + + call acc_shutdown (acc_device_host) + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 new file mode 100644 index 0000000..e68eb89 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-5.f90 @@ -0,0 +1,31 @@ +! { dg-do run } + +program main + use openacc + implicit none + + integer n + + if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + + call acc_init (acc_device_nvidia) + + n = 0 + + call acc_set_device_num (n, acc_device_nvidia) + + if (acc_get_device_num (acc_device_nvidia) .ne. 0) call abort + + if (acc_get_num_devices (acc_device_nvidia) .gt. 1) then + + n = 1 + + call acc_set_device_num (n, acc_device_nvidia) + + if (acc_get_device_num (acc_device_nvidia) .ne. 
1) call abort + + end if + + call acc_shutdown (acc_device_nvidia) + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90 new file mode 100644 index 0000000..401ad66 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-6.f90 @@ -0,0 +1,35 @@ +! { dg-do run } + +program main + implicit none + include "openacc_lib.h" + + integer n + + if (acc_get_num_devices (acc_device_host) .ne. 1) call abort + + if (acc_get_num_devices (acc_device_none) .ne. 0) call abort + + call acc_init (acc_device_host) + + if (acc_get_device_type () .ne. acc_device_host) call abort + + call acc_set_device_type (acc_device_host) + + if (acc_get_device_type () .ne. acc_device_host) call abort + + n = 0 + + call acc_set_device_num (n, acc_device_host) + + if (acc_get_device_num (acc_device_host) .ne. 0) call abort + + if (.NOT. acc_async_test (n) ) call abort + + call acc_wait (n) + + call acc_wait_all () + + call acc_shutdown (acc_device_host) + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 new file mode 100644 index 0000000..422df53 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-7.f90 @@ -0,0 +1,31 @@ +! { dg-do run } + +program main + implicit none + include "openacc_lib.h" + + integer n + + if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + + call acc_init (acc_device_nvidia) + + n = 0 + + call acc_set_device_num (n, acc_device_nvidia) + + if (acc_get_device_num (acc_device_nvidia) .ne. 0) call abort + + if (acc_get_num_devices (acc_device_nvidia) .gt. 1) then + + n = 1 + + call acc_set_device_num (n, acc_device_nvidia) + + if (acc_get_device_num (acc_device_nvidia) .ne. 
1) call abort + + end if + + call acc_shutdown (acc_device_nvidia) + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 new file mode 100644 index 0000000..ad758b2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-8.f90 @@ -0,0 +1,83 @@ +! { dg-do run } + +program main + use openacc + use iso_c_binding + implicit none + + integer, target :: a_3d_i(10, 10, 10) + complex a_3d_c(10, 10, 10) + real a_3d_r(10, 10, 10) + + integer i, j, k + complex c + real r + integer, parameter :: i_size = sizeof (i) + integer, parameter :: c_size = sizeof (c) + integer, parameter :: r_size = sizeof (r) + + if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + + call acc_init (acc_device_nvidia) + + call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r) + + call acc_copyin (a_3d_i) + call acc_copyin (a_3d_c) + call acc_copyin (a_3d_r) + + if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort + + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) call abort ! probe the whole complex element + if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) 
call abort ! probe the whole real element + end do + end do + end do + + call acc_shutdown (acc_device_nvidia) + +contains + + subroutine set3d (clear, a_i, a_c, a_r) + logical clear + integer, dimension (:,:,:), intent (inout) :: a_i + complex, dimension (:,:,:), intent (inout) :: a_c + real, dimension (:,:,:), intent (inout) :: a_r + + integer i, j, k + integer lb1, ub1, lb2, ub2, lb3, ub3 + + lb1 = lbound (a_i, 1) + ub1 = ubound (a_i, 1) + + lb2 = lbound (a_i, 2) + ub2 = ubound (a_i, 2) + + lb3 = lbound (a_i, 3) + ub3 = ubound (a_i, 3) + + do i = lb1, ub1 + do j = lb2, ub2 + do k = lb3, ub3 + if (clear) then + a_i(i, j, k) = 0 + a_c(i, j, k) = cmplx (0.0, 0.0) + a_r(i, j, k) = 0.0 + else + a_i(i, j, k) = i + a_c(i, j, k) = cmplx (i, j) + a_r(i, j, k) = i + end if + end do + end do + end do + + end subroutine + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90 new file mode 100644 index 0000000..ad758b2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-9.f90 @@ -0,0 +1,83 @@ +! { dg-do run } + +program main + use openacc + use iso_c_binding + implicit none + + integer, target :: a_3d_i(10, 10, 10) + complex a_3d_c(10, 10, 10) + real a_3d_r(10, 10, 10) + + integer i, j, k + complex c + real r + integer, parameter :: i_size = sizeof (i) + integer, parameter :: c_size = sizeof (c) + integer, parameter :: r_size = sizeof (r) + + if (acc_get_num_devices (acc_device_nvidia) .eq. 0) call exit + + call acc_init (acc_device_nvidia) + + call set3d (.FALSE., a_3d_i, a_3d_c, a_3d_r) + + call acc_copyin (a_3d_i) + call acc_copyin (a_3d_c) + call acc_copyin (a_3d_r) + + if (acc_is_present (a_3d_i) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_r) .neqv. .TRUE.) call abort + + do i = 1, 10 + do j = 1, 10 + do k = 1, 10 + if (acc_is_present (a_3d_i(i, j, k), i_size) .neqv. .TRUE.) call abort + if (acc_is_present (a_3d_c(i, j, k), c_size) .neqv. .TRUE.) 
call abort ! probe the whole complex element + if (acc_is_present (a_3d_r(i, j, k), r_size) .neqv. .TRUE.) call abort ! probe the whole real element + end do + end do + end do + + call acc_shutdown (acc_device_nvidia) + +contains + + subroutine set3d (clear, a_i, a_c, a_r) + logical clear + integer, dimension (:,:,:), intent (inout) :: a_i + complex, dimension (:,:,:), intent (inout) :: a_c + real, dimension (:,:,:), intent (inout) :: a_r + + integer i, j, k + integer lb1, ub1, lb2, ub2, lb3, ub3 + + lb1 = lbound (a_i, 1) + ub1 = ubound (a_i, 1) + + lb2 = lbound (a_i, 2) + ub2 = ubound (a_i, 2) + + lb3 = lbound (a_i, 3) + ub3 = ubound (a_i, 3) + + do i = lb1, ub1 + do j = lb2, ub2 + do k = lb3, ub3 + if (clear) then + a_i(i, j, k) = 0 + a_c(i, j, k) = cmplx (0.0, 0.0) + a_r(i, j, k) = 0.0 + else + a_i(i, j, k) = i + a_c(i, j, k) = cmplx (i, j) + a_r(i, j, k) = i + end if + end do + end do + end do + + end subroutine + +end program diff --git a/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90 new file mode 100644 index 0000000..082dd8a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/map-1.f90 @@ -0,0 +1,97 @@ +program map + integer, parameter :: n = 20, c = 10 + integer :: i, a(n), b(n) + + a(:) = 0 + b(:) = 0 + + ! COPY + + !$acc parallel copy (a) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + do i = 1, n + b(i) = i + end do + + call check (a, b, n) + + ! COPYOUT + + a(:) = 0 + + !$acc parallel copyout (a) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + do i = 1, n + if (a(i) .ne. b(i)) call abort + end do + call check (a, b, n) + + ! COPYIN + + a(:) = 0 + + !$acc parallel copyout (a) copyin (b) + !$acc loop + do i = 1, n + a(i) = b(i) ! read b so the copyin clause is exercised + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPY + + !$acc parallel pcopy (a) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! 
PRESENT_OR_COPYOUT + + a(:) = 0 + + !$acc parallel pcopyout (a) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPYIN + + a(:) = 0 + + !$acc parallel pcopyout (a) pcopyin (b) + !$acc loop + do i = 1, n + a(i) = b(i) ! read b so the pcopyin clause is exercised + end do + !$acc end parallel + + call check (a, b, n) +end program map + +subroutine check (a, b, n) + integer :: n, a(n), b(n) + integer :: i + + do i = 1, n + if (a(i) .ne. b(i)) call abort + end do +end subroutine check diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90 new file mode 100644 index 0000000..a5e1fcb --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/pointer-align-1.f90 @@ -0,0 +1,21 @@ +! PR middle-end/63247 + +program test + implicit none + + integer(kind=2) a(4) + + a = 10; + + !$acc parallel copy(a(2:4)) + a(2) = 52 + a(3) = 53 + a(4) = 54 + !$acc end parallel + + if (a(1) .ne. 10) call abort + if (a(2) .ne. 52) call abort + if (a(3) .ne. 53) call abort + if (a(4) .ne. 54) call abort + +end program test diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90 new file mode 100644 index 0000000..1a1d4c7 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/pset-1.f90 @@ -0,0 +1,229 @@ +! { dg-do run } + +program test + implicit none + integer, allocatable :: a1(:) + integer, allocatable :: b1(:) + integer, allocatable :: c1(:) + integer, allocatable :: b2(:,:) + integer, allocatable :: c3(:,:,:) + + allocate (a1(5)) + if (.not.allocated (a1)) call abort() + + a1 = 10 + + !$acc parallel copy(a1(1:5)) + a1(1) = 1 + a1(2) = 2 + a1(3) = 3 + a1(4) = 4 + a1(5) = 5 + !$acc end parallel + + if (a1(1) .ne. 1) call abort + if (a1(2) .ne. 2) call abort + if (a1(3) .ne. 3) call abort + if (a1(4) .ne. 4) call abort + if (a1(5) .ne. 
5) call abort + + deallocate(a1) + + allocate (a1(0:4)) + if (.not.allocated (a1)) call abort() + + a1 = 10 + + !$acc parallel copy(a1(0:4)) + a1(0) = 1 + a1(1) = 2 + a1(2) = 3 + a1(3) = 4 + a1(4) = 5 + !$acc end parallel + + if (a1(0) .ne. 1) call abort + if (a1(1) .ne. 2) call abort + if (a1(2) .ne. 3) call abort + if (a1(3) .ne. 4) call abort + if (a1(4) .ne. 5) call abort + + deallocate(a1) + + allocate (b2(5,5)) + if (.not.allocated (b2)) call abort() + + b2 = 11 + + !$acc parallel copy(b2(1:5,1:5)) + b2(1,1) = 1 + b2(2,2) = 2 + b2(3,3) = 3 + b2(4,4) = 4 + b2(5,5) = 5 + !$acc end parallel + + if (b2(1,1) .ne. 1) call abort + if (b2(2,2) .ne. 2) call abort + if (b2(3,3) .ne. 3) call abort + if (b2(4,4) .ne. 4) call abort + if (b2(5,5) .ne. 5) call abort + + deallocate(b2) + + allocate (b2(0:4,0:4)) + if (.not.allocated (b2)) call abort() + + b2 = 11 + + !$acc parallel copy(b2(0:4,0:4)) + b2(0,0) = 1 + b2(1,1) = 2 + b2(2,2) = 3 + b2(3,3) = 4 + b2(4,4) = 5 + !$acc end parallel + + if (b2(0,0) .ne. 1) call abort + if (b2(1,1) .ne. 2) call abort + if (b2(2,2) .ne. 3) call abort + if (b2(3,3) .ne. 4) call abort + if (b2(4,4) .ne. 5) call abort + + deallocate(b2) + + allocate (c3(5,5,5)) + if (.not.allocated (c3)) call abort() + + c3 = 12 + + !$acc parallel copy(c3(1:5,1:5,1:5)) + c3(1,1,1) = 1 + c3(2,2,2) = 2 + c3(3,3,3) = 3 + c3(4,4,4) = 4 + c3(5,5,5) = 5 + !$acc end parallel + + if (c3(1,1,1) .ne. 1) call abort + if (c3(2,2,2) .ne. 2) call abort + if (c3(3,3,3) .ne. 3) call abort + if (c3(4,4,4) .ne. 4) call abort + if (c3(5,5,5) .ne. 5) call abort + + deallocate(c3) + + allocate (c3(0:4,0:4,0:4)) + if (.not.allocated (c3)) call abort() + + c3 = 12 + + !$acc parallel copy(c3(0:4,0:4,0:4)) + c3(0,0,0) = 1 + c3(1,1,1) = 2 + c3(2,2,2) = 3 + c3(3,3,3) = 4 + c3(4,4,4) = 5 + !$acc end parallel + + if (c3(0,0,0) .ne. 1) call abort + if (c3(1,1,1) .ne. 2) call abort + if (c3(2,2,2) .ne. 3) call abort + if (c3(3,3,3) .ne. 4) call abort + if (c3(4,4,4) .ne. 
5) call abort + + deallocate(c3) + + allocate (a1(5)) + if (.not.allocated (a1)) call abort() + + allocate (b1(5)) + if (.not.allocated (b1)) call abort() + + allocate (c1(5)) + if (.not.allocated (c1)) call abort() + + a1 = 10 + b1 = 3 + c1 = 7 + + !$acc parallel copyin(a1(1:5)) create(c1(1:5)) copyout(b1(1:5)) + c1(1) = a1(1) + c1(2) = a1(2) + c1(3) = a1(3) + c1(4) = a1(4) + c1(5) = a1(5) + + b1(1) = c1(1) + b1(2) = c1(2) + b1(3) = c1(3) + b1(4) = c1(4) + b1(5) = c1(5) + !$acc end parallel + + if (b1(1) .ne. 10) call abort + if (b1(2) .ne. 10) call abort + if (b1(3) .ne. 10) call abort + if (b1(4) .ne. 10) call abort + if (b1(5) .ne. 10) call abort + + deallocate(a1) + deallocate(b1) + deallocate(c1) + + allocate (a1(0:4)) + if (.not.allocated (a1)) call abort() + + allocate (b1(0:4)) + if (.not.allocated (b1)) call abort() + + allocate (c1(0:4)) + if (.not.allocated (c1)) call abort() + + a1 = 10 + b1 = 3 + c1 = 7 + + !$acc parallel copyin(a1(0:4)) create(c1(0:4)) copyout(b1(0:4)) + c1(0) = a1(0) + c1(1) = a1(1) + c1(2) = a1(2) + c1(3) = a1(3) + c1(4) = a1(4) + + b1(0) = c1(0) + b1(1) = c1(1) + b1(2) = c1(2) + b1(3) = c1(3) + b1(4) = c1(4) + !$acc end parallel + + if (b1(0) .ne. 10) call abort + if (b1(1) .ne. 10) call abort + if (b1(2) .ne. 10) call abort + if (b1(3) .ne. 10) call abort + if (b1(4) .ne. 10) call abort + + deallocate(a1) + deallocate(b1) + deallocate(c1) + + allocate (a1(5)) + if (.not.allocated (a1)) call abort() + + a1 = 10 + + !$acc parallel copy(a1(2:3)) + a1(2) = 2 + a1(3) = 3 + !$acc end parallel + + if (a1(1) .ne. 10) call abort + if (a1(2) .ne. 2) call abort + if (a1(3) .ne. 3) call abort + if (a1(4) .ne. 10) call abort + if (a1(5) .ne. 
10) call abort + + deallocate(a1) + +end program test diff --git a/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90 new file mode 100644 index 0000000..b39414f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-1.f90 @@ -0,0 +1,97 @@ +program subarrays + integer, parameter :: n = 20, c = 10 + integer :: i, a(n), b(n) + + a(:) = 0 + b(:) = 0 + + ! COPY + + !$acc parallel copy (a(1:n)) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + do i = 1, n + b(i) = i + end do + + call check (a, b, n) + + ! COPYOUT + + a(:) = 0 + + !$acc parallel copyout (a(1:n)) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + do i = 1, n + if (a(i) .ne. b(i)) call abort + end do + call check (a, b, n) + + ! COPYIN + + a(:) = 0 + + !$acc parallel copyout (a(1:n)) copyin (b(1:n)) + !$acc loop + do i = 1, n + a(i) = b(i) ! read b so the copyin clause is exercised + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPY + + !$acc parallel pcopy (a(1:n)) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPYOUT + + a(:) = 0 + + !$acc parallel pcopyout (a(1:n)) + !$acc loop + do i = 1, n + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPYIN + + a(:) = 0 + + !$acc parallel pcopyout (a(1:n)) pcopyin (b(1:n)) + !$acc loop + do i = 1, n + a(i) = b(i) ! read b so the pcopyin clause is exercised + end do + !$acc end parallel + + call check (a, b, n) +end program subarrays + +subroutine check (a, b, n) + integer :: n, a(n), b(n) + integer :: i + + do i = 1, n + if (a(i) .ne. 
b(i)) call abort + end do +end subroutine check diff --git a/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90 new file mode 100644 index 0000000..81799f6 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/subarrays-2.f90 @@ -0,0 +1,100 @@ +program subarrays + integer, parameter :: n = 20, c = 10, low = 5, high = 10 + integer :: i, a(n), b(n) + + a(:) = 0 + b(:) = 0 + + ! COPY + + !$acc parallel copy (a(low:high)) + !$acc loop + do i = low, high + a(i) = i + end do + !$acc end parallel + + do i = low, high + b(i) = i + end do + + call check (a, b, n) + + ! COPYOUT + + a(:) = 0 + + !$acc parallel copyout (a(low:high)) + !$acc loop + do i = low, high + a(i) = i + end do + !$acc end parallel + + do i = low, high + if (a(i) .ne. b(i)) call abort + end do + call check (a, b, n) + + ! COPYIN + + a(:) = 0 + + !$acc parallel copyout (a(low:high)) copyin (b(low:high)) + !$acc loop + do i = low, high + a(i) = b(i) + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPY + + a(:) = 0 + + !$acc parallel pcopy (a(low:high)) + !$acc loop + do i = low, high + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPYOUT + + a(:) = 0 + + !$acc parallel pcopyout (a(low:high)) + !$acc loop + do i = low, high + a(i) = i + end do + !$acc end parallel + + call check (a, b, n) + + ! PRESENT_OR_COPYIN + + a(:) = 0 + + !$acc parallel pcopyout (a(low:high)) & + !$acc & pcopyin (b(low:high)) + !$acc loop + do i = low, high + a(i) = b(i) + end do + !$acc end parallel + + call check (a, b, n) +end program subarrays + +subroutine check (a, b, n) + integer :: n, a(n), b(n) + integer :: i + + do i = 1, n + if (a(i) .ne. b(i)) call abort + end do +end subroutine check -- 1.7.10.4