public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [WIP] OpenMP 4 NVPTX support
@ 2015-04-21 15:58 Jakub Jelinek
  2015-04-22 15:08 ` Bernd Schmidt
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Jakub Jelinek @ 2015-04-21 15:58 UTC (permalink / raw)
  To: Julian Brown, Thomas Schwinge, Bernd Schmidt, Tobias Burnus, Ilya Verbin
  Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2578 bytes --]

Hi!

Attached is a minimal patch to get at least a trivial OpenMP 4.0 testcase
offloading to NVPTX (the first patch).  The second patch is WIP, just first
few needed changes to make libgomp to build for NVPTX (several weeks of work
at least).

The following seems to work and the output suggests that it was offloaded to
a non-SHM arch:

int
main ()
{
  int v = 0;
  int *w = 0;
  int x = 0;
#pragma omp target
  {
    v = 6;
    w = &v;
    x = 1; // omp_is_initial_device ();
  }
  __builtin_printf ("%d %p %p %d\n", v, &v, w, x);
  return 0;
}

but already tiny bit more complicated testcase:

extern void *malloc (__SIZE_TYPE__);
extern void free (void *);

int
main ()
{
  int v = 0;
  int *w = 0;
  int x = 0;
#pragma omp target
  {
    v = 6;
    w = &v;
    char *p = malloc (64);
    x = 1; // omp_is_initial_device ();
    free (p);
  }
  __builtin_printf ("%d %p %p %d\n", v, &v, w, x);
  return 0;
}

suggests that while it is nice that when building nvptx accel compiler
we build libgcc.a, libc.a, libm.a, libgfortran.a (and in the future hopefully libgomp.a),
nothing attempts to link those in :(.

Is the plan to link those in at mkoffload time (haven't seen any attempt
of mkoffload to invoke the nvptx-none-ld linker though), or link those in
somehow at link_ptx time in the plugin?
In either case, it isn't clear to me how things will work (if at all) in the
case where multiple shared libraries (or executable and at least one shared
library) have their own offloading bits, and if you try to e.g. call an
offloaded function defined in the shared library from an offloaded kernel in
the executable, because if any library needs some global singleton case, if
it is linked multiple times, no idea what the PTX JIT will do.

Once that is resolved, another thing will be to figure out how to
efficiently implement the TLS libgomp needs for its ICVs and other state
- right now it uses either __thread, or pthread_getspecific, neither of
these is usable of course.  I've been thinking about an array of those
structures in .shared memory indexed by %tid.x, but I guess that runs into
the issue that the array would need to be declared fixed size and there is a
very small size limitation on .shared memory size.
So perhaps a file scope .shared pointer to global memory, where whomever
launches an OpenMP 4.0 kernel (either the libgomp-plugin-nvptx.so.1 doing
GOMP_run, or later on dynamic parallelism from GOMP_target in the nvptx
libgomp.a) allocates the memory and some wrapper sets the .shared variable
to that allocated memory, then calls the kernel?

	Jakub

[-- Attachment #2: U1 --]
[-- Type: text/plain, Size: 3122 bytes --]

--- libgomp/plugin/plugin-nvptx.c.jj	2015-04-21 08:38:00.000000000 +0200
+++ libgomp/plugin/plugin-nvptx.c	2015-04-21 16:55:25.247470080 +0200
@@ -978,8 +978,8 @@ event_add (enum ptx_event_type type, CUe
 
 void
 nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
-	  size_t *sizes, unsigned short *kinds, int num_gangs, int num_workers,
-	  int vector_length, int async, void *targ_mem_desc)
+	    size_t *sizes, unsigned short *kinds, int num_gangs,
+	    int num_workers, int vector_length, int async, void *targ_mem_desc)
 {
   struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn;
   CUfunction function;
@@ -1137,7 +1137,6 @@ nvptx_host2dev (void *d, const void *h,
   CUresult r;
   CUdeviceptr pb;
   size_t ps;
-  struct nvptx_thread *nvthd = nvptx_thread ();
 
   if (!s)
     return 0;
@@ -1162,7 +1161,8 @@ nvptx_host2dev (void *d, const void *h,
     GOMP_PLUGIN_fatal ("invalid size");
 
 #ifndef DISABLE_ASYNC
-  if (nvthd->current_stream != nvthd->ptx_dev->null_stream)
+  struct nvptx_thread *nvthd = nvptx_thread ();
+  if (nvthd && nvthd->current_stream != nvthd->ptx_dev->null_stream)
     {
       CUevent *e;
 
@@ -1202,7 +1202,6 @@ nvptx_dev2host (void *h, const void *d,
   CUresult r;
   CUdeviceptr pb;
   size_t ps;
-  struct nvptx_thread *nvthd = nvptx_thread ();
 
   if (!s)
     return 0;
@@ -1227,7 +1226,8 @@ nvptx_dev2host (void *h, const void *d,
     GOMP_PLUGIN_fatal ("invalid size");
 
 #ifndef DISABLE_ASYNC
-  if (nvthd->current_stream != nvthd->ptx_dev->null_stream)
+  struct nvptx_thread *nvthd = nvptx_thread ();
+  if (nvthd && nvthd->current_stream != nvthd->ptx_dev->null_stream)
     {
       CUevent *e;
 
@@ -1559,7 +1559,8 @@ GOMP_OFFLOAD_get_name (void)
 unsigned int
 GOMP_OFFLOAD_get_caps (void)
 {
-  return GOMP_OFFLOAD_CAP_OPENACC_200;
+  return GOMP_OFFLOAD_CAP_OPENACC_200
+	 | GOMP_OFFLOAD_CAP_OPENMP_400;
 }
 
 int
@@ -1759,7 +1760,7 @@ GOMP_OFFLOAD_openacc_parallel (void (*fn
 			       void *targ_mem_desc)
 {
   nvptx_exec (fn, mapnum, hostaddrs, devaddrs, sizes, kinds, num_gangs,
-	    num_workers, vector_length, async, targ_mem_desc);
+	      num_workers, vector_length, async, targ_mem_desc);
 }
 
 void
@@ -1889,3 +1890,27 @@ GOMP_OFFLOAD_openacc_set_cuda_stream (in
 {
   return nvptx_set_cuda_stream (async, stream);
 }
+
+void
+GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars)
+{
+  CUfunction function = ((struct targ_fn_descriptor *) tgt_fn)->fn;
+  CUresult r;
+  struct ptx_device *ptx_dev = ptx_devices[ord];
+  const char *maybe_abort_msg = "(perhaps abort was called)";
+  void *args = &tgt_vars;
+
+  r = cuLaunchKernel (function,
+		      1, 1, 1,
+		      1, 1, 1,
+		      0, ptx_dev->null_stream->stream, &args, 0);
+  if (r != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuLaunchKernel error: %s", cuda_error (r));
+
+  r = cuCtxSynchronize ();
+  if (r == CUDA_ERROR_LAUNCH_FAILED)
+    GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s %s\n", cuda_error (r),
+		       maybe_abort_msg);
+  else if (r != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s", cuda_error (r));
+}

[-- Attachment #3: U2 --]
[-- Type: text/plain, Size: 2686 bytes --]

--- configure.jj	2015-04-21 08:38:24.000000000 +0200
+++ configure	2015-04-21 09:16:42.994959648 +0200
@@ -3171,6 +3171,9 @@ if test x$enable_libgomp = x ; then
 	;;
     *-*-darwin* | *-*-aix*)
 	;;
+    # And on NVPTX as an offloading target.
+    nvptx*-*-*)
+	;;
     *)
 	noconfigdirs="$noconfigdirs target-libgomp"
 	;;
--- libgomp/configure.jj	2015-04-21 11:08:08.347628799 +0200
+++ libgomp/configure	2015-04-21 11:07:39.000000000 +0200
@@ -15038,6 +15038,9 @@ case "$host" in
   *-*-rtems*)
     # RTEMS supports Pthreads, but the library is not available at GCC build time.
     ;;
+  nvptx*-*-*)
+    # NVPTX does not support Pthreads, has its own code replacement.
+    ;;
   *)
     # Check to see if -pthread or -lpthread is needed.  Prefer the former.
     # In case the pthread.h system header is not found, this test will fail.
--- libgomp/configure.tgt.jj	2015-04-21 08:38:00.000000000 +0200
+++ libgomp/configure.tgt	2015-04-21 10:59:30.857197475 +0200
@@ -151,6 +151,10 @@ case "${target}" in
 	XLDFLAGS="${XLDFLAGS} -lpthread"
 	;;
 
+  nvptx*-*-*)
+	config_path="nvptx"
+	;;
+
   *)
 	;;
 
--- libgomp/config.h.in.jj	2015-04-21 08:38:01.000000000 +0200
+++ libgomp/config.h.in	2015-04-21 08:38:01.000000000 +0200
@@ -39,6 +39,9 @@
 /* Define if pthread_{,attr_}{g,s}etaffinity_np is supported. */
 #undef HAVE_PTHREAD_AFFINITY_NP
 
+/* Define to 1 if you have the <pthread.h> header file. */
+#undef HAVE_PTHREAD_H
+
 /* Define to 1 if you have the <semaphore.h> header file. */
 #undef HAVE_SEMAPHORE_H
 
--- libgomp/libgomp.h.jj	2015-04-21 08:38:00.000000000 +0200
+++ libgomp/libgomp.h	2015-04-21 11:15:35.952217394 +0200
@@ -40,7 +40,9 @@
 #include "gstdint.h"
 #include "libgomp-plugin.h"
 
+#ifdef HAVE_PTHREAD_H
 #include <pthread.h>
+#endif
 #include <stdbool.h>
 #include <stdlib.h>
 #include <stdarg.h>
--- libgomp/configure.ac.jj	2015-04-21 08:38:00.000000000 +0200
+++ libgomp/configure.ac	2015-04-21 11:06:38.418117846 +0200
@@ -179,6 +179,9 @@ case "$host" in
   *-*-rtems*)
     # RTEMS supports Pthreads, but the library is not available at GCC build time.
     ;;
+  nvptx*-*-*)
+    # NVPTX does not support Pthreads, has its own code replacement.
+    ;;
   *)
     # Check to see if -pthread or -lpthread is needed.  Prefer the former.
     # In case the pthread.h system header is not found, this test will fail.
--- configure.ac.jj	2015-04-21 08:38:09.000000000 +0200
+++ configure.ac	2015-04-21 09:14:50.107827544 +0200
@@ -539,6 +539,9 @@ if test x$enable_libgomp = x ; then
 	;;
     *-*-darwin* | *-*-aix*)
 	;;
+    # And on NVPTX as an offloading target.
+    nvptx*-*-*)
+	;;
     *)
 	noconfigdirs="$noconfigdirs target-libgomp"
 	;;

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [WIP] OpenMP 4 NVPTX support
  2015-04-21 15:58 [WIP] OpenMP 4 NVPTX support Jakub Jelinek
@ 2015-04-22 15:08 ` Bernd Schmidt
  2015-05-13 20:19   ` [gomp4] nvptx offloading linking (was: [WIP] OpenMP 4 NVPTX support) Thomas Schwinge
  2015-04-23 14:40 ` [WIP] OpenMP 4 NVPTX support Jakub Jelinek
  2015-07-22 16:13 ` Thomas Schwinge
  2 siblings, 1 reply; 11+ messages in thread
From: Bernd Schmidt @ 2015-04-22 15:08 UTC (permalink / raw)
  To: Jakub Jelinek, Julian Brown, Thomas Schwinge, Tobias Burnus, Ilya Verbin
  Cc: gcc-patches

On 04/21/2015 05:58 PM, Jakub Jelinek wrote:

> suggests that while it is nice that when building nvptx accel compiler
> we build libgcc.a, libc.a, libm.a, libgfortran.a (and in the future hopefully libgomp.a),
> nothing attempts to link those in :(.

I have that fixed; I expect I'll get around to posting this at some 
point now that stage1 is open.


Bernd

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [WIP] OpenMP 4 NVPTX support
  2015-04-21 15:58 [WIP] OpenMP 4 NVPTX support Jakub Jelinek
  2015-04-22 15:08 ` Bernd Schmidt
@ 2015-04-23 14:40 ` Jakub Jelinek
  2015-07-22 16:13 ` Thomas Schwinge
  2 siblings, 0 replies; 11+ messages in thread
From: Jakub Jelinek @ 2015-04-23 14:40 UTC (permalink / raw)
  To: Julian Brown, Thomas Schwinge, Bernd Schmidt, Tobias Burnus, Ilya Verbin
  Cc: gcc-patches

On Tue, Apr 21, 2015 at 05:58:39PM +0200, Jakub Jelinek wrote:
> Attached is a minimal patch to get at least a trivial OpenMP 4.0 testcase
> offloading to NVPTX (the first patch).  The second patch is WIP, just first
> few needed changes to make libgomp to build for NVPTX (several weeks of work
> at least).

Here is an updated patch, which allows libgomp.a to be built on nvptx-none
target.  Nothing is really tested and there will be a lot of work porting
it, so that it will actually work properly, but at least it is a start.

--- libgomp/configure.tgt.jj	2015-04-21 08:38:00.000000000 +0200
+++ libgomp/configure.tgt	2015-04-23 16:19:07.179401650 +0200
@@ -7,6 +7,8 @@
 #  config_path		An ordered list of directories to search for
 #			sources and headers.  This is relative to the
 #			config subdirectory of the source tree.
+#  use_pthreads		"yes" if POSIX threads should be used.
+#  broken_alloca	"yes" if alloca nor VLAs should be used in libgomp.
 #  XCFLAGS		Add extra compile flags to use.
 #  XLDFLAGS		Add extra link flags to use.
 
@@ -27,8 +29,10 @@ if test $gcc_cv_have_tls = yes ; then
   esac
 fi
 
-# Since we require POSIX threads, assume a POSIX system by default.
+# On most targets we require POSIX threads, assume a POSIX system by default.
 config_path="posix"
+use_pthreads=yes
+broken_alloca=no
 
 # Check for futex enabled all at once.
 if test x$enable_linux_futex = xyes; then
@@ -151,6 +155,12 @@ case "${target}" in
 	XLDFLAGS="${XLDFLAGS} -lpthread"
 	;;
 
+  nvptx*-*-*)
+	config_path="nvptx"
+	use_pthreads=no
+	broken_alloca=yes
+	;;
+
   *)
 	;;
 
--- libgomp/team.c.jj	2015-04-21 08:38:00.000000000 +0200
+++ libgomp/team.c	2015-04-23 13:13:19.654259364 +0200
@@ -30,11 +30,13 @@
 #include <stdlib.h>
 #include <string.h>
 
+#ifdef LIBGOMP_USE_PTHREADS
 /* This attribute contains PTHREAD_CREATE_DETACHED.  */
 pthread_attr_t gomp_thread_attr;
 
 /* This key is for the thread destructor.  */
 pthread_key_t gomp_thread_destructor;
+#endif
 
 
 /* This is the libgomp per-thread data structure.  */
@@ -59,6 +61,7 @@ struct gomp_thread_start_data
 };
 
 
+#ifdef LIBGOMP_USE_PTHREADS
 /* This function is a pthread_create entry point.  This contains the idle
    loop in which a thread waits to be called up to become part of a team.  */
 
@@ -133,6 +136,7 @@ gomp_thread_start (void *xdata)
   thr->task = NULL;
   return NULL;
 }
+#endif
 
 
 /* Create a new team data structure.  */
@@ -194,6 +198,7 @@ free_team (struct gomp_team *team)
 
 /* Allocate and initialize a thread pool. */
 
+#ifdef LIBGOMP_USE_PTHREADS
 static struct gomp_thread_pool *gomp_new_thread_pool (void)
 {
   struct gomp_thread_pool *pool
@@ -204,6 +209,7 @@ static struct gomp_thread_pool *gomp_new
   pool->last_team = NULL;
   return pool;
 }
+#endif
 
 static void
 gomp_free_pool_helper (void *thread_pool)
@@ -215,7 +221,9 @@ gomp_free_pool_helper (void *thread_pool
   gomp_sem_destroy (&thr->release);
   thr->thread_pool = NULL;
   thr->task = NULL;
+#ifdef LIBGOMP_USE_PTHREADS
   pthread_exit (NULL);
+#endif
 }
 
 /* Free a thread pool and release its threads. */
@@ -267,6 +275,7 @@ gomp_free_thread (void *arg __attribute_
     }
 }
 
+#ifdef LIBGOMP_USE_PTHREADS
 /* Launch a team.  */
 
 void
@@ -834,6 +843,7 @@ gomp_team_start (void (*fn) (void *), vo
       && team->prev_ts.place_partition_len > 64)
     free (affinity_thr);
 }
+#endif
 
 
 /* Terminate the current team.  This is only to be called by the master
@@ -911,7 +921,7 @@ gomp_team_end (void)
     }
 }
 
-
+#ifdef LIBGOMP_USE_PTHREADS
 /* Constructors for this file.  */
 
 static void __attribute__((constructor))
@@ -935,6 +945,7 @@ team_destructor (void)
      crashes.  */
   pthread_key_delete (gomp_thread_destructor);
 }
+#endif
 
 struct gomp_task_icv *
 gomp_new_icv (void)
@@ -943,6 +954,8 @@ gomp_new_icv (void)
   struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
   gomp_init_task (task, NULL, &gomp_global_icv);
   thr->task = task;
+#ifdef LIBGOMP_USE_PTHREADS
   pthread_setspecific (gomp_thread_destructor, thr);
+#endif
   return &task->icv;
 }
--- libgomp/config.h.in.jj	2015-04-21 08:38:01.000000000 +0200
+++ libgomp/config.h.in	2015-04-23 12:40:18.000000000 +0200
@@ -12,6 +12,9 @@
 /* Define to 1 if the target supports __attribute__((visibility(...))). */
 #undef HAVE_ATTRIBUTE_VISIBILITY
 
+/* Define to 1 if neither alloca nor VLAs are usable. */
+#undef HAVE_BROKEN_ALLOCA
+
 /* Define if the POSIX Semaphores do not work on your system. */
 #undef HAVE_BROKEN_POSIX_SEMAPHORES
 
@@ -39,6 +42,9 @@
 /* Define if pthread_{,attr_}{g,s}etaffinity_np is supported. */
 #undef HAVE_PTHREAD_AFFINITY_NP
 
+/* Define to 1 if you have the <pthread.h> header file. */
+#undef HAVE_PTHREAD_H
+
 /* Define to 1 if you have the <semaphore.h> header file. */
 #undef HAVE_SEMAPHORE_H
 
@@ -85,6 +91,9 @@
 /* Define to 1 if GNU symbol versioning is used for libgomp. */
 #undef LIBGOMP_GNU_SYMBOL_VERSIONING
 
+/* Define to 1 if libgomp should use POSIX threads. */
+#undef LIBGOMP_USE_PTHREADS
+
 /* Define to the sub-directory in which libtool stores uninstalled libraries.
    */
 #undef LT_OBJDIR
--- libgomp/Makefile.am.jj	2015-04-21 08:38:01.000000000 +0200
+++ libgomp/Makefile.am	2015-04-23 16:18:14.718252400 +0200
@@ -61,9 +61,12 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_L
 libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
 	iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \
 	task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \
-	time.c fortran.c affinity.c target.c splay-tree.c libgomp-plugin.c \
+	time.c fortran.c affinity.c
+if USE_PTHREADS
+libgomp_la_SOURCES += target.c splay-tree.c libgomp-plugin.c \
 	oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c oacc-async.c \
 	oacc-plugin.c oacc-cuda.c
+endif
 
 include $(top_srcdir)/plugin/Makefrag.am
 
--- libgomp/Makefile.in.jj	2015-04-21 08:38:00.000000000 +0200
+++ libgomp/Makefile.in	2015-04-23 16:19:16.884244269 +0200
@@ -64,6 +64,10 @@ POST_UNINSTALL = :
 build_triplet = @build@
 host_triplet = @host@
 target_triplet = @target@
+@USE_PTHREADS_TRUE@am__append_1 = target.c splay-tree.c libgomp-plugin.c \
+@USE_PTHREADS_TRUE@	oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c oacc-async.c \
+@USE_PTHREADS_TRUE@	oacc-plugin.c oacc-cuda.c
+
 DIST_COMMON = $(top_srcdir)/plugin/Makefrag.am ChangeLog \
 	$(srcdir)/Makefile.in $(srcdir)/Makefile.am \
 	$(top_srcdir)/configure $(am__configure_deps) \
@@ -71,8 +75,8 @@ DIST_COMMON = $(top_srcdir)/plugin/Makef
 	$(srcdir)/omp.h.in $(srcdir)/omp_lib.h.in \
 	$(srcdir)/omp_lib.f90.in $(srcdir)/libgomp_f.h.in \
 	$(srcdir)/libgomp.spec.in $(srcdir)/../depcomp
-@PLUGIN_NVPTX_TRUE@am__append_1 = libgomp-plugin-nvptx.la
-@USE_FORTRAN_TRUE@am__append_2 = openacc.f90
+@PLUGIN_NVPTX_TRUE@am__append_2 = libgomp-plugin-nvptx.la
+@USE_FORTRAN_TRUE@am__append_3 = openacc.f90
 subdir = .
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
@@ -146,15 +150,16 @@ libgomp_plugin_nvptx_la_LINK = $(LIBTOOL
 @PLUGIN_NVPTX_TRUE@am_libgomp_plugin_nvptx_la_rpath = -rpath \
 @PLUGIN_NVPTX_TRUE@	$(toolexeclibdir)
 libgomp_la_LIBADD =
-@USE_FORTRAN_TRUE@am__objects_1 = openacc.lo
+@USE_PTHREADS_TRUE@am__objects_1 = target.lo splay-tree.lo \
+@USE_PTHREADS_TRUE@	libgomp-plugin.lo oacc-parallel.lo \
+@USE_PTHREADS_TRUE@	oacc-host.lo oacc-init.lo oacc-mem.lo \
+@USE_PTHREADS_TRUE@	oacc-async.lo oacc-plugin.lo oacc-cuda.lo
+@USE_FORTRAN_TRUE@am__objects_2 = openacc.lo
 am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \
 	error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \
 	parallel.lo sections.lo single.lo task.lo team.lo work.lo \
 	lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \
-	fortran.lo affinity.lo target.lo splay-tree.lo \
-	libgomp-plugin.lo oacc-parallel.lo oacc-host.lo oacc-init.lo \
-	oacc-mem.lo oacc-async.lo oacc-plugin.lo oacc-cuda.lo \
-	$(am__objects_1)
+	fortran.lo affinity.lo $(am__objects_1) $(am__objects_2)
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
 DEFAULT_INCLUDES = -I.@am__isrc@
 depcomp = $(SHELL) $(top_srcdir)/../depcomp
@@ -373,7 +378,7 @@ libsubincludedir = $(libdir)/gcc/$(targe
 AM_CPPFLAGS = $(addprefix -I, $(search_path))
 AM_CFLAGS = $(XCFLAGS)
 AM_LDFLAGS = $(XLDFLAGS) $(SECTION_LDFLAGS) $(OPT_LDFLAGS)
-toolexeclib_LTLIBRARIES = libgomp.la $(am__append_1) \
+toolexeclib_LTLIBRARIES = libgomp.la $(am__append_2) \
 	libgomp-plugin-host_nonshm.la
 nodist_toolexeclib_HEADERS = libgomp.spec
 
@@ -395,10 +400,8 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_L
 libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \
 	iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c \
 	single.c task.c team.c work.c lock.c mutex.c proc.c sem.c \
-	bar.c ptrlock.c time.c fortran.c affinity.c target.c \
-	splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
-	oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
-	$(am__append_2)
+	bar.c ptrlock.c time.c fortran.c affinity.c $(am__append_1) \
+	$(am__append_3)
 
 # Nvidia PTX OpenACC plugin.
 @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
--- libgomp/plugin/plugin-nvptx.c.jj	2015-04-21 08:38:00.000000000 +0200
+++ libgomp/plugin/plugin-nvptx.c	2015-04-21 16:55:25.247470080 +0200
@@ -978,8 +978,8 @@ event_add (enum ptx_event_type type, CUe
 
 void
 nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
-	  size_t *sizes, unsigned short *kinds, int num_gangs, int num_workers,
-	  int vector_length, int async, void *targ_mem_desc)
+	    size_t *sizes, unsigned short *kinds, int num_gangs,
+	    int num_workers, int vector_length, int async, void *targ_mem_desc)
 {
   struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn;
   CUfunction function;
@@ -1137,7 +1137,6 @@ nvptx_host2dev (void *d, const void *h,
   CUresult r;
   CUdeviceptr pb;
   size_t ps;
-  struct nvptx_thread *nvthd = nvptx_thread ();
 
   if (!s)
     return 0;
@@ -1162,7 +1161,8 @@ nvptx_host2dev (void *d, const void *h,
     GOMP_PLUGIN_fatal ("invalid size");
 
 #ifndef DISABLE_ASYNC
-  if (nvthd->current_stream != nvthd->ptx_dev->null_stream)
+  struct nvptx_thread *nvthd = nvptx_thread ();
+  if (nvthd && nvthd->current_stream != nvthd->ptx_dev->null_stream)
     {
       CUevent *e;
 
@@ -1202,7 +1202,6 @@ nvptx_dev2host (void *h, const void *d,
   CUresult r;
   CUdeviceptr pb;
   size_t ps;
-  struct nvptx_thread *nvthd = nvptx_thread ();
 
   if (!s)
     return 0;
@@ -1227,7 +1226,8 @@ nvptx_dev2host (void *h, const void *d,
     GOMP_PLUGIN_fatal ("invalid size");
 
 #ifndef DISABLE_ASYNC
-  if (nvthd->current_stream != nvthd->ptx_dev->null_stream)
+  struct nvptx_thread *nvthd = nvptx_thread ();
+  if (nvthd && nvthd->current_stream != nvthd->ptx_dev->null_stream)
     {
       CUevent *e;
 
@@ -1559,7 +1559,8 @@ GOMP_OFFLOAD_get_name (void)
 unsigned int
 GOMP_OFFLOAD_get_caps (void)
 {
-  return GOMP_OFFLOAD_CAP_OPENACC_200;
+  return GOMP_OFFLOAD_CAP_OPENACC_200
+	 | GOMP_OFFLOAD_CAP_OPENMP_400;
 }
 
 int
@@ -1759,7 +1760,7 @@ GOMP_OFFLOAD_openacc_parallel (void (*fn
 			       void *targ_mem_desc)
 {
   nvptx_exec (fn, mapnum, hostaddrs, devaddrs, sizes, kinds, num_gangs,
-	    num_workers, vector_length, async, targ_mem_desc);
+	      num_workers, vector_length, async, targ_mem_desc);
 }
 
 void
@@ -1889,3 +1890,27 @@ GOMP_OFFLOAD_openacc_set_cuda_stream (in
 {
   return nvptx_set_cuda_stream (async, stream);
 }
+
+void
+GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars)
+{
+  CUfunction function = ((struct targ_fn_descriptor *) tgt_fn)->fn;
+  CUresult r;
+  struct ptx_device *ptx_dev = ptx_devices[ord];
+  const char *maybe_abort_msg = "(perhaps abort was called)";
+  void *args = &tgt_vars;
+
+  r = cuLaunchKernel (function,
+		      1, 1, 1,
+		      1, 1, 1,
+		      0, ptx_dev->null_stream->stream, &args, 0);
+  if (r != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuLaunchKernel error: %s", cuda_error (r));
+
+  r = cuCtxSynchronize ();
+  if (r == CUDA_ERROR_LAUNCH_FAILED)
+    GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s %s\n", cuda_error (r),
+		       maybe_abort_msg);
+  else if (r != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s", cuda_error (r));
+}
--- libgomp/task.c.jj	2015-04-21 08:38:00.000000000 +0200
+++ libgomp/task.c	2015-04-23 12:48:09.158446644 +0200
@@ -162,11 +162,22 @@ GOMP_task (void (*fn) (void *), void *da
       thr->task = &task;
       if (__builtin_expect (cpyfn != NULL, 0))
 	{
+#ifdef HAVE_BROKEN_ALLOCA
+	  char buf_fixed[128];
+	  char *buf = buf_fixed;
+	  if (arg_size + arg_align - 1 > sizeof buf_fixed)
+	    buf = gomp_malloc (arg_size + arg_align - 1);
+#else
 	  char buf[arg_size + arg_align - 1];
+#endif
 	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
 				& ~(uintptr_t) (arg_align - 1));
 	  cpyfn (arg, data);
 	  fn (arg);
+#ifdef HAVE_BROKEN_ALLOCA
+	  if (buf != buf_fixed)
+	    free (buf);
+#endif
 	}
       else
 	fn (data);
--- libgomp/libgomp.h.jj	2015-04-21 08:38:00.000000000 +0200
+++ libgomp/libgomp.h	2015-04-23 12:08:50.410950510 +0200
@@ -40,7 +40,9 @@
 #include "gstdint.h"
 #include "libgomp-plugin.h"
 
+#ifdef HAVE_PTHREAD_H
 #include <pthread.h>
+#endif
 #include <stdbool.h>
 #include <stdlib.h>
 #include <stdarg.h>
@@ -508,14 +510,18 @@ static inline struct gomp_task_icv *gomp
 }
 
 /* The attributes to be used during thread creation.  */
+#ifdef LIBGOMP_USE_PTHREADS
 extern pthread_attr_t gomp_thread_attr;
+#endif
 
 /* Function prototypes.  */
 
 /* affinity.c */
 
 extern void gomp_init_affinity (void);
+#ifdef LIBGOMP_USE_PTHREADS
 extern void gomp_init_thread_affinity (pthread_attr_t *, unsigned int);
+#endif
 extern void **gomp_affinity_alloc (unsigned long, bool);
 extern void gomp_affinity_init_place (void *);
 extern bool gomp_affinity_add_cpus (void *, unsigned long, unsigned long,
--- libgomp/configure.ac.jj	2015-04-21 08:38:00.000000000 +0200
+++ libgomp/configure.ac	2015-04-23 16:16:09.358286266 +0200
@@ -179,6 +179,9 @@ case "$host" in
   *-*-rtems*)
     # RTEMS supports Pthreads, but the library is not available at GCC build time.
     ;;
+  nvptx*-*-*)
+    # NVPTX does not support Pthreads, has its own code replacement.
+    ;;
   *)
     # Check to see if -pthread or -lpthread is needed.  Prefer the former.
     # In case the pthread.h system header is not found, this test will fail.
@@ -268,6 +271,18 @@ CFLAGS="$save_CFLAGS $XCFLAGS"
 # had a chance to set XCFLAGS.
 LIBGOMP_CHECK_SYNC_BUILTINS
 
+if test x$use_pthreads = xyes; then
+  AC_DEFINE(LIBGOMP_USE_PTHREADS, 1,
+	    [Define to 1 if libgomp should use POSIX threads.])
+fi
+
+if test x$broken_alloca = xyes; then
+  AC_DEFINE(HAVE_BROKEN_ALLOCA, 1,
+	    [Define to 1 if neither alloca nor VLAs are usable.])
+fi
+
+AM_CONDITIONAL([USE_PTHREADS], [test "x$use_pthreads" = xyes])
+
 XCFLAGS="$XCFLAGS$XPCFLAGS"
 
 AC_SUBST(config_path)
--- libgomp/env.c.jj	2015-04-21 08:38:00.000000000 +0200
+++ libgomp/env.c	2015-04-23 12:18:03.238667435 +0200
@@ -82,6 +82,7 @@ int gomp_debug_var;
 char *goacc_device_type;
 int goacc_device_num;
 
+#ifdef LIBGOMP_USE_PTHREADS
 /* Parse the OMP_SCHEDULE environment variable.  */
 
 static void
@@ -1297,6 +1298,7 @@ initialize_env (void)
 
   goacc_runtime_initialize ();
 }
+#endif
 
 \f
 /* The public OpenMP API routines that access these variables.  */
--- libgomp/config/nvptx/mutex.h.jj	2015-04-23 14:22:11.549627818 +0200
+++ libgomp/config/nvptx/mutex.h	2015-04-23 14:22:06.834706015 +0200
@@ -0,0 +1,65 @@
+/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@redhat.com>.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This is a Linux specific implementation of a mutex synchronization
+   mechanism for libgomp.  This type is private to the library.  This
+   implementation uses atomic instructions and the futex syscall.  */
+
+#ifndef GOMP_MUTEX_H
+#define GOMP_MUTEX_H 1
+
+typedef int gomp_mutex_t;
+
+#define GOMP_MUTEX_INIT_0 1
+
+extern void gomp_mutex_lock_slow (gomp_mutex_t *mutex, int);
+extern void gomp_mutex_unlock_slow (gomp_mutex_t *mutex);
+
+static inline void
+gomp_mutex_init (gomp_mutex_t *mutex)
+{
+  *mutex = 0;
+}
+
+static inline void
+gomp_mutex_destroy (gomp_mutex_t *mutex)
+{
+}
+
+static inline void
+gomp_mutex_lock (gomp_mutex_t *mutex)
+{
+  int oldval = 0;
+  while (__atomic_compare_exchange_n (mutex, &oldval, 1, false,
+				      MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
+    ;
+}
+
+static inline void
+gomp_mutex_unlock (gomp_mutex_t *mutex)
+{
+  __atomic_exchange_n (mutex, 0, MEMMODEL_RELEASE);
+}
+#endif /* GOMP_MUTEX_H */
--- libgomp/config/nvptx/sem.h.jj	2015-04-23 16:04:34.368646584 +0200
+++ libgomp/config/nvptx/sem.h	2015-04-21 11:23:31.672337605 +0200
@@ -0,0 +1,56 @@
+/* Copyright (C) 2015 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@redhat.com>.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This is the default POSIX 1003.1b implementation of a semaphore
+   synchronization mechanism for libgomp.  This type is private to
+   the library.
+
+   This is a bit heavy weight for what we need, in that we're not
+   interested in sem_wait as a cancelation point, but it's not too
+   bad for a default.  */
+
+#ifndef GOMP_SEM_H
+#define GOMP_SEM_H 1
+
+typedef int gomp_sem_t;
+
+static inline void gomp_sem_init (gomp_sem_t *sem, int value)
+{
+  *sem = 0;
+}
+
+static inline void gomp_sem_wait (gomp_sem_t *sem)
+{
+}
+
+static inline void gomp_sem_post (gomp_sem_t *sem)
+{
+}
+
+static inline void gomp_sem_destroy (gomp_sem_t *sem)
+{
+}
+
+#endif /* GOMP_SEM_H  */
--- libgomp/config/nvptx/bar.c.jj	2015-04-23 16:09:36.208706337 +0200
+++ libgomp/config/nvptx/bar.c	2015-04-23 16:09:13.000000000 +0200
@@ -0,0 +1 @@
+/* To be implemented.  */
--- libgomp/config/nvptx/affinity.c.jj	2015-04-23 16:10:08.981171168 +0200
+++ libgomp/config/nvptx/affinity.c	2015-04-23 16:09:13.000000000 +0200
@@ -0,0 +1 @@
+/* To be implemented.  */
--- libgomp/config/nvptx/proc.c.jj	2015-04-23 16:09:05.611205989 +0200
+++ libgomp/config/nvptx/proc.c	2015-04-23 16:09:13.818071972 +0200
@@ -0,0 +1 @@
+/* To be implemented.  */
--- libgomp/config/nvptx/lock.c.jj	2015-04-23 15:19:09.217347370 +0200
+++ libgomp/config/nvptx/lock.c	2015-04-23 16:06:21.354893215 +0200
@@ -0,0 +1,135 @@
+/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@redhat.com>.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This is a Linux specific implementation of the public OpenMP locking
+   primitives.  This implementation uses atomic instructions and the futex
+   syscall.  */
+
+#include <string.h>
+#include "libgomp.h"
+
+
+/* The internal gomp_mutex_t and the external non-recursive omp_lock_t
+   have the same form.  Re-use it.  */
+
+void
+gomp_init_lock_30 (omp_lock_t *lock)
+{
+  gomp_mutex_init (lock);
+}
+
+void
+gomp_destroy_lock_30 (omp_lock_t *lock)
+{
+  gomp_mutex_destroy (lock);
+}
+
+void
+gomp_set_lock_30 (omp_lock_t *lock)
+{
+  gomp_mutex_lock (lock);
+}
+
+void
+gomp_unset_lock_30 (omp_lock_t *lock)
+{
+  gomp_mutex_unlock (lock);
+}
+
+int
+gomp_test_lock_30 (omp_lock_t *lock)
+{
+  int oldval = 0;
+
+  return __atomic_compare_exchange_n (lock, &oldval, 1, false,
+				      MEMMODEL_ACQUIRE, MEMMODEL_RELAXED);
+}
+
+void
+gomp_init_nest_lock_30 (omp_nest_lock_t *lock)
+{
+  memset (lock, '\0', sizeof (*lock));
+}
+
+void
+gomp_destroy_nest_lock_30 (omp_nest_lock_t *lock)
+{
+}
+
+void
+gomp_set_nest_lock_30 (omp_nest_lock_t *lock)
+{
+  void *me = gomp_icv (true);
+
+  if (lock->owner != me)
+    {
+      gomp_mutex_lock (&lock->lock);
+      lock->owner = me;
+    }
+
+  lock->count++;
+}
+
+void
+gomp_unset_nest_lock_30 (omp_nest_lock_t *lock)
+{
+  if (--lock->count == 0)
+    {
+      lock->owner = NULL;
+      gomp_mutex_unlock (&lock->lock);
+    }
+}
+
+int
+gomp_test_nest_lock_30 (omp_nest_lock_t *lock)
+{
+  void *me = gomp_icv (true);
+  int oldval;
+
+  if (lock->owner == me)
+    return ++lock->count;
+
+  oldval = 0;
+  if (__atomic_compare_exchange_n (&lock->lock, &oldval, 1, false,
+				   MEMMODEL_ACQUIRE, MEMMODEL_RELAXED))
+    {
+      lock->owner = me;
+      lock->count = 1;
+      return 1;
+    }
+
+  return 0;
+}
+
+ialias (omp_init_lock)
+ialias (omp_init_nest_lock)
+ialias (omp_destroy_lock)
+ialias (omp_destroy_nest_lock)
+ialias (omp_set_lock)
+ialias (omp_set_nest_lock)
+ialias (omp_unset_lock)
+ialias (omp_unset_nest_lock)
+ialias (omp_test_lock)
+ialias (omp_test_nest_lock)
--- libgomp/config/nvptx/sem.c.jj	2015-04-23 16:04:53.082339890 +0200
+++ libgomp/config/nvptx/sem.c	2015-04-21 08:38:01.000000000 +0200
@@ -0,0 +1 @@
+/* Everything is in the header.  */
--- libgomp/config/nvptx/time.c.jj	2015-04-23 16:09:54.143413466 +0200
+++ libgomp/config/nvptx/time.c	2015-04-23 16:09:13.000000000 +0200
@@ -0,0 +1 @@
+/* To be implemented.  */
--- libgomp/config/nvptx/ptrlock.h.jj	2015-04-23 16:04:30.088716726 +0200
+++ libgomp/config/nvptx/ptrlock.h	2015-04-21 11:46:31.091467128 +0200
@@ -0,0 +1,66 @@
+/* Copyright (C) 2015 Free Software Foundation, Inc.
+   Contributed by Jakub Jelinek <jakub@redhat.com>.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This is a generic POSIX implementation of a mutex synchronization
+   mechanism for libgomp.  This type is private to the library.  */
+
+#ifndef GOMP_PTRLOCK_H
+#define GOMP_PTRLOCK_H 1
+
+typedef struct { void *ptr; gomp_mutex_t lock; } gomp_ptrlock_t;
+
+static inline void gomp_ptrlock_init (gomp_ptrlock_t *ptrlock, void *ptr)
+{
+  ptrlock->ptr = ptr;
+  gomp_mutex_init (&ptrlock->lock);
+}
+
+static inline void *gomp_ptrlock_get (gomp_ptrlock_t *ptrlock)
+{
+  if (ptrlock->ptr != NULL)
+    return ptrlock->ptr;
+
+  gomp_mutex_lock (&ptrlock->lock);
+  if (ptrlock->ptr != NULL)
+    {
+      gomp_mutex_unlock (&ptrlock->lock);
+      return ptrlock->ptr;
+    }
+
+  return NULL;
+}
+
+static inline void gomp_ptrlock_set (gomp_ptrlock_t *ptrlock, void *ptr)
+{
+  ptrlock->ptr = ptr;
+  gomp_mutex_unlock (&ptrlock->lock);
+}
+
+static inline void gomp_ptrlock_destroy (gomp_ptrlock_t *ptrlock)
+{
+  gomp_mutex_destroy (&ptrlock->lock);
+}
+
+#endif /* GOMP_PTRLOCK_H */
--- libgomp/config/nvptx/ptrlock.c.jj	2015-04-23 16:04:27.253763188 +0200
+++ libgomp/config/nvptx/ptrlock.c	2015-04-21 08:38:01.000000000 +0200
@@ -0,0 +1 @@
+/* Everything is in the header.  */
--- libgomp/config/nvptx/omp-lock.h.jj	2015-04-23 15:19:03.295444592 +0200
+++ libgomp/config/nvptx/omp-lock.h	2015-04-23 16:02:38.020553381 +0200
@@ -0,0 +1,12 @@
+/* This header is used during the build process to find the size and 
+   alignment of the public OpenMP locks, so that we can export data
+   structures without polluting the namespace.
+
+   When using the Linux futex primitive, non-recursive locks require
+   one int.  Recursive locks require we identify the owning task
+   and so require in addition one int and a pointer.  */
+
+typedef int omp_lock_t;
+typedef struct { int lock, count; void *owner; } omp_nest_lock_t;
+typedef int omp_lock_25_t;
+typedef omp_nest_lock_t omp_nest_lock_25_t;
--- libgomp/config/nvptx/mutex.c.jj	2015-04-23 14:25:20.393497758 +0200
+++ libgomp/config/nvptx/mutex.c	2015-04-21 08:38:01.000000000 +0200
@@ -0,0 +1 @@
+/* Everything is in the header.  */
--- libgomp/configure.jj	2015-04-21 11:08:08.347628799 +0200
+++ libgomp/configure	2015-04-23 16:18:33.517947530 +0200
@@ -619,6 +619,8 @@ link_gomp
 XLDFLAGS
 XCFLAGS
 config_path
+USE_PTHREADS_FALSE
+USE_PTHREADS_TRUE
 LIBGOMP_BUILD_VERSIONED_SHLIB_SUN_FALSE
 LIBGOMP_BUILD_VERSIONED_SHLIB_SUN_TRUE
 LIBGOMP_BUILD_VERSIONED_SHLIB_GNU_FALSE
@@ -11118,7 +11120,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11121 "configure"
+#line 11123 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11224,7 +11226,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11227 "configure"
+#line 11229 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -15038,6 +15040,9 @@ case "$host" in
   *-*-rtems*)
     # RTEMS supports Pthreads, but the library is not available at GCC build time.
     ;;
+  nvptx*-*-*)
+    # NVPTX does not support Pthreads, has its own code replacement.
+    ;;
   *)
     # Check to see if -pthread or -lpthread is needed.  Prefer the former.
     # In case the pthread.h system header is not found, this test will fail.
@@ -16353,6 +16358,27 @@ $as_echo "#define HAVE_SYNC_BUILTINS 1"
 
   fi
 
+if test x$use_pthreads = xyes; then
+
+$as_echo "#define LIBGOMP_USE_PTHREADS 1" >>confdefs.h
+
+fi
+
+if test x$broken_alloca = xyes; then
+
+$as_echo "#define HAVE_BROKEN_ALLOCA 1" >>confdefs.h
+
+fi
+
+ if test "x$use_pthreads" = xyes; then
+  USE_PTHREADS_TRUE=
+  USE_PTHREADS_FALSE='#'
+else
+  USE_PTHREADS_TRUE='#'
+  USE_PTHREADS_FALSE=
+fi
+
+
 XCFLAGS="$XCFLAGS$XPCFLAGS"
 
 
@@ -16702,6 +16728,10 @@ if test -z "${LIBGOMP_BUILD_VERSIONED_SH
   as_fn_error "conditional \"LIBGOMP_BUILD_VERSIONED_SHLIB_SUN\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
 fi
+if test -z "${USE_PTHREADS_TRUE}" && test -z "${USE_PTHREADS_FALSE}"; then
+  as_fn_error "conditional \"USE_PTHREADS\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
 if test -z "${USE_FORTRAN_TRUE}" && test -z "${USE_FORTRAN_FALSE}"; then
   as_fn_error "conditional \"USE_FORTRAN\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5


	Jakub

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [gomp4] nvptx offloading linking (was: [WIP] OpenMP 4 NVPTX support)
  2015-04-22 15:08 ` Bernd Schmidt
@ 2015-05-13 20:19   ` Thomas Schwinge
  2015-10-02 19:46     ` nvptx offloading linking Thomas Schwinge
  2015-10-05 16:08     ` [gomp4] [nvptx] Don't explicitly pass "-lgomp" to the offload compiler (was: nvptx offloading linking) Thomas Schwinge
  0 siblings, 2 replies; 11+ messages in thread
From: Thomas Schwinge @ 2015-05-13 20:19 UTC (permalink / raw)
  To: Bernd Schmidt, Jakub Jelinek, gcc-patches
  Cc: Julian Brown, Tobias Burnus, Ilya Verbin

[-- Attachment #1: Type: text/plain, Size: 63162 bytes --]

Hi!

On Wed, 22 Apr 2015 17:08:26 +0200, Bernd Schmidt <bernds@codesourcery.com> wrote:
> On 04/21/2015 05:58 PM, Jakub Jelinek wrote:
> 
> > suggests that while it is nice that when building nvptx accel compiler
> > we build libgcc.a, libc.a, libm.a, libgfortran.a (and in the future hopefully libgomp.a),
> > nothing attempts to link those in :(.
> 
> I have that fixed; I expect I'll get around to posting this at some 
> point now that stage1 is open.

I have committed the following to gomp-4_0-branch in r223176.  We'll be
submitting this for trunk later on; some changes will need to be done, as
already discussed.

Note that this patch has some dependencies on a patch that I'll be
committing later, »Assorted OpenACC changes«.  These include
GOACC_get_num_threads and GOACC_get_thread_num interface changes; I
didn't see the point in completely disentangling these changes now.

You'll also want to update your nvptx newlib sources.

The nvptx-tools and offload-nvptx-none GCC installations need to be in
the same prefix, so that the latter can find the nvptx-none assembler,
and doesn't resort to using the »as« binary found first in $PATH, which
likely will be the host system's, and only spewing out a cascade of error
messages when confronted with PTX assembly code.  If you've been using my
build scripts (trunk-offload-big.tar.bz2, trunk-offload-light.tar.bz2;
will upload fixed tarballs later) as posted on
<https://gcc.gnu.org/wiki/Offloading#How_to_try_offloading_enabled_GCC>,
you'll need to apply the following patch:

diff --git BUILD-gcc-offload-nvptx-none BUILD-gcc-offload-nvptx-none
index 664a781..1e815eb 100755
--- BUILD-gcc-offload-nvptx-none
+++ BUILD-gcc-offload-nvptx-none
@@ -11,5 +11,5 @@ if ! test -f .have-configure; then
   ln -vs "$T"/source-newlib/newlib "$T"/source-gcc/newlib &&
-  rm -f "$T"/install/nvptx-none/usr &&
-  mkdir -p "$T"/install/nvptx-none &&
-  ln -vs . "$T"/install/nvptx-none/usr &&
+  rm -f "$T"/install/offload-nvptx-none/nvptx-none/usr &&
+  mkdir -p "$T"/install/offload-nvptx-none/nvptx-none &&
+  ln -vs . "$T"/install/offload-nvptx-none/nvptx-none/usr &&
   target=$("$T"/source-gcc/config.guess) &&
@@ -32,4 +32,4 @@ if ! test -f .have-configure; then
     --with-sysroot=/nvptx-none \
-    --with-build-sysroot="$T"/install/nvptx-none \
-    --with-build-time-tools="$T"/install/nvptx-none/bin \
+    --with-build-sysroot="$T"/install/offload-nvptx-none/nvptx-none \
+    --with-build-time-tools="$T"/install/offload-nvptx-none/nvptx-none/bin \
     --disable-sjlj-exceptions \
diff --git BUILD-nvptx-tools BUILD-nvptx-tools
index b58715d..c093983 100755
--- BUILD-nvptx-tools
+++ BUILD-nvptx-tools
@@ -11,3 +11,3 @@ if ! test -f .have-configure; then
     --target=nvptx-none \
-    --prefix="$T"/install \
+    --prefix="$T"/install/offload-nvptx-none \
     --with-cuda-driver-include=$CUDA/targets/x86_64-linux/include \

commit c4e9c60e860e4bd9996df196bee54d52cda64038
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Wed May 13 20:05:52 2015 +0000

    nvptx offloading linking
    
    	gcc/
    	* config/nvptx/mkoffload.c (enum Kind, struct Token, enum Vis)
    	(struct Stmt): Remove.
    	(read_file, tokenize, write_token, write_tokens, alloc_stmt)
    	(alloc_comment, append_stmt, rev_stmts, write_stmt, write_stmts)
    	(parse_insn, parse_list_nosemi, parse_init, parse_file): Remove
    	functions and macros.
    	(decls, vars, fns): Remove variables.
    	(maybe_unlink): Use save_temps rather than debug to keep files.
    	(tool_cleanup): Unlink ptx_cfile_name and ptx_name.
    	(read_file): Accept a pointer to a length and store into it.
    	(process): Don't try to parse the input file, just write it out as a
    	string, but looking for maps.  Also write out the length.
    	(main): Don't use -S to compile ptx code.  Add -lgomp.  Add
    	COLLECT_MKOFFLOAD_OPTIONS.  Scan for -fopenacc and produce an empty
    	image if it is not set.  Scan for -save-temps.
    	* gcc.c (mkoffload_options): New static variable.
    	(display_help): Mention -Xoffload
    	(driver_handle_option): Handle it.
    	(add_mkoffload_option): New static function.
    	(set_collect_gcc_options): If offloading, set
    	COLLECT_MKOFFLOAD_OPTIONS.
    	* doc/invoke.texi (-Xoffload): Document.
    	* common.opt (Xoffload): New option.
    	* gcc.c (process_command): Use spec_machine rather than
    	spec_host_machine to build tooldir_prefix2.
    	gcc/fortran/
    	* gfortranspec.c (lang_specific_driver): Add -Xoffload options to
    	link -lm and -lgfortran.
    	libgcc/
    	* config.host (nvptx-*): For an offloading build, add libgomp.a
    	and libgomp.spec to extra_parts.
    	* config/nvptx/t-nvptx (gomp-acc_on_device.o, gomp-tids.o)
    	(gomp-atomic.o, libgomp.a, libgomp.spec): New rules.
    	(OBJS_libgomp): New variable.
    	* config/nvptx/gomp-acc_on_device.c: New file.
    	* config/nvptx/gomp-atomic.asm: Likewise.
    	* config/nvptx/gomp-tids.c: Likewise.
    	libgomp/
    	* oacc-ptx.h: Remove file.
    	* plugin/plugin-nvptx.c: Don't include it.
    	(link_ptx): Accept a length argument.  Don't add predefined bits of
    	PTX code.  Look for NUL characters as file boundaries in the input
    	and link the multiple PTX files.
    	(GOMP_OFFLOAD_load_image): Get the size of PTX code from the table
    	and pass it to link_ptx.
    	* testsuite/libgomp.fortran/fortran.exp (lang_link_flags): Add
    	"-Xoffload -lgfortran -Xoffload -lm".
    	* testsuite/libgomp.oacc-fortran/fortran.exp (lang_link_flags):
    	Likewise.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@223176 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.gomp                                 |  28 +
 gcc/common.opt                                     |   3 +
 gcc/config/nvptx/mkoffload.c                       | 766 +++------------------
 gcc/doc/invoke.texi                                |   7 +-
 gcc/fortran/ChangeLog.gomp                         |   5 +
 gcc/fortran/gfortranspec.c                         |   2 +
 gcc/gcc.c                                          |  36 +-
 libgcc/ChangeLog.gomp                              |  12 +
 libgcc/config.host                                 |   6 +-
 libgcc/config/nvptx/gomp-acc_on_device.c           |   9 +
 libgcc/config/nvptx/gomp-atomic.asm                |  37 +
 libgcc/config/nvptx/gomp-tids.c                    |  66 ++
 libgcc/config/nvptx/t-nvptx                        |  13 +
 libgomp/ChangeLog.gomp                             |  16 +
 libgomp/oacc-ptx.h                                 | 454 ------------
 libgomp/plugin/plugin-nvptx.c                      |  91 +--
 libgomp/testsuite/libgomp.fortran/fortran.exp      |   5 +-
 libgomp/testsuite/libgomp.oacc-fortran/fortran.exp |   5 +-
 18 files changed, 383 insertions(+), 1178 deletions(-)

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index 4a46cdb..8ea9498 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,3 +1,31 @@
+2015-05-13  Bernd Schmidt  <bernds@codesourcery.com>
+
+	* config/nvptx/mkoffload.c (enum Kind, struct Token, enum Vis)
+	(struct Stmt): Remove.
+	(read_file, tokenize, write_token, write_tokens, alloc_stmt)
+	(alloc_comment, append_stmt, rev_stmts, write_stmt, write_stmts)
+	(parse_insn, parse_list_nosemi, parse_init, parse_file): Remove
+	functions and macros.
+	(decls, vars, fns): Remove variables.
+	(maybe_unlink): Use save_temps rather than debug to keep files.
+	(tool_cleanup): Unlink ptx_cfile_name and ptx_name.
+	(read_file): Accept a pointer to a length and store into it.
+	(process): Don't try to parse the input file, just write it out as a
+	string, but looking for maps.  Also write out the length.
+	(main): Don't use -S to compile ptx code.  Add -lgomp.  Add
+	COLLECT_MKOFFLOAD_OPTIONS.  Scan for -fopenacc and produce an empty
+	image if it is not set.  Scan for -save-temps.
+	* gcc.c (mkoffload_options): New static variable.
+	(display_help): Mention -Xoffload
+	(driver_handle_option): Handle it.
+	(add_mkoffload_option): New static function.
+	(set_collect_gcc_options): If offloading, set
+	COLLECT_MKOFFLOAD_OPTIONS.
+	* doc/invoke.texi (-Xoffload): Document.
+	* common.opt (Xoffload): New option.
+	* gcc.c (process_command): Use spec_machine rather than
+	spec_host_machine to build tooldir_prefix2.
+
 2015-05-11  Thomas Schwinge  <thomas@codesourcery.com>
 	    Bernd Schmidt  <bernds@codesourcery.com>
 	    Cesar Philippidis  <cesar@codesourcery.com>
diff --git gcc/common.opt gcc/common.opt
index 51833c1..cebbd01 100644
--- gcc/common.opt
+++ gcc/common.opt
@@ -741,6 +741,9 @@ Driver Separate
 Xlinker
 Driver Separate
 
+Xoffload
+Driver Separate
+
 Xpreprocessor
 Driver Separate
 
diff --git gcc/config/nvptx/mkoffload.c gcc/config/nvptx/mkoffload.c
index 8687154..b918cad 100644
--- gcc/config/nvptx/mkoffload.c
+++ gcc/config/nvptx/mkoffload.c
@@ -41,84 +41,12 @@ const char tool_name[] = "nvptx mkoffload";
 
 #define COMMENT_PREFIX "#"
 
-typedef enum Kind
-{
-  /* 0-ff used for single char tokens */
-  K_symbol = 0x100, /* a symbol */
-  K_label,  /* a label defn (i.e. symbol:) */
-  K_ident,  /* other ident */
-  K_dotted, /* dotted identifier */
-  K_number,
-  K_string,
-  K_comment
-} Kind;
-
-typedef struct Token
-{
-  unsigned short kind : 12;
-  unsigned short space : 1; /* preceded by space */
-  unsigned short end : 1;   /* succeeded by end of line */
-  /* Length of token */
-  unsigned short len;
-
-  /* Token itself */
-  char const *ptr;
-} Token;
-
-/* statement info */
-typedef enum Vis
-{
-  V_dot = 0,  /* random pseudo */
-  V_var = 1,  /* var decl/defn */
-  V_func = 2, /* func decl/defn */
-  V_insn = 3, /* random insn */
-  V_label = 4, /* label defn */
-  V_comment = 5,
-  V_pred = 6,  /* predicate */
-  V_mask = 0x7,
-  V_global = 0x08, /* globalize */
-  V_weak = 0x10,   /* weakly globalize */
-  V_no_eol = 0x20, /* no end of line */
-  V_prefix_comment = 0x40 /* prefixed comment */
-} Vis;
-
-typedef struct Stmt
-{
-  struct Stmt *next;
-  Token *tokens;
-  unsigned char vis;
-  unsigned len : 12;
-  unsigned sym : 12;
-} Stmt;
-
 struct id_map
 {
   id_map *next;
   char *ptx_name;
 };
 
-static const char *read_file (FILE *);
-static Token *tokenize (const char *);
-
-static void write_token (FILE *, const Token *);
-static void write_tokens (FILE *, const Token *, unsigned, int);
-
-static Stmt *alloc_stmt (unsigned, Token *, Token *, const Token *);
-#define alloc_comment(S,E) alloc_stmt (V_comment, S, E, 0)
-#define append_stmt(V, S) ((S)->next = *(V), *(V) = (S))
-static Stmt *rev_stmts (Stmt *);
-static void write_stmt (FILE *, const Stmt *);
-static void write_stmts (FILE *, const Stmt *);
-
-static Token *parse_insn (Token *);
-static Token *parse_list_nosemi (Token *);
-static Token *parse_init (Token *);
-static Token *parse_file (Token *);
-
-static Stmt *decls;
-static Stmt *vars;
-static Stmt *fns;
-
 static id_map *func_ids, **funcs_tail = &func_ids;
 static id_map *var_ids, **vars_tail = &var_ids;
 
@@ -136,7 +64,7 @@ bool target_ilp32 = false;
 void
 maybe_unlink (const char *file)
 {
-  if (! debug)
+  if (!save_temps)
     {
       if (unlink_if_ordinary (file)
 	  && errno != ENOENT)
@@ -149,6 +77,10 @@ maybe_unlink (const char *file)
 void
 tool_cleanup (bool)
 {
+  if (ptx_cfile_name)
+    maybe_unlink (ptx_cfile_name);
+  if (ptx_name)
+    maybe_unlink (ptx_name);
 }
 
 /* Add or change the value of an environment variable, outputting the
@@ -184,7 +116,7 @@ record_id (const char *p1, id_map ***where)
    remember, there could be a NUL in the file itself.  */
 
 static const char *
-read_file (FILE *stream)
+read_file (FILE *stream, size_t *plen)
 {
   size_t alloc = 16384;
   size_t base = 0;
@@ -214,557 +146,10 @@ read_file (FILE *stream)
 	}
     }
   buffer[base] = 0;
+  *plen = base;
   return buffer;
 }
 
-/* Read a token, advancing ptr.
-   If we read a comment, append it to the comments block. */
-
-static Token *
-tokenize (const char *ptr)
-{
-  unsigned alloc = 1000;
-  unsigned num = 0;
-  Token *toks = XNEWVEC (Token, alloc);
-  int in_comment = 0;
-  int not_comment = 0;
-
-  for (;; num++)
-    {
-      const char *base;
-      unsigned kind;
-      int ws = 0;
-      int eol = 0;
-
-    again:
-      base = ptr;
-      if (in_comment)
-	goto block_comment;
-      switch (kind = *ptr++)
-	{
-	default:
-	  break;
-
-	case '\n':
-	  eol = 1;
-	  /* Fall through */
-	case ' ':
-	case '\t':
-	case '\r':
-	case '\v':
-	  /* White space */
-	  ws = not_comment;
-	  goto again;
-
-	case '/':
-	  {
-	    if (*ptr == '/')
-	      {
-		/* line comment.  Do not include trailing \n */
-		base += 2;
-		for (; *ptr; ptr++)
-		  if (*ptr == '\n')
-		    break;
-		kind = K_comment;
-	      }
-	    else if (*ptr == '*')
-	      {
-		/* block comment */
-		base += 2;
-		ptr++;
-
-	      block_comment:
-		eol = in_comment;
-		in_comment = 1;
-		for (; *ptr; ptr++)
-		  {
-		    if (*ptr == '\n')
-		      {
-			ptr++;
-			break;
-		      }
-		    if (ptr[0] == '*' && ptr[1] == '/')
-		      {
-			in_comment = 2;
-			ptr += 2;
-			break;
-		      }
-		  }
-		kind = K_comment;
-	      }
-	    else
-	      break;
-	  }
-	  break;
-
-	case '"':
-	  /* quoted string */
-	  kind = K_string;
-	  while (*ptr)
-	    if (*ptr == '"')
-	      {
-		ptr++;
-		break;
-	      }
-	    else if (*ptr++ == '\\')
-	      ptr++;
-	  break;
-
-	case '.':
-	  if (*ptr < '0' || *ptr > '9')
-	    {
-	      kind = K_dotted;
-	      ws = not_comment;
-	      goto ident;
-	    }
-	  /* FALLTHROUGH */
-	case '0'...'9':
-	  kind = K_number;
-	  goto ident;
-	  break;
-
-	case '$':  /* local labels.  */
-	case '%':  /* register names, pseudoes etc */
-	  kind = K_ident;
-	  goto ident;
-
-	case 'a'...'z':
-	case 'A'...'Z':
-	case '_':
-	  kind = K_symbol; /* possible symbol name */
-	ident:
-	  for (; *ptr; ptr++)
-	    {
-	      if (*ptr >= 'A' && *ptr <= 'Z')
-		continue;
-	      if (*ptr >= 'a' && *ptr <= 'z')
-		continue;
-	      if (*ptr >= '0' && *ptr <= '9')
-		continue;
-	      if (*ptr == '_' || *ptr == '$')
-		continue;
-	      if (*ptr == '.' && kind != K_dotted)
-		/* Idents starting with a dot, cannot have internal dots. */
-		continue;
-	      if ((*ptr == '+' || *ptr == '-')
-		  && kind == K_number
-		  && (ptr[-1] == 'e' || ptr[-1] == 'E'
-		      || ptr[-1] == 'p' || ptr[-1] == 'P'))
-		/* exponent */
-		continue;
-	      break;
-	    }
-	  if (*ptr == ':')
-	    {
-	      ptr++;
-	      kind = K_label;
-	    }
-	  break;
-	}
-
-      if (alloc == num)
-	{
-	  alloc *= 2;
-	  toks = XRESIZEVEC (Token, toks, alloc);
-	}
-      Token *tok = toks + num;
-
-      tok->kind = kind;
-      tok->space = ws;
-      tok->end = 0;
-      tok->ptr = base;
-      tok->len = ptr - base - in_comment;
-      in_comment &= 1;
-      not_comment = kind != K_comment;
-      if (eol && num)
-	tok[-1].end = 1;
-      if (!kind)
-	break;
-    }
-
-  return toks;
-}
-
-/* Write an encoded token. */
-
-static void
-write_token (FILE *out, Token const *tok)
-{
-  if (tok->space)
-    fputc (' ', out);
-
-  switch (tok->kind)
-    {
-    case K_string:
-      {
-	const char *c = tok->ptr + 1;
-	size_t len = tok->len - 2;
-
-	fputs ("\\\"", out);
-	while (len)
-	  {
-	    const char *bs = (const char *)memchr (c, '\\', len);
-	    size_t l = bs ? bs - c : len;
-
-	    fprintf (out, "%.*s", (int)l, c);
-	    len -= l;
-	    c += l;
-	    if (bs)
-	      {
-		fputs ("\\\\", out);
-		len--, c++;
-	      }
-	  }
-	fputs ("\\\"", out);
-      }
-      break;
-
-    default:
-      /* All other tokens shouldn't have anything magic in them */
-      fprintf (out, "%.*s", tok->len, tok->ptr);
-      break;
-    }
-  if (tok->end)
-    fputs ("\\n", out);
-}
-
-static void
-write_tokens (FILE *out, Token const *toks, unsigned len, int spc)
-{
-  fputs ("\t\"", out);
-  for (; len--; toks++)
-    write_token (out, toks);
-  if (spc)
-    fputs (" ", out);
-  fputs ("\"", out);
-}
-
-static Stmt *
-alloc_stmt (unsigned vis, Token *tokens, Token *end, Token const *sym)
-{
-  static unsigned alloc = 0;
-  static Stmt *heap = 0;
-
-  if (!alloc)
-    {
-      alloc = 1000;
-      heap = XNEWVEC (Stmt, alloc);
-    }
-
-  Stmt *stmt = heap++;
-  alloc--;
-
-  tokens->space = 0;
-  stmt->next = 0;
-  stmt->vis = vis;
-  stmt->tokens = tokens;
-  stmt->len = end - tokens;
-  stmt->sym = sym ? sym - tokens : ~0;
-
-  return stmt;
-}
-
-static Stmt *
-rev_stmts (Stmt *stmt)
-{
-  Stmt *prev = 0;
-  Stmt *next;
-
-  while (stmt)
-    {
-      next = stmt->next;
-      stmt->next = prev;
-      prev = stmt;
-      stmt = next;
-    }
-
-  return prev;
-}
-
-static void
-write_stmt (FILE *out, const Stmt *stmt)
-{
-  if ((stmt->vis & V_mask) != V_comment)
-    {
-      write_tokens (out, stmt->tokens, stmt->len,
-		    (stmt->vis & V_mask) == V_pred);
-      fputs (stmt->vis & V_no_eol ? "\t" : "\n", out);
-    }
-}
-
-static void
-write_stmts (FILE *out, const Stmt *stmts)
-{
-  for (; stmts; stmts = stmts->next)
-    write_stmt (out, stmts);
-}
-
-static Token *
-parse_insn (Token *tok)
-{
-  unsigned depth = 0;
-
-  do
-    {
-      Stmt *stmt;
-      Token *sym = 0;
-      unsigned s = V_insn;
-      Token *start = tok;
-
-      switch (tok++->kind)
-	{
-	case K_comment:
-	  while (tok->kind == K_comment)
-	    tok++;
-	  stmt = alloc_comment (start, tok);
-	  append_stmt (&fns, stmt);
-	  continue;
-
-	case '{':
-	  depth++;
-	  break;
-
-	case '}':
-	  depth--;
-	  break;
-
-	case K_label:
-	  if (tok[-1].ptr[0] != '$')
-	    sym = tok - 1;
-	  tok[-1].end = 1;
-	  s = V_label;
-	  break;
-
-	case '@':
-	  tok->space = 0;
-	  if (tok->kind == '!')
-	    tok++;
-	  if (tok->kind == K_symbol)
-	    sym = tok;
-	  tok++;
-	  s = V_pred;
-	  break;
-
-	default:
-	  for (; tok->kind != ';'; tok++)
-	    {
-	      if (tok->kind == ',')
-		tok[1].space = 0;
-	      else if (tok->kind == K_symbol)
-		sym = tok;
-	    }
-	  tok++->end = 1;
-	  break;
-	}
-
-      stmt = alloc_stmt (s, start, tok, sym);
-      append_stmt (&fns, stmt);
-
-      if (!tok[-1].end && tok[0].kind == K_comment)
-	{
-	  stmt->vis |= V_no_eol;
-	  stmt = alloc_comment (tok, tok + 1);
-	  append_stmt (&fns, stmt);
-	  tok++;
-	}
-    }
-  while (depth);
-
-  return tok;
-}
-
-/* comma separated list of tokens */
-
-static Token *
-parse_list_nosemi (Token *tok)
-{
-  Token *start = tok;
-
-  do
-    if (!(++tok)->kind)
-      break;
-  while ((++tok)->kind == ',');
-
-  tok[-1].end = 1;
-  Stmt *stmt = alloc_stmt (V_dot, start, tok, 0);
-  append_stmt (&decls, stmt);
-
-  return tok;
-}
-
-#define is_keyword(T,S) \
-  (sizeof (S) == (T)->len && !memcmp ((T)->ptr + 1, (S), (T)->len - 1))
-
-static Token *
-parse_init (Token *tok)
-{
-  for (;;)
-    {
-      Token *start = tok;
-      Token const *sym = 0;
-      Stmt *stmt;
-
-      if (tok->kind == K_comment)
-	{
-	  while (tok->kind == K_comment)
-	    tok++;
-	  stmt = alloc_comment (start, tok);
-	  append_stmt (&vars, stmt);
-	  start = tok;
-	}
-
-      if (tok->kind == '{')
-	tok[1].space = 0;
-      for (; tok->kind != ',' && tok->kind != ';'; tok++)
-	if (tok->kind == K_symbol)
-	  sym = tok;
-      tok[1].space = 0;
-      int end = tok++->kind == ';';
-      stmt = alloc_stmt (V_insn, start, tok, sym);
-      append_stmt (&vars, stmt);
-      if (!tok[-1].end && tok->kind == K_comment)
-	{
-	  stmt->vis |= V_no_eol;
-	  stmt = alloc_comment (tok, tok + 1);
-	  append_stmt (&vars, stmt);
-	  tok++;
-	}
-      if (end)
-	break;
-    }
-  return tok;
-}
-
-static Token *
-parse_file (Token *tok)
-{
-  Stmt *comment = 0;
-
-  if (tok->kind == K_comment)
-    {
-      Token *start = tok;
-
-      while (tok->kind == K_comment)
-	{
-	  if (strncmp (tok->ptr, ":VAR_MAP ", 9) == 0)
-	    record_id (tok->ptr + 9, &vars_tail);
-	  if (strncmp (tok->ptr, ":FUNC_MAP ", 10) == 0)
-	    record_id (tok->ptr + 10, &funcs_tail);
-	  tok++;
-	}
-      comment = alloc_comment (start, tok);
-      comment->vis |= V_prefix_comment;
-    }
-
-  if (tok->kind == K_dotted)
-    {
-      if (is_keyword (tok, "version")
-	  || is_keyword (tok, "target")
-	  || is_keyword (tok, "address_size"))
-	{
-	  if (comment)
-	    append_stmt (&decls, comment);
-	  tok = parse_list_nosemi (tok);
-	}
-      else
-	{
-	  unsigned vis = 0;
-	  const Token *def = 0;
-	  unsigned is_decl = 0;
-	  Token *start;
-
-	  for (start = tok;
-	       tok->kind && tok->kind != '=' && tok->kind != K_comment
-		 && tok->kind != '{' && tok->kind != ';'; tok++)
-	    {
-	      if (is_keyword (tok, "global")
-		  || is_keyword (tok, "const"))
-		vis |= V_var;
-	      else if (is_keyword (tok, "func")
-		       || is_keyword (tok, "entry"))
-		vis |= V_func;
-	      else if (is_keyword (tok, "visible"))
-		vis |= V_global;
-	      else if (is_keyword (tok, "extern"))
-		is_decl = 1;
-	      else if (is_keyword (tok, "weak"))
-		vis |= V_weak;
-	      if (tok->kind == '(')
-		{
-		  tok[1].space = 0;
-		  tok[0].space = 1;
-		}
-	      else if (tok->kind == ')' && tok[1].kind != ';')
-		tok[1].space = 1;
-
-	      if (tok->kind == K_symbol)
-		def = tok;
-	    }
-
-	  if (!tok->kind)
-	    {
-	      /* end of file */
-	      if (comment)
-		append_stmt (&fns, comment);
-	    }
-	  else if (tok->kind == '{'
-		   || tok->kind == K_comment)
-	    {
-	      /* function defn */
-	      Stmt *stmt = alloc_stmt (vis, start, tok, def);
-	      if (comment)
-		{
-		  append_stmt (&fns, comment);
-		  stmt->vis |= V_prefix_comment;
-		}
-	      append_stmt (&fns, stmt);
-	      tok = parse_insn (tok);
-	    }
-	  else
-	    {
-	      int assign = tok->kind == '=';
-
-	      tok++->end = 1;
-	      if ((vis & V_mask) == V_var && !is_decl)
-		{
-		  /* variable */
-		  Stmt *stmt = alloc_stmt (vis, start, tok, def);
-		  if (comment)
-		    {
-		      append_stmt (&vars, comment);
-		      stmt->vis |= V_prefix_comment;
-		    }
-		  append_stmt (&vars, stmt);
-		  if (assign)
-		    tok = parse_init (tok);
-		}
-	      else
-		{
-		  /* declaration */
-		  Stmt *stmt = alloc_stmt (vis, start, tok, 0);
-		  if (comment)
-		    {
-		      append_stmt (&decls, comment);
-		      stmt->vis |= V_prefix_comment;
-		    }
-		  append_stmt (&decls, stmt);
-		}
-	    }
-	}
-    }
-  else
-    {
-      /* Something strange.  Ignore it.  */
-      if (comment)
-	append_stmt (&fns, comment);
-
-      do
-	tok++;
-      while (tok->kind && !tok->end);
-    }
-  return tok;
-}
-
 /* Parse STR, saving found tokens into PVALUES and return their number.
    Tokens are assumed to be delimited by ':'.  */
 static unsigned
@@ -840,19 +225,50 @@ access_check (const char *name, int mode)
 static void
 process (FILE *in, FILE *out)
 {
-  const char *input = read_file (in);
-  Token *tok = tokenize (input);
+  size_t len;
+  const char *input = read_file (in, &len);
+
+  fprintf (out, "static const char ptx_code[] = \n \"");
+  for (size_t i = 0; i < len; i++)
+    {
+      char c = input[i];
+      bool nl = false;
+      switch (c)
+	{
+	case '\0':
+	  putc ('\\', out);
+	  c = '0';
+	  break;
+	case '\r':
+	  continue;
+	case '\n':
+	  putc ('\\', out);
+	  c = 'n';
+	  nl = true;
+	  break;
+	case '"':
+	case '\\':
+	  putc ('\\', out);
+	  break;
+
+	case '/':
+	  if (strncmp (input + i, "//:VAR_MAP ", 11) == 0)
+	    record_id (input + i + 11, &vars_tail);
+	  if (strncmp (input + i, "//:FUNC_MAP ", 12) == 0)
+	    record_id (input + i + 12, &funcs_tail);
+	  break;
+
+	default:
+	  break;
+	}
+      putc (c, out);
+      if (nl)
+	fputs ("\"\n\t\"", out);
+    }
+  fprintf (out, "\";\n\n");
+
   unsigned int nvars = 0, nfuncs = 0;
 
-  do
-    tok = parse_file (tok);
-  while (tok->kind);
-
-  fprintf (out, "static const char ptx_code[] = \n");
-  write_stmts (out, rev_stmts (decls));
-  write_stmts (out, rev_stmts (vars));
-  write_stmts (out, rev_stmts (fns));
-  fprintf (out, ";\n\n");
   fprintf (out, "static const char *var_mappings[] = {\n");
   for (id_map *id = var_ids; id; id = id->next, nvars++)
     fprintf (out, "\t\"%s\"%s\n", id->ptx_name, id->next ? "," : "");
@@ -863,8 +279,9 @@ process (FILE *in, FILE *out)
   fprintf (out, "};\n\n");
 
   fprintf (out, "static const void *target_data[] = {\n");
-  fprintf (out, "  ptx_code, (void*) %u, var_mappings, (void*) %u, "
-		"func_mappings\n", nvars, nfuncs);
+  fprintf (out, "  ptx_code, (void *)(__UINTPTR_TYPE__)sizeof (ptx_code),\n");
+  fprintf (out, "  (void *) %u, var_mappings, (void *) %u, func_mappings\n",
+	   nvars, nfuncs);
   fprintf (out, "};\n\n");
 
   fprintf (out, "extern void GOMP_offload_register (const void *, int, void *);\n");
@@ -983,47 +400,74 @@ main (int argc, char **argv)
   obstack_ptr_grow (&argv_obstack, driver);
   obstack_ptr_grow (&argv_obstack, "-xlto");
   obstack_ptr_grow (&argv_obstack, target_ilp32 ? "-m32" : "-m64");
-  obstack_ptr_grow (&argv_obstack, "-S");
+  obstack_ptr_grow (&argv_obstack, "-lgomp");
+  char *collect_mkoffload_opts = getenv ("COLLECT_MKOFFLOAD_OPTIONS");
+  if (collect_mkoffload_opts)
+    {
+      char *str = collect_mkoffload_opts;
+      char *p;
+      while ((p = strchr (str, ' ')) != 0)
+	{
+	  *p = '\0';
+	  obstack_ptr_grow (&argv_obstack, str);
+	  str = p + 1;
+	}
+      obstack_ptr_grow (&argv_obstack, str);
+    }
 
+  bool fopenacc = false;
   for (int ix = 1; ix != argc; ix++)
     {
+      if (!strcmp (argv[ix], "-v"))
+	verbose = true;
+      else if (!strcmp (argv[ix], "-save-temps"))
+	save_temps = true;
+      else if (!strcmp (argv[ix], "-fopenacc"))
+	fopenacc = true;
+
       if (!strcmp (argv[ix], "-o") && ix + 1 != argc)
 	outname = argv[++ix];
       else
 	obstack_ptr_grow (&argv_obstack, argv[ix]);
     }
 
-  ptx_name = make_temp_file (".mkoffload");
-  obstack_ptr_grow (&argv_obstack, "-o");
-  obstack_ptr_grow (&argv_obstack, ptx_name);
-  obstack_ptr_grow (&argv_obstack, NULL);
-  const char **new_argv = XOBFINISH (&argv_obstack, const char **);
-
-  char *execpath = getenv ("GCC_EXEC_PREFIX");
-  char *cpath = getenv ("COMPILER_PATH");
-  char *lpath = getenv ("LIBRARY_PATH");
-  unsetenv ("GCC_EXEC_PREFIX");
-  unsetenv ("COMPILER_PATH");
-  unsetenv ("LIBRARY_PATH");
-
-  fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true);
-  obstack_free (&argv_obstack, NULL);
-
-  xputenv (concat ("GCC_EXEC_PREFIX=", execpath, NULL));
-  xputenv (concat ("COMPILER_PATH=", cpath, NULL));
-  xputenv (concat ("LIBRARY_PATH=", lpath, NULL));
-
-  in = fopen (ptx_name, "r");
-  if (!in)
-    fatal_error (input_location, "cannot open intermediate ptx file");
-
   ptx_cfile_name = make_temp_file (".c");
 
   out = fopen (ptx_cfile_name, "w");
   if (!out)
     fatal_error (input_location, "cannot open '%s'", ptx_cfile_name);
 
-  process (in, out);
+  /* We do not support OMP offloading. Don't generate an offload image
+     if we did not see -fopenacc.  */
+  if (fopenacc)
+    {
+      ptx_name = make_temp_file (".mkoffload");
+      obstack_ptr_grow (&argv_obstack, "-o");
+      obstack_ptr_grow (&argv_obstack, ptx_name);
+      obstack_ptr_grow (&argv_obstack, NULL);
+      const char **new_argv = XOBFINISH (&argv_obstack, const char **);
+
+      char *execpath = getenv ("GCC_EXEC_PREFIX");
+      char *cpath = getenv ("COMPILER_PATH");
+      char *lpath = getenv ("LIBRARY_PATH");
+      unsetenv ("GCC_EXEC_PREFIX");
+      unsetenv ("COMPILER_PATH");
+      unsetenv ("LIBRARY_PATH");
+
+      fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true);
+      obstack_free (&argv_obstack, NULL);
+
+      xputenv (concat ("GCC_EXEC_PREFIX=", execpath, NULL));
+      xputenv (concat ("COMPILER_PATH=", cpath, NULL));
+      xputenv (concat ("LIBRARY_PATH=", lpath, NULL));
+
+      in = fopen (ptx_name, "r");
+      if (!in)
+	fatal_error (input_location, "cannot open intermediate ptx file");
+
+      process (in, out);
+    }
+
   fclose (out);
 
   compile_native (ptx_cfile_name, outname, collect_gcc);
diff --git gcc/doc/invoke.texi gcc/doc/invoke.texi
index 9c8aa99..d3ce92b 100644
--- gcc/doc/invoke.texi
+++ gcc/doc/invoke.texi
@@ -490,7 +490,7 @@ Objective-C and Objective-C++ Dialects}.
 -static-libmpx -static-libmpxwrappers @gol
 -shared -shared-libgcc  -symbolic @gol
 -T @var{script}  -Wl,@var{option}  -Xlinker @var{option} @gol
--u @var{symbol} -z @var{keyword}}
+-Xoffload @var{option} -u @var{symbol} -z @var{keyword}}
 
 @item Directory Options
 @xref{Directory Options,,Options for Directory Search}.
@@ -11404,6 +11404,11 @@ syntax than as separate arguments.  For example, you can specify
 @option{-Xlinker -Map -Xlinker output.map}.  Other linkers may not support
 this syntax for command-line options.
 
+@item -Xoffload @var{option}
+@opindex Xoffload
+Pass @var{option} as an option to the mkoffload program during the linking
+phase.  This program is used to generate images for offloaded code.
+
 @item -Wl,@var{option}
 @opindex Wl
 Pass @var{option} as an option to the linker.  If @var{option} contains
diff --git gcc/fortran/ChangeLog.gomp gcc/fortran/ChangeLog.gomp
index deeefd4..76af137 100644
--- gcc/fortran/ChangeLog.gomp
+++ gcc/fortran/ChangeLog.gomp
@@ -1,3 +1,8 @@
+2015-05-13  Bernd Schmidt  <bernds@codesourcery.com>
+
+	* gfortranspec.c (lang_specific_driver): Add -Xoffload options to
+	link -lm and -lgfortran.
+
 2015-05-11  Thomas Schwinge  <thomas@codesourcery.com>
 	    Bernd Schmidt  <bernds@codesourcery.com>
 	    Cesar Philippidis  <cesar@codesourcery.com>
diff --git gcc/fortran/gfortranspec.c gcc/fortran/gfortranspec.c
index 8af4c76..dd59e1c 100644
--- gcc/fortran/gfortranspec.c
+++ gcc/fortran/gfortranspec.c
@@ -403,6 +403,8 @@ For more information about these matters, see the file named COPYING\n\n"));
 	default:
 	  break;
 	}
+      append_option (OPT_Xoffload, "-lm", 1);
+      append_option (OPT_Xoffload, "-lgfortran", 1);
     }
 
 #ifdef ENABLE_SHARED_LIBGCC
diff --git gcc/gcc.c gcc/gcc.c
index d956c36..c06322f 100644
--- gcc/gcc.c
+++ gcc/gcc.c
@@ -1184,6 +1184,11 @@ static vec<char_p> assembler_options;
    These options are accumulated by -Wp,
    and substituted into the preprocessor command with %Z.  */
 static vec<char_p> preprocessor_options;
+
+/* A vector of options to give to mkoffload.
+   These options are accumulated by -Xoffload and place in the
+   COLLECT_MKOFFLOAD_OPTIONS variable.  */
+static vec<char_p> mkoffload_options;
 \f
 static char *
 skip_whitespace (char *p)
@@ -3202,6 +3207,7 @@ display_help (void)
   fputs (_("  -Xassembler <arg>        Pass <arg> on to the assembler\n"), stdout);
   fputs (_("  -Xpreprocessor <arg>     Pass <arg> on to the preprocessor\n"), stdout);
   fputs (_("  -Xlinker <arg>           Pass <arg> on to the linker\n"), stdout);
+  fputs (_("  -Xoffload <arg>          Pass <arg> to mkoffload via an environment variable\n"), stdout);
   fputs (_("  -save-temps              Do not delete intermediate files\n"), stdout);
   fputs (_("  -save-temps=<arg>        Do not delete intermediate files\n"), stdout);
   fputs (_("\
@@ -3257,6 +3263,12 @@ add_linker_option (const char *option, int len)
 {
   linker_options.safe_push (save_string (option, len));
 }
+
+static void
+add_mkoffload_option (const char *option, int len)
+{
+  mkoffload_options.safe_push (save_string (option, len));
+}
 \f
 /* Allocate space for an input file in infiles.  */
 
@@ -3696,6 +3708,11 @@ driver_handle_option (struct gcc_options *opts,
       do_save = false;
       break;
 
+    case OPT_Xoffload:
+      add_mkoffload_option (arg, strlen (arg));
+      do_save = false;
+      break;
+
     case OPT_Xpreprocessor:
       add_preprocessor_option (arg, strlen (arg));
       do_save = false;
@@ -4266,7 +4283,7 @@ process_command (unsigned int decoded_options_count,
     }
 
   gcc_assert (!IS_ABSOLUTE_PATH (tooldir_base_prefix));
-  tooldir_prefix2 = concat (tooldir_base_prefix, spec_host_machine,
+  tooldir_prefix2 = concat (tooldir_base_prefix, spec_machine,
 			    dir_separator_str, NULL);
 
   /* Look for tools relative to the location from which the driver is
@@ -4391,6 +4408,23 @@ set_collect_gcc_options (void)
     }
   obstack_grow (&collect_obstack, "\0", 1);
   xputenv (XOBFINISH (&collect_obstack, char *));
+
+#ifdef ENABLE_OFFLOADING
+  /* Build COLLECT_MKOFFLOAD_OPTIONS to have all of the options specified to
+     mkoffload.  */
+  obstack_grow (&collect_obstack, "COLLECT_MKOFFLOAD_OPTIONS=",
+		sizeof ("COLLECT_MKOFFLOAD_OPTIONS=") - 1);
+
+  char_p opt;
+  FOR_EACH_VEC_ELT (mkoffload_options, i, opt)
+    {
+      if (i > 0)
+	obstack_grow (&collect_obstack, " ", 1);
+      obstack_grow (&collect_obstack, opt, strlen (opt));
+    }
+  obstack_grow (&collect_obstack, "\0", 1);
+  xputenv (XOBFINISH (&collect_obstack, char *));
+#endif
 }
 \f
 /* Process a spec string, accumulating and running commands.  */
diff --git libgcc/ChangeLog.gomp libgcc/ChangeLog.gomp
index bcc4c67..d872575 100644
--- libgcc/ChangeLog.gomp
+++ libgcc/ChangeLog.gomp
@@ -1,3 +1,15 @@
+2015-05-13  Bernd Schmidt  <bernds@codesourcery.com>
+	    Cesar Philippidis  <cesar@codesourcery.com>
+
+	* config.host (nvptx-*): For an offloading build, add libgomp.a
+	and libgomp.spec to extra_parts.
+	* config/nvptx/t-nvptx (gomp-acc_on_device.o, gomp-tids.o)
+	(gomp-atomic.o, libgomp.a, libgomp.spec): New rules.
+	(OBJS_libgomp): New variable.
+	* config/nvptx/gomp-acc_on_device.c: New file.
+	* config/nvptx/gomp-atomic.asm: Likewise.
+	* config/nvptx/gomp-tids.c: Likewise.
+
 2014-09-08  Thomas Schwinge  <thomas@codesourcery.com>
 
 	* configure.ac (enable_accelerator, offload_targets): Remove.
diff --git libgcc/config.host libgcc/config.host
index d558c38..03cac35 100644
--- libgcc/config.host
+++ libgcc/config.host
@@ -1292,7 +1292,11 @@ mep*-*-*)
 	;;
 nvptx-*)
 	tmake_file="$tmake_file nvptx/t-nvptx"
-	extra_parts="crt0.o"
+	if test "x${enable_as_accelerator_for}" != x; then
+		extra_parts="crt0.o libgomp.a libgomp.spec"
+	else
+		extra_parts="crt0.o"
+	fi
 	;;
 *)
 	echo "*** Configuration ${host} not supported" 1>&2
diff --git libgcc/config/nvptx/gomp-acc_on_device.c libgcc/config/nvptx/gomp-acc_on_device.c
new file mode 100644
index 0000000..e4278f9
--- /dev/null
+++ libgcc/config/nvptx/gomp-acc_on_device.c
@@ -0,0 +1,9 @@
+int acc_on_device(int d)
+{
+  return __builtin_acc_on_device(d);
+}
+
+int acc_on_device_h_(int *d)
+{
+  return acc_on_device(*d);
+}
diff --git libgcc/config/nvptx/gomp-atomic.asm libgcc/config/nvptx/gomp-atomic.asm
new file mode 100644
index 0000000..ae9d925
--- /dev/null
+++ libgcc/config/nvptx/gomp-atomic.asm
@@ -0,0 +1,37 @@
+
+// BEGIN PREAMBLE
+	.version	3.1
+	.target	sm_30
+	.address_size 64
+	.extern .shared .u8 sdata[];
+// END PREAMBLE
+
+// BEGIN VAR DEF: libgomp_ptx_lock
+.global .align 4 .u32 libgomp_ptx_lock;
+
+// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_start
+.visible .func GOMP_atomic_start;
+// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_start
+.visible .func GOMP_atomic_start
+{
+	.reg .pred 	%p<2>;
+	.reg .s32 	%r<2>;
+	.reg .s64 	%rd<2>;
+BB5_1:
+	mov.u64 	%rd1, libgomp_ptx_lock;
+	atom.global.cas.b32 	%r1, [%rd1], 0, 1;
+	setp.ne.s32	%p1, %r1, 0;
+	@%p1 bra 	BB5_1;
+	ret;
+	}
+// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_end
+.visible .func GOMP_atomic_end;
+// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_end
+.visible .func GOMP_atomic_end
+{
+	.reg .s32 	%r<2>;
+	.reg .s64 	%rd<2>;
+	mov.u64 	%rd1, libgomp_ptx_lock;
+	atom.global.exch.b32 	%r1, [%rd1], 0;
+	ret;
+	}
diff --git libgcc/config/nvptx/gomp-tids.c libgcc/config/nvptx/gomp-tids.c
new file mode 100644
index 0000000..b017b0d
--- /dev/null
+++ libgcc/config/nvptx/gomp-tids.c
@@ -0,0 +1,66 @@
+/* Each gang consists of 'worker' threads.  Each worker has 'vector'
+   threads.
+
+   gang, worker and vector mapping functions:
+
+   *tid (0) => vector dimension
+   *tid (1) => worker dimension
+   *ctaid (0) = gang dimension
+
+   FIXME: these functions assume that the gang, worker and vector parameters
+   are 0 or 1.  To generalize these functions, we should use -1 to indicate,
+   say, that a gang clause was used without its optional argument.  In this
+   case, gang should correspond to ctaid(0), i.e., the num_gangs parameter
+   passed to cuLaunchKernel.
+
+   tid = [0, ntid-1]
+   ntid = [1...threads_per_dimension]
+*/
+
+int __attribute__ ((used))
+GOACC_get_num_threads (int gang, int worker, int vector)
+{
+  int vsize = vector * __builtin_GOACC_ntid (0);
+  int wsize = worker * __builtin_GOACC_ntid (1);
+  int gsize = gang * __builtin_GOACC_nctaid (0);
+  int size = 1;
+
+  if (vector)
+    size *= __builtin_GOACC_ntid (0);
+
+  if (worker)
+    size *= __builtin_GOACC_ntid (1);
+
+  if (gang)
+    size *= __builtin_GOACC_nctaid (0);
+
+  return size;
+}
+
+int __attribute__ ((used))
+GOACC_get_thread_num (int gang, int worker, int vector)
+{
+  int tid = 0;
+  int ws = __builtin_GOACC_ntid (1);
+  int vs = __builtin_GOACC_ntid (0);
+  int gid = __builtin_GOACC_ctaid (0);
+  int wid = __builtin_GOACC_tid (1);
+  int vid = __builtin_GOACC_tid (0);
+
+  if (gang && worker && vector)
+    tid = gid * ws * vs + vs * wid + vid;
+  else if (gang && !worker && vector)
+    tid = vs * gid + vid;
+  else if (gang && worker && !vector)
+    tid = ws * gid + wid;
+  else if (!gang && worker && vector)
+    tid = vs * wid + vid;
+  else if (!gang && !worker && vector)
+    tid = vid;
+  else if (!gang && worker && !vector)
+    tid = wid;
+  else if (gang && !worker && !vector)
+    tid = gid;
+
+  return tid;
+}
diff --git libgcc/config/nvptx/t-nvptx libgcc/config/nvptx/t-nvptx
index 34d68cc..a9e56a9 100644
--- libgcc/config/nvptx/t-nvptx
+++ libgcc/config/nvptx/t-nvptx
@@ -12,3 +12,16 @@ crt0.o: $(srcdir)/config/nvptx/crt0.s
 # support it, and it may cause the build to fail, because of alloca usage, for
 # example.
 INHIBIT_LIBC_CFLAGS = -Dinhibit_libc
+
+gomp-acc_on_device.o: $(srcdir)/config/nvptx/gomp-acc_on_device.c
+	$(gcc_compile) -c -fno-builtin-acc_on_device $<
+gomp-tids.o: $(srcdir)/config/nvptx/gomp-tids.c
+	$(gcc_compile) -c -fopenacc -O $<
+gomp-atomic.o: $(srcdir)/config/nvptx/gomp-atomic.asm
+	cp $< $@
+
+OBJS_libgomp= gomp-acc_on_device.o gomp-tids.o gomp-atomic.o
+libgomp.a: $(OBJS_libgomp)
+	$(AR_CREATE_FOR_TARGET) $@ $(OBJS_libgomp)
+libgomp.spec:
+	echo "*link_gomp: -lgomp" >$@
diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp
index f20cab3..6ce67c6 100644
--- libgomp/ChangeLog.gomp
+++ libgomp/ChangeLog.gomp
@@ -1,3 +1,19 @@
+2015-05-13  Thomas Schwinge  <thomas@codesourcery.com>
+	    Bernd Schmidt  <bernds@codesourcery.com>
+	    Cesar Philippidis  <cesar@codesourcery.com>
+
+	* oacc-ptx.h: Remove file.
+	* plugin/plugin-nvptx.c: Don't include it.
+	(link_ptx): Accept a length argument.  Don't add predefined bits of
+	PTX code.  Look for NUL characters as file boundaries in the input
+	and link the multiple PTX files.
+	(GOMP_OFFLOAD_load_image): Get the size of PTX code from the table
+	and pass it to link_ptx.
+	* testsuite/libgomp.fortran/fortran.exp (lang_link_flags): Add
+	"-Xoffload -lgfortran -Xoffload -lm".
+	* testsuite/libgomp.oacc-fortran/fortran.exp (lang_link_flags):
+	Likewise.
+
 2015-05-11  Thomas Schwinge  <thomas@codesourcery.com>
 	    Bernd Schmidt  <bernds@codesourcery.com>
 	    Cesar Philippidis  <cesar@codesourcery.com>
diff --git libgomp/oacc-ptx.h libgomp/oacc-ptx.h
deleted file mode 100644
index 104f297..0000000
--- libgomp/oacc-ptx.h
+++ /dev/null
@@ -1,454 +0,0 @@
-/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
-
-   Contributed by Mentor Embedded.
-
-   This file is part of the GNU Offloading and Multi Processing Library
-   (libgomp).
-
-   Libgomp is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-   more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define ABORT_PTX				\
-  ".version 3.1\n"				\
-  ".target sm_30\n"				\
-  ".address_size 64\n"				\
-  ".visible .func abort;\n"			\
-  ".visible .func abort\n"			\
-  "{\n"						\
-  "trap;\n"					\
-  "ret;\n"					\
-  "}\n"						\
-  ".visible .func _gfortran_abort;\n"		\
-  ".visible .func _gfortran_abort\n"		\
-  "{\n"						\
-  "trap;\n"					\
-  "ret;\n"					\
-  "}\n" \
-
-/* Generated with:
-
-   $ echo 'int acc_on_device(int d) { return __builtin_acc_on_device(d); } int acc_on_device_h_(int *d) { return acc_on_device(*d); }' | accel-gcc/xgcc -Baccel-gcc -x c - -o - -S -m64 -O3 -fno-builtin-acc_on_device -fno-inline
-*/
-#define ACC_ON_DEVICE_PTX						\
-  "        .version        3.1\n"					\
-  "        .target sm_30\n"						\
-  "        .address_size 64\n"						\
-  ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1);\n" \
-  ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1)\n" \
-  "{\n"									\
-  "        .reg.u32 %ar1;\n"						\
-  ".reg.u32 %retval;\n"							\
-  "        .reg.u64 %hr10;\n"						\
-  "        .reg.u32 %r24;\n"						\
-  "        .reg.u32 %r25;\n"						\
-  "        .reg.pred %r27;\n"						\
-  "        .reg.u32 %r30;\n"						\
-  "        ld.param.u32 %ar1, [%in_ar1];\n"				\
-  "                mov.u32 %r24, %ar1;\n"				\
-  "                setp.ne.u32 %r27,%r24,4;\n"				\
-  "                set.u32.eq.u32 %r30,%r24,5;\n"			\
-  "                neg.s32 %r25, %r30;\n"				\
-  "        @%r27   bra     $L3;\n"					\
-  "                mov.u32 %r25, 1;\n"					\
-  "$L3:\n"								\
-  "                mov.u32 %retval, %r25;\n"				\
-  "        st.param.u32    [%out_retval], %retval;\n"			\
-  "        ret;\n"							\
-  "        }\n"								\
-  ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1);\n" \
-  ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1)\n" \
-  "{\n"									\
-  "        .reg.u64 %ar1;\n"						\
-  ".reg.u32 %retval;\n"							\
-  "        .reg.u64 %hr10;\n"						\
-  "        .reg.u64 %r25;\n"						\
-  "        .reg.u32 %r26;\n"						\
-  "        .reg.u32 %r27;\n"						\
-  "        ld.param.u64 %ar1, [%in_ar1];\n"				\
-  "                mov.u64 %r25, %ar1;\n"				\
-  "                ld.u32  %r26, [%r25];\n"				\
-  "        {\n"								\
-  "                .param.u32 %retval_in;\n"				\
-  "        {\n"								\
-  "                .param.u32 %out_arg0;\n"				\
-  "                st.param.u32 [%out_arg0], %r26;\n"			\
-  "                call (%retval_in), acc_on_device, (%out_arg0);\n"	\
-  "        }\n"								\
-  "                ld.param.u32    %r27, [%retval_in];\n"		\
-  "}\n"									\
-  "                mov.u32 %retval, %r27;\n"				\
-  "        st.param.u32    [%out_retval], %retval;\n"			\
-  "        ret;\n"							\
-  "        }"
-
- #define GOACC_INTERNAL_PTX						\
-  ".version 3.1\n" \
-  ".target sm_30\n" \
-  ".address_size 64\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n" \
-  ".extern .func abort;\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n" \
-  "{\n" \
-  ".reg .u32 %ar1;\n" \
-  ".reg .u32 %retval;\n" \
-  ".reg .u64 %hr10;\n" \
-  ".reg .u32 %r22;\n" \
-  ".reg .u32 %r23;\n" \
-  ".reg .u32 %r24;\n" \
-  ".reg .u32 %r25;\n" \
-  ".reg .u32 %r26;\n" \
-  ".reg .u32 %r27;\n" \
-  ".reg .u32 %r28;\n" \
-  ".reg .u32 %r29;\n" \
-  ".reg .pred %r30;\n" \
-  ".reg .u32 %r31;\n" \
-  ".reg .pred %r32;\n" \
-  ".reg .u32 %r33;\n" \
-  ".reg .pred %r34;\n" \
-  ".local .align 8 .b8 %frame[4];\n" \
-  "ld.param.u32 %ar1,[%in_ar1];\n" \
-  "mov.u32 %r27,%ar1;\n" \
-  "st.local.u32 [%frame],%r27;\n" \
-  "ld.local.u32 %r28,[%frame];\n" \
-  "mov.u32 %r29,1;\n"							\
-  "setp.eq.u32 %r30,%r28,%r29;\n"					\
-  "@%r30 bra $L4;\n"							\
-  "mov.u32 %r31,2;\n"							\
-  "setp.eq.u32 %r32,%r28,%r31;\n"					\
-  "@%r32 bra $L5;\n"							\
-  "mov.u32 %r33,0;\n"							\
-  "setp.eq.u32 %r34,%r28,%r33;\n"					\
-  "@!%r34 bra $L8;\n"							\
-  "mov.u32 %r23,%tid.x;\n"						\
-  "mov.u32 %r22,%r23;\n"						\
-  "bra $L7;\n"								\
-  "$L4:\n"								\
-  "mov.u32 %r24,%tid.y;\n"						\
-  "mov.u32 %r22,%r24;\n"						\
-  "bra $L7;\n"								\
-  "$L5:\n"								\
-  "mov.u32 %r25,%tid.z;\n"						\
-  "mov.u32 %r22,%r25;\n"						\
-  "bra $L7;\n"								\
-  "$L8:\n"								\
-  "{\n"									\
-  "{\n"									\
-  "call abort;\n"							\
-  "}\n"									\
-  "}\n"									\
-  "$L7:\n"								\
-  "mov.u32 %r26,%r22;\n"						\
-  "mov.u32 %retval,%r26;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"									\
-  ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n" \
-  "{\n"									\
-  ".reg .u32 %ar1;\n"							\
-  ".reg .u32 %retval;\n"						\
-  ".reg .u64 %hr10;\n"							\
-  ".reg .u32 %r22;\n"							\
-  ".reg .u32 %r23;\n"							\
-  ".reg .u32 %r24;\n"							\
-  ".reg .u32 %r25;\n"							\
-  ".reg .u32 %r26;\n"							\
-  ".reg .u32 %r27;\n"							\
-  ".reg .u32 %r28;\n"							\
-  ".reg .u32 %r29;\n"							\
-  ".reg .pred %r30;\n"							\
-  ".reg .u32 %r31;\n"							\
-  ".reg .pred %r32;\n"							\
-  ".reg .u32 %r33;\n"							\
-  ".reg .pred %r34;\n"							\
-  ".local .align 8 .b8 %frame[4];\n"					\
-  "ld.param.u32 %ar1,[%in_ar1];\n"					\
-  "mov.u32 %r27,%ar1;\n"						\
-  "st.local.u32 [%frame],%r27;\n"					\
-  "ld.local.u32 %r28,[%frame];\n"					\
-  "mov.u32 %r29,1;\n"							\
-  "setp.eq.u32 %r30,%r28,%r29;\n"					\
-  "@%r30 bra $L11;\n"							\
-  "mov.u32 %r31,2;\n"							\
-  "setp.eq.u32 %r32,%r28,%r31;\n"					\
-  "@%r32 bra $L12;\n"							\
-  "mov.u32 %r33,0;\n"							\
-  "setp.eq.u32 %r34,%r28,%r33;\n"					\
-  "@!%r34 bra $L15;\n"							\
-  "mov.u32 %r23,%ntid.x;\n"						\
-  "mov.u32 %r22,%r23;\n"						\
-  "bra $L14;\n"								\
-  "$L11:\n"								\
-  "mov.u32 %r24,%ntid.y;\n"						\
-  "mov.u32 %r22,%r24;\n"						\
-  "bra $L14;\n"								\
-  "$L12:\n"								\
-  "mov.u32 %r25,%ntid.z;\n"						\
-  "mov.u32 %r22,%r25;\n"						\
-  "bra $L14;\n"								\
-  "$L15:\n"								\
-  "{\n"									\
-  "{\n"									\
-  "call abort;\n"							\
-  "}\n"									\
-  "}\n"									\
-  "$L14:\n"								\
-  "mov.u32 %r26,%r22;\n"						\
-  "mov.u32 %retval,%r26;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"									\
-  ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n" \
-  "{\n"									\
-  ".reg .u32 %ar1;\n"							\
-  ".reg .u32 %retval;\n"						\
-  ".reg .u64 %hr10;\n"							\
-  ".reg .u32 %r22;\n"							\
-  ".reg .u32 %r23;\n"							\
-  ".reg .u32 %r24;\n"							\
-  ".reg .u32 %r25;\n"							\
-  ".reg .u32 %r26;\n"							\
-  ".reg .u32 %r27;\n"							\
-  ".reg .u32 %r28;\n"							\
-  ".reg .u32 %r29;\n"							\
-  ".reg .pred %r30;\n"							\
-  ".reg .u32 %r31;\n"							\
-  ".reg .pred %r32;\n"							\
-  ".reg .u32 %r33;\n"							\
-  ".reg .pred %r34;\n"							\
-  ".local .align 8 .b8 %frame[4];\n"					\
-  "ld.param.u32 %ar1,[%in_ar1];\n"					\
-  "mov.u32 %r27,%ar1;\n"						\
-  "st.local.u32 [%frame],%r27;\n"					\
-  "ld.local.u32 %r28,[%frame];\n"					\
-  "mov.u32 %r29,1;\n"							\
-  "setp.eq.u32 %r30,%r28,%r29;\n"					\
-  "@%r30 bra $L18;\n"							\
-  "mov.u32 %r31,2;\n"							\
-  "setp.eq.u32 %r32,%r28,%r31;\n"					\
-  "@%r32 bra $L19;\n"							\
-  "mov.u32 %r33,0;\n"							\
-  "setp.eq.u32 %r34,%r28,%r33;\n"					\
-  "@!%r34 bra $L22;\n"							\
-  "mov.u32 %r23,%ctaid.x;\n"						\
-  "mov.u32 %r22,%r23;\n"						\
-  "bra $L21;\n"								\
-  "$L18:\n"								\
-  "mov.u32 %r24,%ctaid.y;\n"						\
-  "mov.u32 %r22,%r24;\n"						\
-  "bra $L21;\n"								\
-  "$L19:\n"								\
-  "mov.u32 %r25,%ctaid.z;\n"						\
-  "mov.u32 %r22,%r25;\n"						\
-  "bra $L21;\n"								\
-  "$L22:\n"								\
-  "{\n"									\
-  "{\n"									\
-  "call abort;\n"							\
-  "}\n"									\
-  "}\n"									\
-  "$L21:\n"								\
-  "mov.u32 %r26,%r22;\n"						\
-  "mov.u32 %retval,%r26;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"									\
-  ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n" \
-  "{\n"									\
-  ".reg .u32 %ar1;\n"							\
-  ".reg .u32 %retval;\n"						\
-  ".reg .u64 %hr10;\n"							\
-  ".reg .u32 %r22;\n"							\
-  ".reg .u32 %r23;\n"							\
-  ".reg .u32 %r24;\n"							\
-  ".reg .u32 %r25;\n"							\
-  ".reg .u32 %r26;\n"							\
-  ".reg .u32 %r27;\n"							\
-  ".reg .u32 %r28;\n"							\
-  ".reg .u32 %r29;\n"							\
-  ".reg .pred %r30;\n"							\
-  ".reg .u32 %r31;\n"							\
-  ".reg .pred %r32;\n"							\
-  ".reg .u32 %r33;\n"							\
-  ".reg .pred %r34;\n"							\
-  ".local .align 8 .b8 %frame[4];\n"					\
-  "ld.param.u32 %ar1,[%in_ar1];\n"					\
-  "mov.u32 %r27,%ar1;\n"						\
-  "st.local.u32 [%frame],%r27;\n"					\
-  "ld.local.u32 %r28,[%frame];\n"					\
-  "mov.u32 %r29,1;\n"							\
-  "setp.eq.u32 %r30,%r28,%r29;\n"					\
-  "@%r30 bra $L25;\n"							\
-  "mov.u32 %r31,2;\n"							\
-  "setp.eq.u32 %r32,%r28,%r31;\n"					\
-  "@%r32 bra $L26;\n"							\
-  "mov.u32 %r33,0;\n"							\
-  "setp.eq.u32 %r34,%r28,%r33;\n"					\
-  "@!%r34 bra $L29;\n"							\
-  "mov.u32 %r23,%nctaid.x;\n"						\
-  "mov.u32 %r22,%r23;\n"						\
-  "bra $L28;\n"								\
-  "$L25:\n"								\
-  "mov.u32 %r24,%nctaid.y;\n"						\
-  "mov.u32 %r22,%r24;\n"						\
-  "bra $L28;\n"								\
-  "$L26:\n"								\
-  "mov.u32 %r25,%nctaid.z;\n"						\
-  "mov.u32 %r22,%r25;\n"						\
-  "bra $L28;\n"								\
-  "$L29:\n"								\
-  "{\n"									\
-  "{\n"									\
-  "call abort;\n"							\
-  "}\n"									\
-  "}\n"									\
-  "$L28:\n"								\
-  "mov.u32 %r26,%r22;\n"						\
-  "mov.u32 %retval,%r26;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"									\
-  ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n"	\
-  "{\n"									\
-  ".reg .u32 %retval;\n"						\
-  ".reg .u64 %hr10;\n"							\
-  ".reg .u32 %r22;\n"							\
-  ".reg .u32 %r23;\n"							\
-  ".reg .u32 %r24;\n"							\
-  ".reg .u32 %r25;\n"							\
-  ".reg .u32 %r26;\n"							\
-  ".reg .u32 %r27;\n"							\
-  ".reg .u32 %r28;\n"							\
-  ".reg .u32 %r29;\n"							\
-  "mov.u32 %r26,0;\n"							\
-  "{\n"									\
-  ".param .u32 %retval_in;\n"						\
-  "{\n"									\
-  ".param .u32 %out_arg0;\n"						\
-  "st.param.u32 [%out_arg0],%r26;\n"					\
-  "call (%retval_in),GOACC_ntid,(%out_arg0);\n"				\
-  "}\n"									\
-  "ld.param.u32 %r27,[%retval_in];\n"					\
-  "}\n"									\
-  "mov.u32 %r22,%r27;\n"						\
-  "mov.u32 %r28,0;\n"							\
-  "{\n"									\
-  ".param .u32 %retval_in;\n"						\
-  "{\n"									\
-  ".param .u32 %out_arg0;\n"						\
-  "st.param.u32 [%out_arg0],%r28;\n"					\
-  "call (%retval_in),GOACC_nctaid,(%out_arg0);\n"			\
-  "}\n"									\
-  "ld.param.u32 %r29,[%retval_in];\n"					\
-  "}\n"									\
-  "mov.u32 %r23,%r29;\n"						\
-  "mul.lo.u32 %r24,%r22,%r23;\n"					\
-  "mov.u32 %r25,%r24;\n"						\
-  "mov.u32 %retval,%r25;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"									\
-  ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n"	\
-  "{\n"									\
-  ".reg .u32 %retval;\n"						\
-  ".reg .u64 %hr10;\n"							\
-  ".reg .u32 %r22;\n"							\
-  ".reg .u32 %r23;\n"							\
-  ".reg .u32 %r24;\n"							\
-  ".reg .u32 %r25;\n"							\
-  ".reg .u32 %r26;\n"							\
-  ".reg .u32 %r27;\n"							\
-  ".reg .u32 %r28;\n"							\
-  ".reg .u32 %r29;\n"							\
-  ".reg .u32 %r30;\n"							\
-  ".reg .u32 %r31;\n"							\
-  ".reg .u32 %r32;\n"							\
-  ".reg .u32 %r33;\n"							\
-  "mov.u32 %r28,0;\n"							\
-  "{\n"									\
-  ".param .u32 %retval_in;\n"						\
-  "{\n"									\
-  ".param .u32 %out_arg0;\n"						\
-  "st.param.u32 [%out_arg0],%r28;\n"					\
-  "call (%retval_in),GOACC_ntid,(%out_arg0);\n"				\
-  "}\n"									\
-  "ld.param.u32 %r29,[%retval_in];\n"					\
-  "}\n"									\
-  "mov.u32 %r22,%r29;\n"						\
-  "mov.u32 %r30,0;\n"							\
-  "{\n"									\
-  ".param .u32 %retval_in;\n"						\
-  "{\n"									\
-  ".param .u32 %out_arg0;\n"						\
-  "st.param.u32 [%out_arg0],%r30;\n"					\
-  "call (%retval_in),GOACC_ctaid,(%out_arg0);\n"			\
-  "}\n"									\
-  "ld.param.u32 %r31,[%retval_in];\n"					\
-  "}\n"									\
-  "mov.u32 %r23,%r31;\n"						\
-  "mul.lo.u32 %r24,%r22,%r23;\n"					\
-  "mov.u32 %r32,0;\n"							\
-  "{\n"									\
-  ".param .u32 %retval_in;\n"						\
-  "{\n"									\
-  ".param .u32 %out_arg0;\n"						\
-  "st.param.u32 [%out_arg0],%r32;\n"					\
-  "call (%retval_in),GOACC_tid,(%out_arg0);\n"				\
-  "}\n"									\
-  "ld.param.u32 %r33,[%retval_in];\n"					\
-  "}\n"									\
-  "mov.u32 %r25,%r33;\n"						\
-  "add.u32 %r26,%r24,%r25;\n"						\
-  "mov.u32 %r27,%r26;\n"						\
-  "mov.u32 %retval,%r27;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"
-
- #define GOMP_ATOMIC_PTX \
-  ".version 3.1\n" \
-  ".target sm_30\n" \
-  ".address_size 64\n" \
-  ".global .align 4 .u32 libgomp_ptx_lock;\n" \
-  ".visible .func GOMP_atomic_start;\n" \
-  ".visible .func GOMP_atomic_start\n" \
-  "{\n" \
-  "  .reg .pred    %p<2>;\n" \
-  "  .reg .s32     %r<2>;\n" \
-  "  .reg .s64     %rd<2>;\n" \
-  "BB5_1:\n" \
-  "  mov.u64       %rd1, libgomp_ptx_lock;\n" \
-  "  atom.global.cas.b32   %r1, [%rd1], 0, 1;\n" \
-  "  setp.ne.s32   %p1, %r1, 0;\n" \
-  "  @%p1 bra      BB5_1;\n" \
-  "  ret;\n" \
-  "}\n" \
-  ".visible .func GOMP_atomic_end;\n" \
-  ".visible .func GOMP_atomic_end\n" \
-  "{\n" \
-  "  .reg .s32     %r<2>;\n" \
-  "  .reg .s64     %rd<2>;\n" \
-  "  mov.u64       %rd1, libgomp_ptx_lock;\n" \
-  "  atom.global.exch.b32  %r1, [%rd1], 0;\n" \
-  "  ret;\n" \
-  "}\n"
diff --git libgomp/plugin/plugin-nvptx.c libgomp/plugin/plugin-nvptx.c
index ad1163d..7d34b9c 100644
--- libgomp/plugin/plugin-nvptx.c
+++ libgomp/plugin/plugin-nvptx.c
@@ -34,7 +34,6 @@
 #include "openacc.h"
 #include "config.h"
 #include "libgomp-plugin.h"
-#include "oacc-ptx.h"
 #include "oacc-plugin.h"
 
 #include <pthread.h>
@@ -793,7 +792,7 @@ nvptx_get_num_devices (void)
 
 
 static void
-link_ptx (CUmodule *module, char *ptx_code)
+link_ptx (CUmodule *module, char *ptx_code, size_t length)
 {
   CUjit_option opts[7];
   void *optvals[7];
@@ -834,63 +833,38 @@ link_ptx (CUmodule *module, char *ptx_code)
   if (r != CUDA_SUCCESS)
     GOMP_PLUGIN_fatal ("cuLinkCreate error: %s", cuda_error (r));
 
-  char *abort_ptx = ABORT_PTX;
-  r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, abort_ptx,
-		     strlen (abort_ptx) + 1, 0, 0, 0, 0);
-  if (r != CUDA_SUCCESS)
-    {
-      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
-      GOMP_PLUGIN_fatal ("cuLinkAddData (abort) error: %s", cuda_error (r));
-    }
-
-  char *acc_on_device_ptx = ACC_ON_DEVICE_PTX;
-  r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, acc_on_device_ptx,
-		     strlen (acc_on_device_ptx) + 1, 0, 0, 0, 0);
-  if (r != CUDA_SUCCESS)
-    {
-      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
-      GOMP_PLUGIN_fatal ("cuLinkAddData (acc_on_device) error: %s",
-			 cuda_error (r));
-    }
-
-  char *goacc_internal_ptx = GOACC_INTERNAL_PTX;
-  r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, goacc_internal_ptx,
-		     strlen (goacc_internal_ptx) + 1, 0, 0, 0, 0);
-  if (r != CUDA_SUCCESS)
-    {
-      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
-      GOMP_PLUGIN_fatal ("cuLinkAddData (goacc_internal_ptx) error: %s",
-			 cuda_error (r));
-    }
-
-  char *gomp_atomic_ptx = GOMP_ATOMIC_PTX;
-  r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, gomp_atomic_ptx,
-		     strlen (gomp_atomic_ptx) + 1, 0, 0, 0, 0);
-  if (r != CUDA_SUCCESS)
-    {
-      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
-      GOMP_PLUGIN_fatal ("cuLinkAddData (gomp_atomic_ptx) error: %s",
-			 cuda_error (r));
-    }
-
-  r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, ptx_code,
-              strlen (ptx_code) + 1, 0, 0, 0, 0);
-  if (r != CUDA_SUCCESS)
-    {
-      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
-      GOMP_PLUGIN_fatal ("cuLinkAddData (ptx_code) error: %s", cuda_error (r));
+  size_t off = 0;
+  while (off < length)
+    {
+      int l = strlen (ptx_code + off);
+      r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, ptx_code + off, l + 1,
+			 0, 0, 0, 0);
+      if (r != CUDA_SUCCESS)
+	{
+	  GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
+	  GOMP_PLUGIN_fatal ("cuLinkAddData (ptx_code) error: %s", cuda_error (r));
+	}
+
+      off += l;
+      while (off < length && ptx_code[off] == '\0')
+	off++;
     }
 
   r = cuLinkComplete (linkstate, &linkout, &linkoutsize);
-  if (r != CUDA_SUCCESS)
-    GOMP_PLUGIN_fatal ("cuLinkComplete error: %s", cuda_error (r));
 
   GOMP_PLUGIN_debug (0, "Link complete: %fms\n", elapsed);
   GOMP_PLUGIN_debug (0, "Link log %s\n", &ilog[0]);
 
+  if (r != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuLinkComplete error: %s", cuda_error (r));
+
   r = cuModuleLoadData (module, linkout);
   if (r != CUDA_SUCCESS)
     GOMP_PLUGIN_fatal ("cuModuleLoadData error: %s", cuda_error (r));
+
+  r = cuLinkDestroy (linkstate);
+  if (r != CUDA_SUCCESS)
+    GOMP_PLUGIN_fatal ("cuLinkDestory error: %s", cuda_error (r));
 }
 
 static void
@@ -1633,7 +1607,7 @@ GOMP_OFFLOAD_load_image (int ord, void *target_data,
 
   nvptx_attach_host_thread_to_device (ord);
 
-  link_ptx (&module, img_header[0]);
+  link_ptx (&module, img_header[0], (size_t) img_header[1]);
 
   pthread_mutex_lock (&ptx_image_lock);
   new_image = GOMP_PLUGIN_malloc (sizeof (struct ptx_image_data));
@@ -1647,18 +1621,19 @@ GOMP_OFFLOAD_load_image (int ord, void *target_data,
      each offload image:
 
      img_header[0] -> ptx code
-     img_header[1] -> number of variables
-     img_header[2] -> array of variable names (pointers to strings)
-     img_header[3] -> number of kernels
-     img_header[4] -> array of kernel names (pointers to strings)
+     img_header[1] -> size of ptx code
+     img_header[2] -> number of variables
+     img_header[3] -> array of variable names (pointers to strings)
+     img_header[4] -> number of kernels
+     img_header[5] -> array of kernel names (pointers to strings)
 
      The array of kernel names and the functions addresses form a
      one-to-one correspondence.  */
 
-  var_entries = (uintptr_t) img_header[1];
-  var_names = (char **) img_header[2];
-  fn_entries = (uintptr_t) img_header[3];
-  fn_names = (char **) img_header[4];
+  var_entries = (uintptr_t) img_header[2];
+  var_names = (char **) img_header[3];
+  fn_entries = (uintptr_t) img_header[4];
+  fn_names = (char **) img_header[5];
 
   *target_table = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
 				      * (fn_entries + var_entries));
diff --git libgomp/testsuite/libgomp.fortran/fortran.exp libgomp/testsuite/libgomp.fortran/fortran.exp
index 9e6b643..f684abc 100644
--- libgomp/testsuite/libgomp.fortran/fortran.exp
+++ libgomp/testsuite/libgomp.fortran/fortran.exp
@@ -7,7 +7,10 @@ global ALWAYS_CFLAGS
 
 set shlib_ext [get_shlib_extension]
 set lang_library_path	"../libgfortran/.libs"
-set lang_link_flags	"-lgfortran"
+#TODO
+# We're not using the gfortran driver, so have to mimic its behavior
+# here.
+set lang_link_flags	"-lgfortran -Xoffload -lgfortran -Xoffload -lm"
 if [info exists lang_include_flags] then {
     unset lang_include_flags
 }
diff --git libgomp/testsuite/libgomp.oacc-fortran/fortran.exp libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
index a68e039..11655a1 100644
--- libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
+++ libgomp/testsuite/libgomp.oacc-fortran/fortran.exp
@@ -9,7 +9,10 @@ global ALWAYS_CFLAGS
 
 set shlib_ext [get_shlib_extension]
 set lang_library_path	"../libgfortran/.libs"
-set lang_link_flags	"-lgfortran"
+#TODO
+# We're not using the gfortran driver, so have to mimic its behavior
+# here.
+set lang_link_flags	"-lgfortran -Xoffload -lgfortran -Xoffload -lm"
 if [info exists lang_include_flags] then {
     unset lang_include_flags
 }


Grüße,
 Thomas

[-- Attachment #2: Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [WIP] OpenMP 4 NVPTX support
  2015-04-21 15:58 [WIP] OpenMP 4 NVPTX support Jakub Jelinek
  2015-04-22 15:08 ` Bernd Schmidt
  2015-04-23 14:40 ` [WIP] OpenMP 4 NVPTX support Jakub Jelinek
@ 2015-07-22 16:13 ` Thomas Schwinge
  2015-07-22 16:47   ` Jakub Jelinek
  2015-08-10 16:31   ` [gomp4] [nvptx] Move GOMP stuff from libgcc to libgomp (was: [WIP] OpenMP 4 NVPTX support) Thomas Schwinge
  2 siblings, 2 replies; 11+ messages in thread
From: Thomas Schwinge @ 2015-07-22 16:13 UTC (permalink / raw)
  To: Jakub Jelinek
  Cc: gcc-patches, Julian Brown, Bernd Schmidt, Tobias Burnus, Ilya Verbin

[-- Attachment #1: Type: text/plain, Size: 17132 bytes --]

Hi!

On Tue, 21 Apr 2015 17:58:39 +0200, Jakub Jelinek <jakub@redhat.com> wrote:
> Attached is a minimal patch to get at least a trivial OpenMP 4.0 testcase
> offloading to NVPTX (the first patch).  The second patch is WIP, just first
> few needed changes to make libgomp to build for NVPTX (several weeks of work
> at least).

We're not in particular working on making nvptx offloading work for
OpenMP, but also for OpenACC offloading a tiny bit of code is required to
be shipped in an offloading device's runtime library -- code that
conceptually belongs into libgomp.  (On gomp-4_0-branch, it currently
lives in libgcc because that was easier to do.)  Actually, as I should
find out, building a "dummy" (empty) libgomp for nvptx is not actually
difficult.  Additionally to your second patch (U2; quoted at the end of
this email), we'll need the following:

commit ea5213c1eb6e525f64aa103312e8e0ac88048122
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Wed Jul 22 12:12:41 2015 +0200

    Empty libgomp for nvptx
    
        $ mkdir libgomp/config/nvptx
        $ cp libgomp/config/{linux,nvptx}/omp-lock.h
        $ for f in libgomp{,/config/linux,/config/posix}/*.c; do touch libgomp/config/nvptx/"$(basename "$f")"; done
---
 libgomp/config/nvptx/affinity.c       |  0
 libgomp/config/nvptx/alloc.c          |  0
 libgomp/config/nvptx/bar.c            |  0
 libgomp/config/nvptx/barrier.c        |  0
 libgomp/config/nvptx/critical.c       |  0
 libgomp/config/nvptx/env.c            |  0
 libgomp/config/nvptx/error.c          |  0
 libgomp/config/nvptx/fortran.c        |  0
 libgomp/config/nvptx/iter.c           |  0
 libgomp/config/nvptx/iter_ull.c       |  0
 libgomp/config/nvptx/libgomp-plugin.c |  0
 libgomp/config/nvptx/lock.c           |  0
 libgomp/config/nvptx/loop.c           |  0
 libgomp/config/nvptx/loop_ull.c       |  0
 libgomp/config/nvptx/mutex.c          |  0
 libgomp/config/nvptx/oacc-async.c     |  0
 libgomp/config/nvptx/oacc-cuda.c      |  0
 libgomp/config/nvptx/oacc-host.c      |  0
 libgomp/config/nvptx/oacc-init.c      |  0
 libgomp/config/nvptx/oacc-mem.c       |  0
 libgomp/config/nvptx/oacc-parallel.c  |  0
 libgomp/config/nvptx/oacc-plugin.c    |  0
 libgomp/config/nvptx/omp-lock.h       | 12 ++++++++++++
 libgomp/config/nvptx/ordered.c        |  0
 libgomp/config/nvptx/parallel.c       |  0
 libgomp/config/nvptx/proc.c           |  0
 libgomp/config/nvptx/ptrlock.c        |  0
 libgomp/config/nvptx/sections.c       |  0
 libgomp/config/nvptx/sem.c            |  0
 libgomp/config/nvptx/single.c         |  0
 libgomp/config/nvptx/splay-tree.c     |  0
 libgomp/config/nvptx/target.c         |  0
 libgomp/config/nvptx/task.c           |  0
 libgomp/config/nvptx/team.c           |  0
 libgomp/config/nvptx/time.c           |  0
 libgomp/config/nvptx/work.c           |  0
 36 files changed, 12 insertions(+)

diff --git libgomp/config/nvptx/affinity.c libgomp/config/nvptx/affinity.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/alloc.c libgomp/config/nvptx/alloc.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/bar.c libgomp/config/nvptx/bar.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/barrier.c libgomp/config/nvptx/barrier.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/critical.c libgomp/config/nvptx/critical.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/env.c libgomp/config/nvptx/env.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/error.c libgomp/config/nvptx/error.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/fortran.c libgomp/config/nvptx/fortran.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/iter.c libgomp/config/nvptx/iter.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/iter_ull.c libgomp/config/nvptx/iter_ull.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/libgomp-plugin.c libgomp/config/nvptx/libgomp-plugin.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/lock.c libgomp/config/nvptx/lock.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/loop.c libgomp/config/nvptx/loop.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/loop_ull.c libgomp/config/nvptx/loop_ull.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/mutex.c libgomp/config/nvptx/mutex.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-async.c libgomp/config/nvptx/oacc-async.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-cuda.c libgomp/config/nvptx/oacc-cuda.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-host.c libgomp/config/nvptx/oacc-host.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-init.c libgomp/config/nvptx/oacc-init.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-mem.c libgomp/config/nvptx/oacc-mem.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-parallel.c libgomp/config/nvptx/oacc-parallel.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-plugin.c libgomp/config/nvptx/oacc-plugin.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/omp-lock.h libgomp/config/nvptx/omp-lock.h
new file mode 100644
index 0000000..2ca7c5e
--- /dev/null
+++ libgomp/config/nvptx/omp-lock.h
@@ -0,0 +1,12 @@
+/* This header is used during the build process to find the size and 
+   alignment of the public OpenMP locks, so that we can export data
+   structures without polluting the namespace.
+
+   When using the Linux futex primitive, non-recursive locks require
+   one int.  Recursive locks require we identify the owning task
+   and so require in addition one int and a pointer.  */
+
+typedef int omp_lock_t;
+typedef struct { int lock, count; void *owner; } omp_nest_lock_t;
+typedef int omp_lock_25_t;
+typedef struct { int owner, count; } omp_nest_lock_25_t;
diff --git libgomp/config/nvptx/ordered.c libgomp/config/nvptx/ordered.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/parallel.c libgomp/config/nvptx/parallel.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/proc.c libgomp/config/nvptx/proc.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/ptrlock.c libgomp/config/nvptx/ptrlock.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/sections.c libgomp/config/nvptx/sections.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/sem.c libgomp/config/nvptx/sem.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/single.c libgomp/config/nvptx/single.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/splay-tree.c libgomp/config/nvptx/splay-tree.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/target.c libgomp/config/nvptx/target.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/task.c libgomp/config/nvptx/task.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/team.c libgomp/config/nvptx/team.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/time.c libgomp/config/nvptx/time.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/work.c libgomp/config/nvptx/work.c
new file mode 100644
index 0000000..e69de29


Next, we can then (on gomp-4_0-branch) move the libgcc code into libgomp:

commit d8d75d17630d7633be4f1733fd195a104cb2ccc4
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Wed Jul 22 13:05:16 2015 +0200

    [nvptx] Move GOMP stuff from libgcc to libgomp
---
 libgcc/config.host                       |  6 +---
 libgcc/config/nvptx/gomp-acc_on_device.c |  9 -----
 libgcc/config/nvptx/gomp-atomic.asm      | 37 ---------------------
 libgcc/config/nvptx/t-nvptx              | 11 ------
 libgomp/config/nvptx/critical.c          | 57 ++++++++++++++++++++++++++++++++
 libgomp/config/nvptx/oacc-init.c         | 35 ++++++++++++++++++++
 6 files changed, 93 insertions(+), 62 deletions(-)

diff --git libgcc/config.host libgcc/config.host
index ee7ce03..3a2c75d 100644
--- libgcc/config.host
+++ libgcc/config.host
@@ -1304,11 +1304,7 @@ mep*-*-*)
 	;;
 nvptx-*)
 	tmake_file="$tmake_file nvptx/t-nvptx"
-	if test "x${enable_as_accelerator_for}" != x; then
-		extra_parts="crt0.o libgomp.a libgomp.spec"
-	else
-		extra_parts="crt0.o"
-	fi
+	extra_parts="crt0.o"
 	;;
 *)
 	echo "*** Configuration ${host} not supported" 1>&2
diff --git libgcc/config/nvptx/gomp-acc_on_device.c libgcc/config/nvptx/gomp-acc_on_device.c
deleted file mode 100644
index e4278f9..0000000
--- libgcc/config/nvptx/gomp-acc_on_device.c
+++ /dev/null
@@ -1,9 +0,0 @@
-int acc_on_device(int d)
-{
-  return __builtin_acc_on_device(d);
-}
-
-int acc_on_device_h_(int *d)
-{
-  return acc_on_device(*d);
-}
diff --git libgcc/config/nvptx/gomp-atomic.asm libgcc/config/nvptx/gomp-atomic.asm
deleted file mode 100644
index ae9d925..0000000
--- libgcc/config/nvptx/gomp-atomic.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-
-// BEGIN PREAMBLE
-	.version	3.1
-	.target	sm_30
-	.address_size 64
-	.extern .shared .u8 sdata[];
-// END PREAMBLE
-
-// BEGIN VAR DEF: libgomp_ptx_lock
-.global .align 4 .u32 libgomp_ptx_lock;
-
-// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_start
-.visible .func GOMP_atomic_start;
-// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_start
-.visible .func GOMP_atomic_start
-{
-	.reg .pred 	%p<2>;
-	.reg .s32 	%r<2>;
-	.reg .s64 	%rd<2>;
-BB5_1:
-	mov.u64 	%rd1, libgomp_ptx_lock;
-	atom.global.cas.b32 	%r1, [%rd1], 0, 1;
-	setp.ne.s32	%p1, %r1, 0;
-	@%p1 bra 	BB5_1;
-	ret;
-	}
-// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_end
-.visible .func GOMP_atomic_end;
-// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_end
-.visible .func GOMP_atomic_end
-{
-	.reg .s32 	%r<2>;
-	.reg .s64 	%rd<2>;
-	mov.u64 	%rd1, libgomp_ptx_lock;
-	atom.global.exch.b32 	%r1, [%rd1], 0;
-	ret;
-	}
diff --git libgcc/config/nvptx/t-nvptx libgcc/config/nvptx/t-nvptx
index c8741c4..0c2cea0 100644
--- libgcc/config/nvptx/t-nvptx
+++ libgcc/config/nvptx/t-nvptx
@@ -13,14 +13,3 @@ crt0.o: $(srcdir)/config/nvptx/crt0.s
 # support it, and it may cause the build to fail, because of alloca usage, for
 # example.
 INHIBIT_LIBC_CFLAGS = -Dinhibit_libc
-
-gomp-acc_on_device.o: $(srcdir)/config/nvptx/gomp-acc_on_device.c
-	$(gcc_compile) -c -fno-builtin-acc_on_device $<
-gomp-atomic.o: $(srcdir)/config/nvptx/gomp-atomic.asm
-	cp $< $@
-
-OBJS_libgomp= gomp-acc_on_device.o gomp-atomic.o
-libgomp.a: $(OBJS_libgomp)
-	$(AR_CREATE_FOR_TARGET) $@ $(OBJS_libgomp)
-libgomp.spec:
-	echo "*link_gomp: -lgomp" >$@
diff --git libgomp/config/nvptx/critical.c libgomp/config/nvptx/critical.c
index e69de29..1f55aad 100644
--- libgomp/config/nvptx/critical.c
+++ libgomp/config/nvptx/critical.c
@@ -0,0 +1,57 @@
+/* GOMP atomic routines
+
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+   Contributed by Mentor Embedded.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+__asm__ ("// BEGIN VAR DEF: libgomp_ptx_lock\n"
+	 ".global .align 4 .u32 libgomp_ptx_lock;\n"
+	 "\n"
+	 "// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_start\n"
+	 ".visible .func GOMP_atomic_start;\n"
+	 "// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_start\n"
+	 ".visible .func GOMP_atomic_start\n"
+	 "{\n"
+	 "	.reg .pred 	%p<2>;\n"
+	 "	.reg .s32 	%r<2>;\n"
+	 "	.reg .s64 	%rd<2>;\n"
+	 "BB5_1:\n"
+	 "	mov.u64 	%rd1, libgomp_ptx_lock;\n"
+	 "	atom.global.cas.b32 	%r1, [%rd1], 0, 1;\n"
+	 "	setp.ne.s32	%p1, %r1, 0;\n"
+	 "	@%p1 bra 	BB5_1;\n"
+	 "	ret;\n"
+	 "	}\n"
+	 "// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_end\n"
+	 ".visible .func GOMP_atomic_end;\n"
+	 "// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_end\n"
+	 ".visible .func GOMP_atomic_end\n"
+	 "{\n"
+	 "	.reg .s32 	%r<2>;\n"
+	 "	.reg .s64 	%rd<2>;\n"
+	 "	mov.u64 	%rd1, libgomp_ptx_lock;\n"
+	 "	atom.global.exch.b32 	%r1, [%rd1], 0;\n"
+	 "	ret;\n"
+	 "	}");
diff --git libgomp/config/nvptx/oacc-init.c libgomp/config/nvptx/oacc-init.c
index e69de29..e2c54c9 100644
--- libgomp/config/nvptx/oacc-init.c
+++ libgomp/config/nvptx/oacc-init.c
@@ -0,0 +1,35 @@
+/* OpenACC Runtime initialization routines
+
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+   Contributed by Mentor Embedded.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "openacc.h"
+
+int
+acc_on_device (acc_device_t d)
+{
+  return __builtin_acc_on_device (d);
+}

This, obviously, is still very bare-bones, but it works, and can be
extended later.


> we build libgcc.a, libc.a, libm.a, libgfortran.a (and in the future hopefully libgomp.a),
> nothing attempts to link those in :(.

Together with the changes highlighted above, I'd then work on merging
into trunk the nvptx linking code present on gomp-4_0-branch, OK?


For reference, your second patch (U2):

> --- libgomp/configure.tgt.jj	2015-04-21 08:38:00.000000000 +0200
> +++ libgomp/configure.tgt	2015-04-21 10:59:30.857197475 +0200
> @@ -151,6 +151,10 @@ case "${target}" in
>  	XLDFLAGS="${XLDFLAGS} -lpthread"
>  	;;
>  
> +  nvptx*-*-*)
> +	config_path="nvptx"
> +	;;
> +
>    *)
>  	;;
>  
> --- libgomp/libgomp.h.jj	2015-04-21 08:38:00.000000000 +0200
> +++ libgomp/libgomp.h	2015-04-21 11:15:35.952217394 +0200
> @@ -40,7 +40,9 @@
>  #include "gstdint.h"
>  #include "libgomp-plugin.h"
>  
> +#ifdef HAVE_PTHREAD_H
>  #include <pthread.h>
> +#endif
>  #include <stdbool.h>
>  #include <stdlib.h>
>  #include <stdarg.h>
> --- libgomp/configure.ac.jj	2015-04-21 08:38:00.000000000 +0200
> +++ libgomp/configure.ac	2015-04-21 11:06:38.418117846 +0200
> @@ -179,6 +179,9 @@ case "$host" in
>    *-*-rtems*)
>      # RTEMS supports Pthreads, but the library is not available at GCC build time.
>      ;;
> +  nvptx*-*-*)
> +    # NVPTX does not support Pthreads, has its own code replacement.
> +    ;;
>    *)
>      # Check to see if -pthread or -lpthread is needed.  Prefer the former.
>      # In case the pthread.h system header is not found, this test will fail.
> --- configure.ac.jj	2015-04-21 08:38:09.000000000 +0200
> +++ configure.ac	2015-04-21 09:14:50.107827544 +0200
> @@ -539,6 +539,9 @@ if test x$enable_libgomp = x ; then
>  	;;
>      *-*-darwin* | *-*-aix*)
>  	;;
> +    # And on NVPTX as an offloading target.
> +    nvptx*-*-*)
> +	;;
>      *)
>  	noconfigdirs="$noconfigdirs target-libgomp"
>  	;;


Grüße,
 Thomas

[-- Attachment #2: Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [WIP] OpenMP 4 NVPTX support
  2015-07-22 16:13 ` Thomas Schwinge
@ 2015-07-22 16:47   ` Jakub Jelinek
  2015-08-10 15:56     ` Empty libgomp for nvptx (was: [WIP] OpenMP 4 NVPTX support) Thomas Schwinge
  2015-08-10 16:31   ` [gomp4] [nvptx] Move GOMP stuff from libgcc to libgomp (was: [WIP] OpenMP 4 NVPTX support) Thomas Schwinge
  1 sibling, 1 reply; 11+ messages in thread
From: Jakub Jelinek @ 2015-07-22 16:47 UTC (permalink / raw)
  To: Thomas Schwinge
  Cc: gcc-patches, Julian Brown, Bernd Schmidt, Tobias Burnus, Ilya Verbin

On Wed, Jul 22, 2015 at 06:04:20PM +0200, Thomas Schwinge wrote:
> On Tue, 21 Apr 2015 17:58:39 +0200, Jakub Jelinek <jakub@redhat.com> wrote:
> > Attached is a minimal patch to get at least a trivial OpenMP 4.0 testcase
> > offloading to NVPTX (the first patch).  The second patch is WIP, just first
> > few needed changes to make libgomp to build for NVPTX (several weeks of work
> > at least).
> 
> We're not in particular working on making nvptx offloading work for
> OpenMP, but also for OpenACC offloading a tiny bit of code is required to
> be shipped in an offloading device's runtime library -- code that
> conceptually belongs into libgomp.  (On gomp-4_0-branch, it currently
> lives in libgcc because that was easier to do.)  Actually, as I should
> find out, building a "dummy" (empty) libgomp for nvptx is not actually
> difficult.  Additionally to your second patch (U2; quoted at the end of
> this email), we'll need the following:

The U2 version was a very early one, I've posted a newer version later,
but supposedly we can go with my U2 (if you've tested it together with your
patch, please check it in yourself) and your patch, and then
incrementally start removing the zero sized stubs or replacing them with
something real.

	Jakub

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Empty libgomp for nvptx (was: [WIP] OpenMP 4 NVPTX support)
  2015-07-22 16:47   ` Jakub Jelinek
@ 2015-08-10 15:56     ` Thomas Schwinge
  2015-08-10 16:29       ` Empty libgomp for nvptx Thomas Schwinge
  0 siblings, 1 reply; 11+ messages in thread
From: Thomas Schwinge @ 2015-08-10 15:56 UTC (permalink / raw)
  To: gcc-patches, Jakub Jelinek
  Cc: Julian Brown, Bernd Schmidt, Tobias Burnus, Ilya Verbin

[-- Attachment #1: Type: text/plain, Size: 14686 bytes --]

Hi!

On Wed, 22 Jul 2015 18:38:44 +0200, Jakub Jelinek <jakub@redhat.com> wrote:
> On Wed, Jul 22, 2015 at 06:04:20PM +0200, Thomas Schwinge wrote:
> > On Tue, 21 Apr 2015 17:58:39 +0200, Jakub Jelinek <jakub@redhat.com> wrote:
> > > Attached is a minimal patch to get at least a trivial OpenMP 4.0 testcase
> > > offloading to NVPTX (the first patch).  The second patch is WIP, just first
> > > few needed changes to make libgomp to build for NVPTX (several weeks of work
> > > at least).
> > 
> > We're not in particular working on making nvptx offloading work for
> > OpenMP, but also for OpenACC offloading a tiny bit of code is required to
> > be shipped in an offloading device's runtime library -- code that
> > conceptually belongs into libgomp.  (On gomp-4_0-branch, it currently
> > lives in libgcc because that was easier to do.)  Actually, as I should
> > find out, building a "dummy" (empty) libgomp for nvptx is not actually
> > difficult.  Additionally to your second patch (U2; quoted at the end of
> > this email), we'll need the following:
> 
> The U2 version was a very early one, I've posted a newer version later,
> but supposedly we can go with my U2 (if you've tested it together with your
> patch, please check it in yourself) and your patch, and then
> incrementally start removing the zero sized stubs or replacing them with
> something real.

Yes, that's precisely the idea.  Committed in r226760:

commit fdcd05c84f79cec55fa61249febd4c1c21b772a7
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Mon Aug 10 15:53:33 2015 +0000

    Empty libgomp for nvptx
    
    	* configure.ac (noconfigdirs): Don't add "target-libgomp" for target
    	nvptx*-*-*.
    	* configure: Regenerate.
    	libgomp/
    	* config/nvptx/affinity.c: New file.
    	* config/nvptx/alloc.c: Likewise.
    	* config/nvptx/bar.c: Likewise.
    	* config/nvptx/barrier.c: Likewise.
    	* config/nvptx/critical.c: Likewise.
    	* config/nvptx/env.c: Likewise.
    	* config/nvptx/error.c: Likewise.
    	* config/nvptx/fortran.c: Likewise.
    	* config/nvptx/iter.c: Likewise.
    	* config/nvptx/iter_ull.c: Likewise.
    	* config/nvptx/libgomp-plugin.c: Likewise.
    	* config/nvptx/lock.c: Likewise.
    	* config/nvptx/loop.c: Likewise.
    	* config/nvptx/loop_ull.c: Likewise.
    	* config/nvptx/mutex.c: Likewise.
    	* config/nvptx/oacc-async.c: Likewise.
    	* config/nvptx/oacc-cuda.c: Likewise.
    	* config/nvptx/oacc-host.c: Likewise.
    	* config/nvptx/oacc-init.c: Likewise.
    	* config/nvptx/oacc-mem.c: Likewise.
    	* config/nvptx/oacc-parallel.c: Likewise.
    	* config/nvptx/oacc-plugin.c: Likewise.
    	* config/nvptx/omp-lock.h: Likewise.
    	* config/nvptx/ordered.c: Likewise.
    	* config/nvptx/parallel.c: Likewise.
    	* config/nvptx/proc.c: Likewise.
    	* config/nvptx/ptrlock.c: Likewise.
    	* config/nvptx/sections.c: Likewise.
    	* config/nvptx/sem.c: Likewise.
    	* config/nvptx/single.c: Likewise.
    	* config/nvptx/splay-tree.c: Likewise.
    	* config/nvptx/target.c: Likewise.
    	* config/nvptx/task.c: Likewise.
    	* config/nvptx/team.c: Likewise.
    	* config/nvptx/time.c: Likewise.
    	* config/nvptx/work.c: Likewise.
    	* configure.ac: Don't probe pthreads support for host nvptx*-*-*.
    	* configure: Regenerate.
    	* configure.tgt (config_path): Set to "nvptx" for target
    	nvptx*-*-*.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@226760 138bc75d-0d04-0410-961f-82ee72b054a4
---
 ChangeLog                       |    7 +++++++
 configure                       |    6 +++---
 configure.ac                    |    6 +++---
 libgomp/ChangeLog               |   44 +++++++++++++++++++++++++++++++++++++++
 libgomp/config/nvptx/omp-lock.h |   12 +++++++++++
 libgomp/configure               |    3 +++
 libgomp/configure.ac            |    3 +++
 libgomp/configure.tgt           |    4 ++++
 8 files changed, 79 insertions(+), 6 deletions(-)

diff --git ChangeLog ChangeLog
index bd0f35e..6d3a8a0 100644
--- ChangeLog
+++ ChangeLog
@@ -1,3 +1,10 @@
+2015-08-10  Thomas Schwinge  <thomas@codesourcery.com>
+	    Jakub Jelinek  <jakub@redhat.com>
+
+	* configure.ac (noconfigdirs): Don't add "target-libgomp" for target
+	nvptx*-*-*.
+	* configure: Regenerate.
+
 2015-08-06  Yaakov Selkowitz  <yselkowi@redhat.com>
 
 	* Makefile.def (libiconv): Define bootstrap=true.
diff --git configure configure
index 6d7152e..79257fd 100755
--- configure
+++ configure
@@ -3168,9 +3168,8 @@ if test x$enable_static_libjava != xyes ; then
 fi
 
 
-# Disable libgomp on non POSIX hosted systems.
+# Enable libgomp by default on hosted POSIX systems, and a few others.
 if test x$enable_libgomp = x ; then
-    # Enable libgomp by default on hosted POSIX systems.
     case "${target}" in
     *-*-linux* | *-*-gnu* | *-*-k*bsd*-gnu | *-*-kopensolaris*-gnu)
 	;;
@@ -3180,6 +3179,8 @@ if test x$enable_libgomp = x ; then
 	;;
     *-*-darwin* | *-*-aix*)
 	;;
+    nvptx*-*-*)
+	;;
     *)
 	noconfigdirs="$noconfigdirs target-libgomp"
 	;;
@@ -3917,7 +3918,6 @@ case "${target}" in
     noconfigdirs="$noconfigdirs gdb"
     ;;
   nvptx*-*-*)
-    # nvptx is just a compiler
     noconfigdirs="$noconfigdirs target-libssp target-libstdc++-v3 target-libobjc"
     ;;
   or1k*-*-*)
diff --git configure.ac configure.ac
index fbc49ce..452fc05 100644
--- configure.ac
+++ configure.ac
@@ -529,9 +529,8 @@ if test x$enable_static_libjava != xyes ; then
 fi
 AC_SUBST(EXTRA_CONFIGARGS_LIBJAVA)
 
-# Disable libgomp on non POSIX hosted systems.
+# Enable libgomp by default on hosted POSIX systems, and a few others.
 if test x$enable_libgomp = x ; then
-    # Enable libgomp by default on hosted POSIX systems.
     case "${target}" in
     *-*-linux* | *-*-gnu* | *-*-k*bsd*-gnu | *-*-kopensolaris*-gnu)
 	;;
@@ -541,6 +540,8 @@ if test x$enable_libgomp = x ; then
 	;;
     *-*-darwin* | *-*-aix*)
 	;;
+    nvptx*-*-*)
+	;;
     *)
 	noconfigdirs="$noconfigdirs target-libgomp"
 	;;
@@ -1257,7 +1258,6 @@ case "${target}" in
     noconfigdirs="$noconfigdirs gdb"
     ;;
   nvptx*-*-*)
-    # nvptx is just a compiler
     noconfigdirs="$noconfigdirs target-libssp target-libstdc++-v3 target-libobjc"
     ;;
   or1k*-*-*)
diff --git libgomp/ChangeLog libgomp/ChangeLog
index 3b60290..084aabd 100644
--- libgomp/ChangeLog
+++ libgomp/ChangeLog
@@ -1,4 +1,48 @@
 2015-08-10  Thomas Schwinge  <thomas@codesourcery.com>
+	    Jakub Jelinek  <jakub@redhat.com>
+
+	* config/nvptx/affinity.c: New file.
+	* config/nvptx/alloc.c: Likewise.
+	* config/nvptx/bar.c: Likewise.
+	* config/nvptx/barrier.c: Likewise.
+	* config/nvptx/critical.c: Likewise.
+	* config/nvptx/env.c: Likewise.
+	* config/nvptx/error.c: Likewise.
+	* config/nvptx/fortran.c: Likewise.
+	* config/nvptx/iter.c: Likewise.
+	* config/nvptx/iter_ull.c: Likewise.
+	* config/nvptx/libgomp-plugin.c: Likewise.
+	* config/nvptx/lock.c: Likewise.
+	* config/nvptx/loop.c: Likewise.
+	* config/nvptx/loop_ull.c: Likewise.
+	* config/nvptx/mutex.c: Likewise.
+	* config/nvptx/oacc-async.c: Likewise.
+	* config/nvptx/oacc-cuda.c: Likewise.
+	* config/nvptx/oacc-host.c: Likewise.
+	* config/nvptx/oacc-init.c: Likewise.
+	* config/nvptx/oacc-mem.c: Likewise.
+	* config/nvptx/oacc-parallel.c: Likewise.
+	* config/nvptx/oacc-plugin.c: Likewise.
+	* config/nvptx/omp-lock.h: Likewise.
+	* config/nvptx/ordered.c: Likewise.
+	* config/nvptx/parallel.c: Likewise.
+	* config/nvptx/proc.c: Likewise.
+	* config/nvptx/ptrlock.c: Likewise.
+	* config/nvptx/sections.c: Likewise.
+	* config/nvptx/sem.c: Likewise.
+	* config/nvptx/single.c: Likewise.
+	* config/nvptx/splay-tree.c: Likewise.
+	* config/nvptx/target.c: Likewise.
+	* config/nvptx/task.c: Likewise.
+	* config/nvptx/team.c: Likewise.
+	* config/nvptx/time.c: Likewise.
+	* config/nvptx/work.c: Likewise.
+	* configure.ac: Don't probe pthreads support for host nvptx*-*-*.
+	* configure: Regenerate.
+	* configure.tgt (config_path): Set to "nvptx" for target
+	nvptx*-*-*.
+
+2015-08-10  Thomas Schwinge  <thomas@codesourcery.com>
 
 	* testsuite/libgomp.oacc-c-c++-common/vector-type-1.c: New file.
 
diff --git libgomp/config/nvptx/affinity.c libgomp/config/nvptx/affinity.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/alloc.c libgomp/config/nvptx/alloc.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/bar.c libgomp/config/nvptx/bar.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/barrier.c libgomp/config/nvptx/barrier.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/critical.c libgomp/config/nvptx/critical.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/env.c libgomp/config/nvptx/env.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/error.c libgomp/config/nvptx/error.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/fortran.c libgomp/config/nvptx/fortran.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/iter.c libgomp/config/nvptx/iter.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/iter_ull.c libgomp/config/nvptx/iter_ull.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/libgomp-plugin.c libgomp/config/nvptx/libgomp-plugin.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/lock.c libgomp/config/nvptx/lock.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/loop.c libgomp/config/nvptx/loop.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/loop_ull.c libgomp/config/nvptx/loop_ull.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/mutex.c libgomp/config/nvptx/mutex.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-async.c libgomp/config/nvptx/oacc-async.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-cuda.c libgomp/config/nvptx/oacc-cuda.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-host.c libgomp/config/nvptx/oacc-host.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-init.c libgomp/config/nvptx/oacc-init.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-mem.c libgomp/config/nvptx/oacc-mem.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-parallel.c libgomp/config/nvptx/oacc-parallel.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-plugin.c libgomp/config/nvptx/oacc-plugin.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/omp-lock.h libgomp/config/nvptx/omp-lock.h
new file mode 100644
index 0000000..2ca7c5e
--- /dev/null
+++ libgomp/config/nvptx/omp-lock.h
@@ -0,0 +1,12 @@
+/* This header is used during the build process to find the size and 
+   alignment of the public OpenMP locks, so that we can export data
+   structures without polluting the namespace.
+
+   When using the Linux futex primitive, non-recursive locks require
+   one int.  Recursive locks require we identify the owning task
+   and so require in addition one int and a pointer.  */
+
+typedef int omp_lock_t;
+typedef struct { int lock, count; void *owner; } omp_nest_lock_t;
+typedef int omp_lock_25_t;
+typedef struct { int owner, count; } omp_nest_lock_25_t;
diff --git libgomp/config/nvptx/ordered.c libgomp/config/nvptx/ordered.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/parallel.c libgomp/config/nvptx/parallel.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/proc.c libgomp/config/nvptx/proc.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/ptrlock.c libgomp/config/nvptx/ptrlock.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/sections.c libgomp/config/nvptx/sections.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/sem.c libgomp/config/nvptx/sem.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/single.c libgomp/config/nvptx/single.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/splay-tree.c libgomp/config/nvptx/splay-tree.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/target.c libgomp/config/nvptx/target.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/task.c libgomp/config/nvptx/task.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/team.c libgomp/config/nvptx/team.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/time.c libgomp/config/nvptx/time.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/work.c libgomp/config/nvptx/work.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/configure libgomp/configure
index f1a92ba..867ce40 100755
--- libgomp/configure
+++ libgomp/configure
@@ -15041,6 +15041,9 @@ case "$host" in
   *-*-rtems*)
     # RTEMS supports Pthreads, but the library is not available at GCC build time.
     ;;
+  nvptx*-*-*)
+    # NVPTX does not support Pthreads, has its own code replacement.
+    ;;
   *)
     # Check to see if -pthread or -lpthread is needed.  Prefer the former.
     # In case the pthread.h system header is not found, this test will fail.
diff --git libgomp/configure.ac libgomp/configure.ac
index 9cf0218..b1696d0 100644
--- libgomp/configure.ac
+++ libgomp/configure.ac
@@ -179,6 +179,9 @@ case "$host" in
   *-*-rtems*)
     # RTEMS supports Pthreads, but the library is not available at GCC build time.
     ;;
+  nvptx*-*-*)
+    # NVPTX does not support Pthreads, has its own code replacement.
+    ;;
   *)
     # Check to see if -pthread or -lpthread is needed.  Prefer the former.
     # In case the pthread.h system header is not found, this test will fail.
diff --git libgomp/configure.tgt libgomp/configure.tgt
index 2970f6f..8fad977 100644
--- libgomp/configure.tgt
+++ libgomp/configure.tgt
@@ -151,6 +151,10 @@ case "${target}" in
 	XLDFLAGS="${XLDFLAGS} -lpthread"
 	;;
 
+  nvptx*-*-*)
+	config_path="nvptx"
+	;;
+
   *)
 	;;
 


Grüße,
 Thomas

[-- Attachment #2: Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Empty libgomp for nvptx
  2015-08-10 15:56     ` Empty libgomp for nvptx (was: [WIP] OpenMP 4 NVPTX support) Thomas Schwinge
@ 2015-08-10 16:29       ` Thomas Schwinge
  0 siblings, 0 replies; 11+ messages in thread
From: Thomas Schwinge @ 2015-08-10 16:29 UTC (permalink / raw)
  To: gcc-patches; +Cc: Julian Brown, Tobias Burnus, Ilya Verbin, Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 15254 bytes --]

Hi!

On Mon, 10 Aug 2015 17:55:57 +0200, I wrote:
> On Wed, 22 Jul 2015 18:38:44 +0200, Jakub Jelinek <jakub@redhat.com> wrote:
> > On Wed, Jul 22, 2015 at 06:04:20PM +0200, Thomas Schwinge wrote:
> > > On Tue, 21 Apr 2015 17:58:39 +0200, Jakub Jelinek <jakub@redhat.com> wrote:
> > > > Attached is a minimal patch to get at least a trivial OpenMP 4.0 testcase
> > > > offloading to NVPTX (the first patch).  The second patch is WIP, just first
> > > > few needed changes to make libgomp to build for NVPTX (several weeks of work
> > > > at least).
> > > 
> > > We're not in particular working on making nvptx offloading work for
> > > OpenMP, but also for OpenACC offloading a tiny bit of code is required to
> > > be shipped in an offloading device's runtime library -- code that
> > > conceptually belongs into libgomp.  (On gomp-4_0-branch, it currently
> > > lives in libgcc because that was easier to do.)  Actually, as I should
> > > find out, building a "dummy" (empty) libgomp for nvptx is not actually
> > > difficult.  Additionally to your second patch (U2; quoted at the end of
> > > this email), we'll need the following:
> > 
> > The U2 version was a very early one, I've posted a newer version later,
> > but supposedly we can go with my U2 (if you've tested it together with your
> > patch, please check it in yourself) and your patch, and then
> > incrementally start removing the zero sized stubs or replacing them with
> > something real.
> 
> Yes, that's precisely the idea.  Committed in r226760:
> 
> commit fdcd05c84f79cec55fa61249febd4c1c21b772a7
> Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
> Date:   Mon Aug 10 15:53:33 2015 +0000
> 
>     Empty libgomp for nvptx

Backported to gomp-4_0-branch in r226761:

commit d4ba3f3e41b5b647e4a3cc7bad12f2a4770cd15d
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Mon Aug 10 16:26:39 2015 +0000

    Empty libgomp for nvptx
    
    Backport trunk r226760:
    
    	* configure.ac (noconfigdirs): Don't add "target-libgomp" for target
    	nvptx*-*-*.
    	* configure: Regenerate.
    	libgomp/
    	* config/nvptx/affinity.c: New file.
    	* config/nvptx/alloc.c: Likewise.
    	* config/nvptx/bar.c: Likewise.
    	* config/nvptx/barrier.c: Likewise.
    	* config/nvptx/critical.c: Likewise.
    	* config/nvptx/env.c: Likewise.
    	* config/nvptx/error.c: Likewise.
    	* config/nvptx/fortran.c: Likewise.
    	* config/nvptx/iter.c: Likewise.
    	* config/nvptx/iter_ull.c: Likewise.
    	* config/nvptx/libgomp-plugin.c: Likewise.
    	* config/nvptx/lock.c: Likewise.
    	* config/nvptx/loop.c: Likewise.
    	* config/nvptx/loop_ull.c: Likewise.
    	* config/nvptx/mutex.c: Likewise.
    	* config/nvptx/oacc-async.c: Likewise.
    	* config/nvptx/oacc-cuda.c: Likewise.
    	* config/nvptx/oacc-host.c: Likewise.
    	* config/nvptx/oacc-init.c: Likewise.
    	* config/nvptx/oacc-mem.c: Likewise.
    	* config/nvptx/oacc-parallel.c: Likewise.
    	* config/nvptx/oacc-plugin.c: Likewise.
    	* config/nvptx/omp-lock.h: Likewise.
    	* config/nvptx/ordered.c: Likewise.
    	* config/nvptx/parallel.c: Likewise.
    	* config/nvptx/proc.c: Likewise.
    	* config/nvptx/ptrlock.c: Likewise.
    	* config/nvptx/sections.c: Likewise.
    	* config/nvptx/sem.c: Likewise.
    	* config/nvptx/single.c: Likewise.
    	* config/nvptx/splay-tree.c: Likewise.
    	* config/nvptx/target.c: Likewise.
    	* config/nvptx/task.c: Likewise.
    	* config/nvptx/team.c: Likewise.
    	* config/nvptx/time.c: Likewise.
    	* config/nvptx/work.c: Likewise.
    	* configure.ac: Don't probe pthreads support for host nvptx*-*-*.
    	* configure: Regenerate.
    	* configure.tgt (config_path): Set to "nvptx" for target
    	nvptx*-*-*.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@226761 138bc75d-0d04-0410-961f-82ee72b054a4
---
 ChangeLog.gomp                  |   11 +++++++++
 configure                       |    6 ++---
 configure.ac                    |    6 ++---
 libgomp/ChangeLog.gomp          |   48 +++++++++++++++++++++++++++++++++++++++
 libgomp/config/nvptx/omp-lock.h |   12 ++++++++++
 libgomp/configure               |    3 +++
 libgomp/configure.ac            |    3 +++
 libgomp/configure.tgt           |    4 ++++
 8 files changed, 87 insertions(+), 6 deletions(-)

diff --git ChangeLog.gomp ChangeLog.gomp
index 2a0ddb1..fd1a1e0 100644
--- ChangeLog.gomp
+++ ChangeLog.gomp
@@ -1,3 +1,14 @@
+2015-08-10  Thomas Schwinge  <thomas@codesourcery.com>
+
+	Backport trunk r226760:
+
+	2015-08-10  Thomas Schwinge  <thomas@codesourcery.com>
+		    Jakub Jelinek  <jakub@redhat.com>
+
+	* configure.ac (noconfigdirs): Don't add "target-libgomp" for target
+	nvptx*-*-*.
+	* configure: Regenerate.
+
 2015-06-30  Tom de Vries  <tom@codesourcery.com>
 
 	Revert:
diff --git configure configure
index 82e45f3..5d90445 100755
--- configure
+++ configure
@@ -3159,9 +3159,8 @@ if test x$enable_static_libjava != xyes ; then
 fi
 
 
-# Disable libgomp on non POSIX hosted systems.
+# Enable libgomp by default on hosted POSIX systems, and a few others.
 if test x$enable_libgomp = x ; then
-    # Enable libgomp by default on hosted POSIX systems.
     case "${target}" in
     *-*-linux* | *-*-gnu* | *-*-k*bsd*-gnu | *-*-kopensolaris*-gnu)
 	;;
@@ -3171,6 +3170,8 @@ if test x$enable_libgomp = x ; then
 	;;
     *-*-darwin* | *-*-aix*)
 	;;
+    nvptx*-*-*)
+	;;
     *)
 	noconfigdirs="$noconfigdirs target-libgomp"
 	;;
@@ -3899,7 +3900,6 @@ case "${target}" in
     noconfigdirs="$noconfigdirs gdb"
     ;;
   nvptx*-*-*)
-    # nvptx is just a compiler
     noconfigdirs="$noconfigdirs target-libssp target-libstdc++-v3 target-libobjc"
     ;;
   or1k*-*-*)
diff --git configure.ac configure.ac
index dc77a1b..9906aee 100644
--- configure.ac
+++ configure.ac
@@ -527,9 +527,8 @@ if test x$enable_static_libjava != xyes ; then
 fi
 AC_SUBST(EXTRA_CONFIGARGS_LIBJAVA)
 
-# Disable libgomp on non POSIX hosted systems.
+# Enable libgomp by default on hosted POSIX systems, and a few others.
 if test x$enable_libgomp = x ; then
-    # Enable libgomp by default on hosted POSIX systems.
     case "${target}" in
     *-*-linux* | *-*-gnu* | *-*-k*bsd*-gnu | *-*-kopensolaris*-gnu)
 	;;
@@ -539,6 +538,8 @@ if test x$enable_libgomp = x ; then
 	;;
     *-*-darwin* | *-*-aix*)
 	;;
+    nvptx*-*-*)
+	;;
     *)
 	noconfigdirs="$noconfigdirs target-libgomp"
 	;;
@@ -1246,7 +1247,6 @@ case "${target}" in
     noconfigdirs="$noconfigdirs gdb"
     ;;
   nvptx*-*-*)
-    # nvptx is just a compiler
     noconfigdirs="$noconfigdirs target-libssp target-libstdc++-v3 target-libobjc"
     ;;
   or1k*-*-*)
diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp
index 4389795..3898930 100644
--- libgomp/ChangeLog.gomp
+++ libgomp/ChangeLog.gomp
@@ -1,3 +1,51 @@
+2015-08-10  Thomas Schwinge  <thomas@codesourcery.com>
+
+	Backport trunk r226760:
+
+	2015-08-10  Thomas Schwinge  <thomas@codesourcery.com>
+		    Jakub Jelinek  <jakub@redhat.com>
+
+	* config/nvptx/affinity.c: New file.
+	* config/nvptx/alloc.c: Likewise.
+	* config/nvptx/bar.c: Likewise.
+	* config/nvptx/barrier.c: Likewise.
+	* config/nvptx/critical.c: Likewise.
+	* config/nvptx/env.c: Likewise.
+	* config/nvptx/error.c: Likewise.
+	* config/nvptx/fortran.c: Likewise.
+	* config/nvptx/iter.c: Likewise.
+	* config/nvptx/iter_ull.c: Likewise.
+	* config/nvptx/libgomp-plugin.c: Likewise.
+	* config/nvptx/lock.c: Likewise.
+	* config/nvptx/loop.c: Likewise.
+	* config/nvptx/loop_ull.c: Likewise.
+	* config/nvptx/mutex.c: Likewise.
+	* config/nvptx/oacc-async.c: Likewise.
+	* config/nvptx/oacc-cuda.c: Likewise.
+	* config/nvptx/oacc-host.c: Likewise.
+	* config/nvptx/oacc-init.c: Likewise.
+	* config/nvptx/oacc-mem.c: Likewise.
+	* config/nvptx/oacc-parallel.c: Likewise.
+	* config/nvptx/oacc-plugin.c: Likewise.
+	* config/nvptx/omp-lock.h: Likewise.
+	* config/nvptx/ordered.c: Likewise.
+	* config/nvptx/parallel.c: Likewise.
+	* config/nvptx/proc.c: Likewise.
+	* config/nvptx/ptrlock.c: Likewise.
+	* config/nvptx/sections.c: Likewise.
+	* config/nvptx/sem.c: Likewise.
+	* config/nvptx/single.c: Likewise.
+	* config/nvptx/splay-tree.c: Likewise.
+	* config/nvptx/target.c: Likewise.
+	* config/nvptx/task.c: Likewise.
+	* config/nvptx/team.c: Likewise.
+	* config/nvptx/time.c: Likewise.
+	* config/nvptx/work.c: Likewise.
+	* configure.ac: Don't probe pthreads support for host nvptx*-*-*.
+	* configure: Regenerate.
+	* configure.tgt (config_path): Set to "nvptx" for target
+	nvptx*-*-*.
+
 2015-08-04  Nathan Sidwell  <nathan@codesourcery.com>
 
 	* testsuite/libgomp.oacc-c-c++-common/routine-1.c: Add warning.
diff --git libgomp/config/nvptx/affinity.c libgomp/config/nvptx/affinity.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/alloc.c libgomp/config/nvptx/alloc.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/bar.c libgomp/config/nvptx/bar.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/barrier.c libgomp/config/nvptx/barrier.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/critical.c libgomp/config/nvptx/critical.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/env.c libgomp/config/nvptx/env.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/error.c libgomp/config/nvptx/error.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/fortran.c libgomp/config/nvptx/fortran.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/iter.c libgomp/config/nvptx/iter.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/iter_ull.c libgomp/config/nvptx/iter_ull.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/libgomp-plugin.c libgomp/config/nvptx/libgomp-plugin.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/lock.c libgomp/config/nvptx/lock.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/loop.c libgomp/config/nvptx/loop.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/loop_ull.c libgomp/config/nvptx/loop_ull.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/mutex.c libgomp/config/nvptx/mutex.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-async.c libgomp/config/nvptx/oacc-async.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-cuda.c libgomp/config/nvptx/oacc-cuda.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-host.c libgomp/config/nvptx/oacc-host.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-init.c libgomp/config/nvptx/oacc-init.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-mem.c libgomp/config/nvptx/oacc-mem.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-parallel.c libgomp/config/nvptx/oacc-parallel.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/oacc-plugin.c libgomp/config/nvptx/oacc-plugin.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/omp-lock.h libgomp/config/nvptx/omp-lock.h
new file mode 100644
index 0000000..2ca7c5e
--- /dev/null
+++ libgomp/config/nvptx/omp-lock.h
@@ -0,0 +1,12 @@
+/* This header is used during the build process to find the size and 
+   alignment of the public OpenMP locks, so that we can export data
+   structures without polluting the namespace.
+
+   When using the Linux futex primitive, non-recursive locks require
+   one int.  Recursive locks require we identify the owning task
+   and so require in addition one int and a pointer.  */
+
+typedef int omp_lock_t;
+typedef struct { int lock, count; void *owner; } omp_nest_lock_t;
+typedef int omp_lock_25_t;
+typedef struct { int owner, count; } omp_nest_lock_25_t;
diff --git libgomp/config/nvptx/ordered.c libgomp/config/nvptx/ordered.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/parallel.c libgomp/config/nvptx/parallel.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/proc.c libgomp/config/nvptx/proc.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/ptrlock.c libgomp/config/nvptx/ptrlock.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/sections.c libgomp/config/nvptx/sections.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/sem.c libgomp/config/nvptx/sem.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/single.c libgomp/config/nvptx/single.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/splay-tree.c libgomp/config/nvptx/splay-tree.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/target.c libgomp/config/nvptx/target.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/task.c libgomp/config/nvptx/task.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/team.c libgomp/config/nvptx/team.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/time.c libgomp/config/nvptx/time.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/config/nvptx/work.c libgomp/config/nvptx/work.c
new file mode 100644
index 0000000..e69de29
diff --git libgomp/configure libgomp/configure
index c79611f..c93e877 100755
--- libgomp/configure
+++ libgomp/configure
@@ -15041,6 +15041,9 @@ case "$host" in
   *-*-rtems*)
     # RTEMS supports Pthreads, but the library is not available at GCC build time.
     ;;
+  nvptx*-*-*)
+    # NVPTX does not support Pthreads, has its own code replacement.
+    ;;
   *)
     # Check to see if -pthread or -lpthread is needed.  Prefer the former.
     # In case the pthread.h system header is not found, this test will fail.
diff --git libgomp/configure.ac libgomp/configure.ac
index 9cf0218..b1696d0 100644
--- libgomp/configure.ac
+++ libgomp/configure.ac
@@ -179,6 +179,9 @@ case "$host" in
   *-*-rtems*)
     # RTEMS supports Pthreads, but the library is not available at GCC build time.
     ;;
+  nvptx*-*-*)
+    # NVPTX does not support Pthreads, has its own code replacement.
+    ;;
   *)
     # Check to see if -pthread or -lpthread is needed.  Prefer the former.
     # In case the pthread.h system header is not found, this test will fail.
diff --git libgomp/configure.tgt libgomp/configure.tgt
index 2970f6f..8fad977 100644
--- libgomp/configure.tgt
+++ libgomp/configure.tgt
@@ -151,6 +151,10 @@ case "${target}" in
 	XLDFLAGS="${XLDFLAGS} -lpthread"
 	;;
 
+  nvptx*-*-*)
+	config_path="nvptx"
+	;;
+
   *)
 	;;
 


Grüße,
 Thomas

[-- Attachment #2: Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [gomp4] [nvptx] Move GOMP stuff from libgcc to libgomp (was: [WIP] OpenMP 4 NVPTX support)
  2015-07-22 16:13 ` Thomas Schwinge
  2015-07-22 16:47   ` Jakub Jelinek
@ 2015-08-10 16:31   ` Thomas Schwinge
  1 sibling, 0 replies; 11+ messages in thread
From: Thomas Schwinge @ 2015-08-10 16:31 UTC (permalink / raw)
  To: gcc-patches; +Cc: Julian Brown, Tobias Burnus, Ilya Verbin, Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 14070 bytes --]

Hi!

On Wed, 22 Jul 2015 18:04:20 +0200, I wrote:
> On Tue, 21 Apr 2015 17:58:39 +0200, Jakub Jelinek <jakub@redhat.com> wrote:
> > Attached is a minimal patch to get at least a trivial OpenMP 4.0 testcase
> > offloading to NVPTX (the first patch).  The second patch is WIP, just first
> > few needed changes to make libgomp to build for NVPTX (several weeks of work
> > at least).
> 
> We're not in particular working on making nvptx offloading work for
> OpenMP, but also for OpenACC offloading a tiny bit of code is required to
> be shipped in an offloading device's runtime library -- code that
> conceptually belongs into libgomp.  (On gomp-4_0-branch, it currently
> lives in libgcc because that was easier to do.)  [...]

> Next, we can then (on gomp-4_0-branch) move the libgcc code into libgomp:
> 
> commit d8d75d17630d7633be4f1733fd195a104cb2ccc4
> Author: Thomas Schwinge <thomas@codesourcery.com>
> Date:   Wed Jul 22 13:05:16 2015 +0200
> 
>     [nvptx] Move GOMP stuff from libgcc to libgomp

Committed to gomp-4_0-branch in r226762:

commit c49a2b23a76591f26b4076401647011442df92df
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Mon Aug 10 16:26:46 2015 +0000

    [nvptx] Move GOMP stuff from libgcc to libgomp
    
    	libgcc/
    	* config.host [nvptx-*] (extra_parts): Don't add "libgomp.a", and
    	"libgomp.spec".
    	* config/nvptx/gomp-acc_on_device.c: Remove file.
    	* config/nvptx/gomp-atomic.asm: Likewise.
    	* config/nvptx/t-nvptx (OBJS_libgomp): Don't set.
    	(gomp-acc_on_device.o, gomp-atomic.o, libgomp.a, libgomp.spec):
    	Remove targets.
    	libgomp/
    	* config/nvptx/critical.c: New file, replacing empty file.
    	* config/nvptx/oacc-init.c: Likewise.
    	* config/nvptx/openacc.f90: New file.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@226762 138bc75d-0d04-0410-961f-82ee72b054a4
---
 libgcc/ChangeLog.gomp                    |   10 +++
 libgcc/config.host                       |    6 +-
 libgcc/config/nvptx/gomp-acc_on_device.c |   15 -----
 libgcc/config/nvptx/gomp-atomic.asm      |   37 -----------
 libgcc/config/nvptx/t-nvptx              |   11 ----
 libgomp/ChangeLog.gomp                   |    4 ++
 libgomp/config/nvptx/critical.c          |   57 +++++++++++++++++
 libgomp/config/nvptx/oacc-init.c         |   40 ++++++++++++
 libgomp/config/nvptx/openacc.f90         |  101 ++++++++++++++++++++++++++++++
 9 files changed, 213 insertions(+), 68 deletions(-)

diff --git libgcc/ChangeLog.gomp libgcc/ChangeLog.gomp
index 085bfda..7de8361 100644
--- libgcc/ChangeLog.gomp
+++ libgcc/ChangeLog.gomp
@@ -1,3 +1,13 @@
+2015-08-10  Thomas Schwinge  <thomas@codesourcery.com>
+
+	* config.host [nvptx-*] (extra_parts): Don't add "libgomp.a", and
+	"libgomp.spec".
+	* config/nvptx/gomp-acc_on_device.c: Remove file.
+	* config/nvptx/gomp-atomic.asm: Likewise.
+	* config/nvptx/t-nvptx (OBJS_libgomp): Don't set.
+	(gomp-acc_on_device.o, gomp-atomic.o, libgomp.a, libgomp.spec):
+	Remove targets.
+
 2015-08-03  Thomas Schwinge  <thomas@codesourcery.com>
 
 	* config/nvptx/gomp-acc_on_device.c: Don't include
diff --git libgcc/config.host libgcc/config.host
index ee7ce03..3a2c75d 100644
--- libgcc/config.host
+++ libgcc/config.host
@@ -1304,11 +1304,7 @@ mep*-*-*)
 	;;
 nvptx-*)
 	tmake_file="$tmake_file nvptx/t-nvptx"
-	if test "x${enable_as_accelerator_for}" != x; then
-		extra_parts="crt0.o libgomp.a libgomp.spec"
-	else
-		extra_parts="crt0.o"
-	fi
+	extra_parts="crt0.o"
 	;;
 *)
 	echo "*** Configuration ${host} not supported" 1>&2
diff --git libgcc/config/nvptx/gomp-acc_on_device.c libgcc/config/nvptx/gomp-acc_on_device.c
deleted file mode 100644
index db94350..0000000
--- libgcc/config/nvptx/gomp-acc_on_device.c
+++ /dev/null
@@ -1,15 +0,0 @@
-/* The compiler always attempts to expand acc_on_device, but if the
-   user disables the builtin, or calls it via a pointer, we have this
-   version.  */
-
-int
-acc_on_device (int dev)
-{
-  /* Just rely on the compiler builtin.  */
-  return __builtin_acc_on_device (dev);
-}
-
-int acc_on_device_h_(int *d)
-{
-  return acc_on_device(*d);
-}
diff --git libgcc/config/nvptx/gomp-atomic.asm libgcc/config/nvptx/gomp-atomic.asm
deleted file mode 100644
index ae9d925..0000000
--- libgcc/config/nvptx/gomp-atomic.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-
-// BEGIN PREAMBLE
-	.version	3.1
-	.target	sm_30
-	.address_size 64
-	.extern .shared .u8 sdata[];
-// END PREAMBLE
-
-// BEGIN VAR DEF: libgomp_ptx_lock
-.global .align 4 .u32 libgomp_ptx_lock;
-
-// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_start
-.visible .func GOMP_atomic_start;
-// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_start
-.visible .func GOMP_atomic_start
-{
-	.reg .pred 	%p<2>;
-	.reg .s32 	%r<2>;
-	.reg .s64 	%rd<2>;
-BB5_1:
-	mov.u64 	%rd1, libgomp_ptx_lock;
-	atom.global.cas.b32 	%r1, [%rd1], 0, 1;
-	setp.ne.s32	%p1, %r1, 0;
-	@%p1 bra 	BB5_1;
-	ret;
-	}
-// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_end
-.visible .func GOMP_atomic_end;
-// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_end
-.visible .func GOMP_atomic_end
-{
-	.reg .s32 	%r<2>;
-	.reg .s64 	%rd<2>;
-	mov.u64 	%rd1, libgomp_ptx_lock;
-	atom.global.exch.b32 	%r1, [%rd1], 0;
-	ret;
-	}
diff --git libgcc/config/nvptx/t-nvptx libgcc/config/nvptx/t-nvptx
index c8741c4..0c2cea0 100644
--- libgcc/config/nvptx/t-nvptx
+++ libgcc/config/nvptx/t-nvptx
@@ -13,14 +13,3 @@ crt0.o: $(srcdir)/config/nvptx/crt0.s
 # support it, and it may cause the build to fail, because of alloca usage, for
 # example.
 INHIBIT_LIBC_CFLAGS = -Dinhibit_libc
-
-gomp-acc_on_device.o: $(srcdir)/config/nvptx/gomp-acc_on_device.c
-	$(gcc_compile) -c -fno-builtin-acc_on_device $<
-gomp-atomic.o: $(srcdir)/config/nvptx/gomp-atomic.asm
-	cp $< $@
-
-OBJS_libgomp= gomp-acc_on_device.o gomp-atomic.o
-libgomp.a: $(OBJS_libgomp)
-	$(AR_CREATE_FOR_TARGET) $@ $(OBJS_libgomp)
-libgomp.spec:
-	echo "*link_gomp: -lgomp" >$@
diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp
index 3898930..fa7eb75 100644
--- libgomp/ChangeLog.gomp
+++ libgomp/ChangeLog.gomp
@@ -1,5 +1,9 @@
 2015-08-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+	* config/nvptx/critical.c: New file, replacing empty file.
+	* config/nvptx/oacc-init.c: Likewise.
+	* config/nvptx/openacc.f90: New file.
+
 	Backport trunk r226760:
 
 	2015-08-10  Thomas Schwinge  <thomas@codesourcery.com>
diff --git libgomp/config/nvptx/critical.c libgomp/config/nvptx/critical.c
index e69de29..1f55aad 100644
--- libgomp/config/nvptx/critical.c
+++ libgomp/config/nvptx/critical.c
@@ -0,0 +1,57 @@
+/* GOMP atomic routines
+
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+   Contributed by Mentor Embedded.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+__asm__ ("// BEGIN VAR DEF: libgomp_ptx_lock\n"
+	 ".global .align 4 .u32 libgomp_ptx_lock;\n"
+	 "\n"
+	 "// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_start\n"
+	 ".visible .func GOMP_atomic_start;\n"
+	 "// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_start\n"
+	 ".visible .func GOMP_atomic_start\n"
+	 "{\n"
+	 "	.reg .pred 	%p<2>;\n"
+	 "	.reg .s32 	%r<2>;\n"
+	 "	.reg .s64 	%rd<2>;\n"
+	 "BB5_1:\n"
+	 "	mov.u64 	%rd1, libgomp_ptx_lock;\n"
+	 "	atom.global.cas.b32 	%r1, [%rd1], 0, 1;\n"
+	 "	setp.ne.s32	%p1, %r1, 0;\n"
+	 "	@%p1 bra 	BB5_1;\n"
+	 "	ret;\n"
+	 "	}\n"
+	 "// BEGIN GLOBAL FUNCTION DECL: GOMP_atomic_end\n"
+	 ".visible .func GOMP_atomic_end;\n"
+	 "// BEGIN GLOBAL FUNCTION DEF: GOMP_atomic_end\n"
+	 ".visible .func GOMP_atomic_end\n"
+	 "{\n"
+	 "	.reg .s32 	%r<2>;\n"
+	 "	.reg .s64 	%rd<2>;\n"
+	 "	mov.u64 	%rd1, libgomp_ptx_lock;\n"
+	 "	atom.global.exch.b32 	%r1, [%rd1], 0;\n"
+	 "	ret;\n"
+	 "	}");
diff --git libgomp/config/nvptx/oacc-init.c libgomp/config/nvptx/oacc-init.c
index e69de29..5b93e74 100644
--- libgomp/config/nvptx/oacc-init.c
+++ libgomp/config/nvptx/oacc-init.c
@@ -0,0 +1,40 @@
+/* OpenACC Runtime initialization routines
+
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+   Contributed by Mentor Embedded.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "openacc.h"
+
+/* The compiler always attempts to expand acc_on_device, but if the
+   user disables the builtin, or calls it via a pointer, we have this
+   version.  */
+
+int
+acc_on_device (int dev)
+{
+  /* Just rely on the compiler builtin.  */
+  return __builtin_acc_on_device (dev);
+}
diff --git libgomp/config/nvptx/openacc.f90 libgomp/config/nvptx/openacc.f90
new file mode 100644
index 0000000..5dfc6a1
--- /dev/null
+++ libgomp/config/nvptx/openacc.f90
@@ -0,0 +1,101 @@
+!  OpenACC Runtime Library Definitions.
+
+!  Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+!  Contributed by Tobias Burnus <burnus@net-b.de>
+!              and Mentor Embedded.
+
+!  This file is part of the GNU Offloading and Multi Processing Library
+!  (libgomp).
+
+!  Libgomp is free software; you can redistribute it and/or modify it
+!  under the terms of the GNU General Public License as published by
+!  the Free Software Foundation; either version 3, or (at your option)
+!  any later version.
+
+!  Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+!  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+!  more details.
+
+!  Under Section 7 of GPL version 3, you are granted additional
+!  permissions described in the GCC Runtime Library Exception, version
+!  3.1, as published by the Free Software Foundation.
+
+!  You should have received a copy of the GNU General Public License and
+!  a copy of the GCC Runtime Library Exception along with this program;
+!  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+!  <http://www.gnu.org/licenses/>.
+
+! Wrapper functions will be built from openacc.f90.  We use a separate file
+! here, because for using ../../openacc.f90, implementations are required for
+! all the functions that it wraps, which we currently don't provide, so linking
+! would fail.
+
+module openacc_kinds
+  use iso_fortran_env, only: int32
+  implicit none
+
+  private :: int32
+  public :: acc_device_kind
+
+  integer, parameter :: acc_device_kind = int32
+
+  public :: acc_device_none, acc_device_default, acc_device_host
+  public :: acc_device_not_host, acc_device_nvidia
+
+  ! Keep in sync with include/gomp-constants.h.
+  integer (acc_device_kind), parameter :: acc_device_none = 0
+  integer (acc_device_kind), parameter :: acc_device_default = 1
+  integer (acc_device_kind), parameter :: acc_device_host = 2
+  integer (acc_device_kind), parameter :: acc_device_not_host = 4
+  integer (acc_device_kind), parameter :: acc_device_nvidia = 5
+
+end module
+
+module openacc_internal
+  use openacc_kinds
+  implicit none
+
+  interface
+    function acc_on_device_h (d)
+      import
+      integer (acc_device_kind) d
+      logical acc_on_device_h
+    end function
+  end interface
+
+  interface
+    function acc_on_device_l (d) &
+        bind (C, name = "acc_on_device")
+      use iso_c_binding, only: c_int
+      integer (c_int) :: acc_on_device_l
+      integer (c_int), value :: d
+    end function
+ end interface
+end module
+
+module openacc
+  use openacc_kinds
+  use openacc_internal
+  implicit none
+
+  public :: acc_on_device
+
+  interface acc_on_device
+    procedure :: acc_on_device_h
+  end interface
+
+end module openacc
+
+function acc_on_device_h (d)
+  use openacc_internal, only: acc_on_device_l
+  use openacc_kinds
+  integer (acc_device_kind) d
+  logical acc_on_device_h
+  if (acc_on_device_l (d) .eq. 1) then
+    acc_on_device_h = .TRUE.
+  else
+    acc_on_device_h = .FALSE.
+  end if
+end function


Grüße,
 Thomas

[-- Attachment #2: Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* nvptx offloading linking
  2015-05-13 20:19   ` [gomp4] nvptx offloading linking (was: [WIP] OpenMP 4 NVPTX support) Thomas Schwinge
@ 2015-10-02 19:46     ` Thomas Schwinge
  2015-10-05 16:08     ` [gomp4] [nvptx] Don't explicitly pass "-lgomp" to the offload compiler (was: nvptx offloading linking) Thomas Schwinge
  1 sibling, 0 replies; 11+ messages in thread
From: Thomas Schwinge @ 2015-10-02 19:46 UTC (permalink / raw)
  To: Bernd Schmidt, Jakub Jelinek, gcc-patches, nathan
  Cc: Alexander Monakov, Arutyun Avetisyan

[-- Attachment #1: Type: text/plain, Size: 55862 bytes --]

Hi!

On Wed, 13 May 2015 22:11:36 +0200, I wrote:
> On Wed, 22 Apr 2015 17:08:26 +0200, Bernd Schmidt <bernds@codesourcery.com> wrote:
> > On 04/21/2015 05:58 PM, Jakub Jelinek wrote:
> > 
> > > suggests that while it is nice that when building nvptx accel compiler
> > > we build libgcc.a, libc.a, libm.a, libgfortran.a (and in the future hopefully libgomp.a),
> > > nothing attempts to link those in :(.
> > 
> > I have that fixed; I expect I'll get around to posting this at some 
> > point now that stage1 is open.
> 
> I have committed the following to gomp-4_0-branch in r223176.  We'll be
> submitting this for trunk later on; some changes will need to be done, as
> already discussed.

I extracted relevant changes out of Bernd's original patch, did a few
things differently (libgomp, as obvious), and committed the following to
trunk in r228418.

libgomp/config/nvptx/fortran.c can be blanked again once
<https://gcc.gnu.org/ml/gcc/2015-10/msg00014.html>, "Offloading:
libgfortran, libm dependencies" has been resolved, and the stuff from
libgomp/config/nvptx/oacc-parallel.c will go away once more pieces of the
execution model implementation on gomp-4_0-branch are merged into trunk.

commit 689db5ed20ee0ae1ca351fd6066c72c60aa43805
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Fri Oct 2 19:43:41 2015 +0000

    nvptx offloading linking
    
    	gcc/
    	* config/nvptx/mkoffload.c (Kind, Vis): Remove enums.
    	(Token, Stmt): Remove structs.
    	(decls, vars, fns): Remove variables.
    	(alloc_comment, append_stmt, is_keyword): Remove macros.
    	(tokenize, write_token, write_tokens, alloc_stmt, rev_stmts)
    	(write_stmt, write_stmts, parse_insn, parse_list_nosemi)
    	(parse_init, parse_file): Remove functions.
    	(read_file): Accept a pointer to a length and store into it.
    	(process): Don't try to parse the input file, just write it out as
    	a string, but looking for maps.  Also write out the length.
    	(main): Don't use "-S" to compile PTX code.
    
    	libgomp/
    	* oacc-ptx.h: Remove file, moving its content into...
    	* config/nvptx/fortran.c: ... here...
    	* config/nvptx/oacc-init.c: ..., here...
    	* config/nvptx/oacc-parallel.c: ..., and here.
    	* config/nvptx/openacc.f90: New file.
    	* plugin/plugin-nvptx.c: Don't include "oacc-ptx.h".
    	(link_ptx): Don't link in predefined bits of PTX code.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@228418 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog                        |  15 +
 gcc/config/nvptx/mkoffload.c         | 677 +++--------------------------------
 libgomp/ChangeLog                    |  10 +
 libgomp/config/nvptx/fortran.c       |  40 +++
 libgomp/config/nvptx/oacc-init.c     |  42 +++
 libgomp/config/nvptx/oacc-parallel.c | 358 ++++++++++++++++++
 libgomp/config/nvptx/openacc.f90     | 102 ++++++
 libgomp/oacc-init.c                  |   6 +-
 libgomp/oacc-ptx.h                   | 426 ----------------------
 libgomp/plugin/plugin-nvptx.c        |  30 --
 10 files changed, 617 insertions(+), 1089 deletions(-)

diff --git gcc/ChangeLog gcc/ChangeLog
index 6a0e102..d1235bd 100644
--- gcc/ChangeLog
+++ gcc/ChangeLog
@@ -1,3 +1,18 @@
+2015-10-02  Thomas Schwinge  <thomas@codesourcery.com>
+	    Bernd Schmidt  <bernds@codesourcery.com>
+
+	* config/nvptx/mkoffload.c (Kind, Vis): Remove enums.
+	(Token, Stmt): Remove structs.
+	(decls, vars, fns): Remove variables.
+	(alloc_comment, append_stmt, is_keyword): Remove macros.
+	(tokenize, write_token, write_tokens, alloc_stmt, rev_stmts)
+	(write_stmt, write_stmts, parse_insn, parse_list_nosemi)
+	(parse_init, parse_file): Remove functions.
+	(read_file): Accept a pointer to a length and store into it.
+	(process): Don't try to parse the input file, just write it out as
+	a string, but looking for maps.  Also write out the length.
+	(main): Don't use "-S" to compile PTX code.
+
 2015-10-02  Jeff Law  <law@redhat.com>
 
 	* tree-ssa-dom.c (optimize_stmt): Note when loop structures need
diff --git gcc/config/nvptx/mkoffload.c gcc/config/nvptx/mkoffload.c
index 69eb4ea..ff538e2 100644
--- gcc/config/nvptx/mkoffload.c
+++ gcc/config/nvptx/mkoffload.c
@@ -41,84 +41,12 @@ const char tool_name[] = "nvptx mkoffload";
 
 #define COMMENT_PREFIX "#"
 
-typedef enum Kind
-{
-  /* 0-ff used for single char tokens */
-  K_symbol = 0x100, /* a symbol */
-  K_label,  /* a label defn (i.e. symbol:) */
-  K_ident,  /* other ident */
-  K_dotted, /* dotted identifier */
-  K_number,
-  K_string,
-  K_comment
-} Kind;
-
-typedef struct Token
-{
-  unsigned short kind : 12;
-  unsigned short space : 1; /* preceded by space */
-  unsigned short end : 1;   /* succeeded by end of line */
-  /* Length of token */
-  unsigned short len;
-
-  /* Token itself */
-  char const *ptr;
-} Token;
-
-/* statement info */
-typedef enum Vis
-{
-  V_dot = 0,  /* random pseudo */
-  V_var = 1,  /* var decl/defn */
-  V_func = 2, /* func decl/defn */
-  V_insn = 3, /* random insn */
-  V_label = 4, /* label defn */
-  V_comment = 5,
-  V_pred = 6,  /* predicate */
-  V_mask = 0x7,
-  V_global = 0x08, /* globalize */
-  V_weak = 0x10,   /* weakly globalize */
-  V_no_eol = 0x20, /* no end of line */
-  V_prefix_comment = 0x40 /* prefixed comment */
-} Vis;
-
-typedef struct Stmt
-{
-  struct Stmt *next;
-  Token *tokens;
-  unsigned char vis;
-  unsigned len : 12;
-  unsigned sym : 12;
-} Stmt;
-
 struct id_map
 {
   id_map *next;
   char *ptx_name;
 };
 
-static const char *read_file (FILE *);
-static Token *tokenize (const char *);
-
-static void write_token (FILE *, const Token *);
-static void write_tokens (FILE *, const Token *, unsigned, int);
-
-static Stmt *alloc_stmt (unsigned, Token *, Token *, const Token *);
-#define alloc_comment(S,E) alloc_stmt (V_comment, S, E, 0)
-#define append_stmt(V, S) ((S)->next = *(V), *(V) = (S))
-static Stmt *rev_stmts (Stmt *);
-static void write_stmt (FILE *, const Stmt *);
-static void write_stmts (FILE *, const Stmt *);
-
-static Token *parse_insn (Token *);
-static Token *parse_list_nosemi (Token *);
-static Token *parse_init (Token *);
-static Token *parse_file (Token *);
-
-static Stmt *decls;
-static Stmt *vars;
-static Stmt *fns;
-
 static id_map *func_ids, **funcs_tail = &func_ids;
 static id_map *var_ids, **vars_tail = &var_ids;
 
@@ -183,7 +111,7 @@ record_id (const char *p1, id_map ***where)
    remember, there could be a NUL in the file itself.  */
 
 static const char *
-read_file (FILE *stream)
+read_file (FILE *stream, size_t *plen)
 {
   size_t alloc = 16384;
   size_t base = 0;
@@ -213,557 +141,10 @@ read_file (FILE *stream)
 	}
     }
   buffer[base] = 0;
+  *plen = base;
   return buffer;
 }
 
-/* Read a token, advancing ptr.
-   If we read a comment, append it to the comments block. */
-
-static Token *
-tokenize (const char *ptr)
-{
-  unsigned alloc = 1000;
-  unsigned num = 0;
-  Token *toks = XNEWVEC (Token, alloc);
-  int in_comment = 0;
-  int not_comment = 0;
-
-  for (;; num++)
-    {
-      const char *base;
-      unsigned kind;
-      int ws = 0;
-      int eol = 0;
-
-    again:
-      base = ptr;
-      if (in_comment)
-	goto block_comment;
-      switch (kind = *ptr++)
-	{
-	default:
-	  break;
-
-	case '\n':
-	  eol = 1;
-	  /* Fall through */
-	case ' ':
-	case '\t':
-	case '\r':
-	case '\v':
-	  /* White space */
-	  ws = not_comment;
-	  goto again;
-
-	case '/':
-	  {
-	    if (*ptr == '/')
-	      {
-		/* line comment.  Do not include trailing \n */
-		base += 2;
-		for (; *ptr; ptr++)
-		  if (*ptr == '\n')
-		    break;
-		kind = K_comment;
-	      }
-	    else if (*ptr == '*')
-	      {
-		/* block comment */
-		base += 2;
-		ptr++;
-
-	      block_comment:
-		eol = in_comment;
-		in_comment = 1;
-		for (; *ptr; ptr++)
-		  {
-		    if (*ptr == '\n')
-		      {
-			ptr++;
-			break;
-		      }
-		    if (ptr[0] == '*' && ptr[1] == '/')
-		      {
-			in_comment = 2;
-			ptr += 2;
-			break;
-		      }
-		  }
-		kind = K_comment;
-	      }
-	    else
-	      break;
-	  }
-	  break;
-
-	case '"':
-	  /* quoted string */
-	  kind = K_string;
-	  while (*ptr)
-	    if (*ptr == '"')
-	      {
-		ptr++;
-		break;
-	      }
-	    else if (*ptr++ == '\\')
-	      ptr++;
-	  break;
-
-	case '.':
-	  if (*ptr < '0' || *ptr > '9')
-	    {
-	      kind = K_dotted;
-	      ws = not_comment;
-	      goto ident;
-	    }
-	  /* FALLTHROUGH */
-	case '0'...'9':
-	  kind = K_number;
-	  goto ident;
-	  break;
-
-	case '$':  /* local labels.  */
-	case '%':  /* register names, pseudoes etc */
-	  kind = K_ident;
-	  goto ident;
-
-	case 'a'...'z':
-	case 'A'...'Z':
-	case '_':
-	  kind = K_symbol; /* possible symbol name */
-	ident:
-	  for (; *ptr; ptr++)
-	    {
-	      if (*ptr >= 'A' && *ptr <= 'Z')
-		continue;
-	      if (*ptr >= 'a' && *ptr <= 'z')
-		continue;
-	      if (*ptr >= '0' && *ptr <= '9')
-		continue;
-	      if (*ptr == '_' || *ptr == '$')
-		continue;
-	      if (*ptr == '.' && kind != K_dotted)
-		/* Idents starting with a dot, cannot have internal dots. */
-		continue;
-	      if ((*ptr == '+' || *ptr == '-')
-		  && kind == K_number
-		  && (ptr[-1] == 'e' || ptr[-1] == 'E'
-		      || ptr[-1] == 'p' || ptr[-1] == 'P'))
-		/* exponent */
-		continue;
-	      break;
-	    }
-	  if (*ptr == ':')
-	    {
-	      ptr++;
-	      kind = K_label;
-	    }
-	  break;
-	}
-
-      if (alloc == num)
-	{
-	  alloc *= 2;
-	  toks = XRESIZEVEC (Token, toks, alloc);
-	}
-      Token *tok = toks + num;
-
-      tok->kind = kind;
-      tok->space = ws;
-      tok->end = 0;
-      tok->ptr = base;
-      tok->len = ptr - base - in_comment;
-      in_comment &= 1;
-      not_comment = kind != K_comment;
-      if (eol && num)
-	tok[-1].end = 1;
-      if (!kind)
-	break;
-    }
-
-  return toks;
-}
-
-/* Write an encoded token. */
-
-static void
-write_token (FILE *out, Token const *tok)
-{
-  if (tok->space)
-    fputc (' ', out);
-
-  switch (tok->kind)
-    {
-    case K_string:
-      {
-	const char *c = tok->ptr + 1;
-	size_t len = tok->len - 2;
-
-	fputs ("\\\"", out);
-	while (len)
-	  {
-	    const char *bs = (const char *)memchr (c, '\\', len);
-	    size_t l = bs ? bs - c : len;
-
-	    fprintf (out, "%.*s", (int)l, c);
-	    len -= l;
-	    c += l;
-	    if (bs)
-	      {
-		fputs ("\\\\", out);
-		len--, c++;
-	      }
-	  }
-	fputs ("\\\"", out);
-      }
-      break;
-
-    default:
-      /* All other tokens shouldn't have anything magic in them */
-      fprintf (out, "%.*s", tok->len, tok->ptr);
-      break;
-    }
-  if (tok->end)
-    fputs ("\\n", out);
-}
-
-static void
-write_tokens (FILE *out, Token const *toks, unsigned len, int spc)
-{
-  fputs ("\t\"", out);
-  for (; len--; toks++)
-    write_token (out, toks);
-  if (spc)
-    fputs (" ", out);
-  fputs ("\"", out);
-}
-
-static Stmt *
-alloc_stmt (unsigned vis, Token *tokens, Token *end, Token const *sym)
-{
-  static unsigned alloc = 0;
-  static Stmt *heap = 0;
-
-  if (!alloc)
-    {
-      alloc = 1000;
-      heap = XNEWVEC (Stmt, alloc);
-    }
-
-  Stmt *stmt = heap++;
-  alloc--;
-
-  tokens->space = 0;
-  stmt->next = 0;
-  stmt->vis = vis;
-  stmt->tokens = tokens;
-  stmt->len = end - tokens;
-  stmt->sym = sym ? sym - tokens : ~0;
-
-  return stmt;
-}
-
-static Stmt *
-rev_stmts (Stmt *stmt)
-{
-  Stmt *prev = 0;
-  Stmt *next;
-
-  while (stmt)
-    {
-      next = stmt->next;
-      stmt->next = prev;
-      prev = stmt;
-      stmt = next;
-    }
-
-  return prev;
-}
-
-static void
-write_stmt (FILE *out, const Stmt *stmt)
-{
-  if ((stmt->vis & V_mask) != V_comment)
-    {
-      write_tokens (out, stmt->tokens, stmt->len,
-		    (stmt->vis & V_mask) == V_pred);
-      fputs (stmt->vis & V_no_eol ? "\t" : "\n", out);
-    }
-}
-
-static void
-write_stmts (FILE *out, const Stmt *stmts)
-{
-  for (; stmts; stmts = stmts->next)
-    write_stmt (out, stmts);
-}
-
-static Token *
-parse_insn (Token *tok)
-{
-  unsigned depth = 0;
-
-  do
-    {
-      Stmt *stmt;
-      Token *sym = 0;
-      unsigned s = V_insn;
-      Token *start = tok;
-
-      switch (tok++->kind)
-	{
-	case K_comment:
-	  while (tok->kind == K_comment)
-	    tok++;
-	  stmt = alloc_comment (start, tok);
-	  append_stmt (&fns, stmt);
-	  continue;
-
-	case '{':
-	  depth++;
-	  break;
-
-	case '}':
-	  depth--;
-	  break;
-
-	case K_label:
-	  if (tok[-1].ptr[0] != '$')
-	    sym = tok - 1;
-	  tok[-1].end = 1;
-	  s = V_label;
-	  break;
-
-	case '@':
-	  tok->space = 0;
-	  if (tok->kind == '!')
-	    tok++;
-	  if (tok->kind == K_symbol)
-	    sym = tok;
-	  tok++;
-	  s = V_pred;
-	  break;
-
-	default:
-	  for (; tok->kind != ';'; tok++)
-	    {
-	      if (tok->kind == ',')
-		tok[1].space = 0;
-	      else if (tok->kind == K_symbol)
-		sym = tok;
-	    }
-	  tok++->end = 1;
-	  break;
-	}
-
-      stmt = alloc_stmt (s, start, tok, sym);
-      append_stmt (&fns, stmt);
-
-      if (!tok[-1].end && tok[0].kind == K_comment)
-	{
-	  stmt->vis |= V_no_eol;
-	  stmt = alloc_comment (tok, tok + 1);
-	  append_stmt (&fns, stmt);
-	  tok++;
-	}
-    }
-  while (depth);
-
-  return tok;
-}
-
-/* comma separated list of tokens */
-
-static Token *
-parse_list_nosemi (Token *tok)
-{
-  Token *start = tok;
-
-  do
-    if (!(++tok)->kind)
-      break;
-  while ((++tok)->kind == ',');
-
-  tok[-1].end = 1;
-  Stmt *stmt = alloc_stmt (V_dot, start, tok, 0);
-  append_stmt (&decls, stmt);
-
-  return tok;
-}
-
-#define is_keyword(T,S) \
-  (sizeof (S) == (T)->len && !memcmp ((T)->ptr + 1, (S), (T)->len - 1))
-
-static Token *
-parse_init (Token *tok)
-{
-  for (;;)
-    {
-      Token *start = tok;
-      Token const *sym = 0;
-      Stmt *stmt;
-
-      if (tok->kind == K_comment)
-	{
-	  while (tok->kind == K_comment)
-	    tok++;
-	  stmt = alloc_comment (start, tok);
-	  append_stmt (&vars, stmt);
-	  start = tok;
-	}
-
-      if (tok->kind == '{')
-	tok[1].space = 0;
-      for (; tok->kind != ',' && tok->kind != ';'; tok++)
-	if (tok->kind == K_symbol)
-	  sym = tok;
-      tok[1].space = 0;
-      int end = tok++->kind == ';';
-      stmt = alloc_stmt (V_insn, start, tok, sym);
-      append_stmt (&vars, stmt);
-      if (!tok[-1].end && tok->kind == K_comment)
-	{
-	  stmt->vis |= V_no_eol;
-	  stmt = alloc_comment (tok, tok + 1);
-	  append_stmt (&vars, stmt);
-	  tok++;
-	}
-      if (end)
-	break;
-    }
-  return tok;
-}
-
-static Token *
-parse_file (Token *tok)
-{
-  Stmt *comment = 0;
-
-  if (tok->kind == K_comment)
-    {
-      Token *start = tok;
-
-      while (tok->kind == K_comment)
-	{
-	  if (strncmp (tok->ptr, ":VAR_MAP ", 9) == 0)
-	    record_id (tok->ptr + 9, &vars_tail);
-	  if (strncmp (tok->ptr, ":FUNC_MAP ", 10) == 0)
-	    record_id (tok->ptr + 10, &funcs_tail);
-	  tok++;
-	}
-      comment = alloc_comment (start, tok);
-      comment->vis |= V_prefix_comment;
-    }
-
-  if (tok->kind == K_dotted)
-    {
-      if (is_keyword (tok, "version")
-	  || is_keyword (tok, "target")
-	  || is_keyword (tok, "address_size"))
-	{
-	  if (comment)
-	    append_stmt (&decls, comment);
-	  tok = parse_list_nosemi (tok);
-	}
-      else
-	{
-	  unsigned vis = 0;
-	  const Token *def = 0;
-	  unsigned is_decl = 0;
-	  Token *start;
-
-	  for (start = tok;
-	       tok->kind && tok->kind != '=' && tok->kind != K_comment
-		 && tok->kind != '{' && tok->kind != ';'; tok++)
-	    {
-	      if (is_keyword (tok, "global")
-		  || is_keyword (tok, "const"))
-		vis |= V_var;
-	      else if (is_keyword (tok, "func")
-		       || is_keyword (tok, "entry"))
-		vis |= V_func;
-	      else if (is_keyword (tok, "visible"))
-		vis |= V_global;
-	      else if (is_keyword (tok, "extern"))
-		is_decl = 1;
-	      else if (is_keyword (tok, "weak"))
-		vis |= V_weak;
-	      if (tok->kind == '(')
-		{
-		  tok[1].space = 0;
-		  tok[0].space = 1;
-		}
-	      else if (tok->kind == ')' && tok[1].kind != ';')
-		tok[1].space = 1;
-
-	      if (tok->kind == K_symbol)
-		def = tok;
-	    }
-
-	  if (!tok->kind)
-	    {
-	      /* end of file */
-	      if (comment)
-		append_stmt (&fns, comment);
-	    }
-	  else if (tok->kind == '{'
-		   || tok->kind == K_comment)
-	    {
-	      /* function defn */
-	      Stmt *stmt = alloc_stmt (vis, start, tok, def);
-	      if (comment)
-		{
-		  append_stmt (&fns, comment);
-		  stmt->vis |= V_prefix_comment;
-		}
-	      append_stmt (&fns, stmt);
-	      tok = parse_insn (tok);
-	    }
-	  else
-	    {
-	      int assign = tok->kind == '=';
-
-	      tok++->end = 1;
-	      if ((vis & V_mask) == V_var && !is_decl)
-		{
-		  /* variable */
-		  Stmt *stmt = alloc_stmt (vis, start, tok, def);
-		  if (comment)
-		    {
-		      append_stmt (&vars, comment);
-		      stmt->vis |= V_prefix_comment;
-		    }
-		  append_stmt (&vars, stmt);
-		  if (assign)
-		    tok = parse_init (tok);
-		}
-	      else
-		{
-		  /* declaration */
-		  Stmt *stmt = alloc_stmt (vis, start, tok, 0);
-		  if (comment)
-		    {
-		      append_stmt (&decls, comment);
-		      stmt->vis |= V_prefix_comment;
-		    }
-		  append_stmt (&decls, stmt);
-		}
-	    }
-	}
-    }
-  else
-    {
-      /* Something strange.  Ignore it.  */
-      if (comment)
-	append_stmt (&fns, comment);
-
-      do
-	tok++;
-      while (tok->kind && !tok->end);
-    }
-  return tok;
-}
-
 /* Parse STR, saving found tokens into PVALUES and return their number.
    Tokens are assumed to be delimited by ':'.  */
 static unsigned
@@ -839,22 +220,55 @@ access_check (const char *name, int mode)
 static void
 process (FILE *in, FILE *out)
 {
-  const char *input = read_file (in);
-  Token *tok = tokenize (input);
+  size_t len = 0;
+  const char *input = read_file (in, &len);
   const char *comma;
   id_map const *id;
   unsigned obj_count = 0;
   unsigned ix;
 
-  do
-    tok = parse_file (tok);
-  while (tok->kind);
+  /* Dump out char arrays for each PTX object file.  These are
+     terminated by a NUL.  */
+  for (size_t i = 0; i != len;)
+    {
+      char c;
 
-  fprintf (out, "static const char ptx_code_%u[] = \n", obj_count++);
-  write_stmts (out, rev_stmts (decls));
-  write_stmts (out, rev_stmts (vars));
-  write_stmts (out, rev_stmts (fns));
-  fprintf (out, ";\n\n");
+      fprintf (out, "static const char ptx_code_%u[] =\n\t\"", obj_count++);
+      while ((c = input[i++]))
+	{
+	  switch (c)
+	    {
+	    case '\r':
+	      continue;
+	    case '\n':
+	      fprintf (out, "\\n\"\n\t\"");
+	      /* Look for mappings on subsequent lines.  */
+	      while (strncmp (input + i, "//:", 3) == 0)
+		{
+		  i += 3;
+
+		  if (strncmp (input + i, "VAR_MAP ", 8) == 0)
+		    record_id (input + i + 8, &vars_tail);
+		  else if (strncmp (input + i, "FUNC_MAP ", 9) == 0)
+		    record_id (input + i + 9, &funcs_tail);
+		  else
+		    abort ();
+		  /* Skip to next line. */
+		  while (input[i++] != '\n')
+		    continue;
+		}
+	      continue;
+	    case '"':
+	    case '\\':
+	      putc ('\\', out);
+	      break;
+	    default:
+	      break;
+	    }
+	  putc (c, out);
+	}
+      fprintf (out, "\";\n\n");
+    }
 
   /* Dump out array of pointers to ptx object strings.  */
   fprintf (out, "static const struct ptx_obj {\n"
@@ -1068,7 +482,6 @@ main (int argc, char **argv)
     default:
       gcc_unreachable ();
     }
-  obstack_ptr_grow (&argv_obstack, "-S");
 
   for (int ix = 1; ix != argc; ix++)
     {
diff --git libgomp/ChangeLog libgomp/ChangeLog
index b38234b..191f21f 100644
--- libgomp/ChangeLog
+++ libgomp/ChangeLog
@@ -1,3 +1,13 @@
+2015-10-02  Thomas Schwinge  <thomas@codesourcery.com>
+
+	* oacc-ptx.h: Remove file, moving its content into...
+	* config/nvptx/fortran.c: ... here...
+	* config/nvptx/oacc-init.c: ..., here...
+	* config/nvptx/oacc-parallel.c: ..., and here.
+	* config/nvptx/openacc.f90: New file.
+	* plugin/plugin-nvptx.c: Don't include "oacc-ptx.h".
+	(link_ptx): Don't link in predefined bits of PTX code.
+
 2015-09-30  Nathan Sidwell  <nathan@codesourcery.com>
 	    Bernd Schmidt <bernds@codesourcery.com>
 
diff --git libgomp/config/nvptx/fortran.c libgomp/config/nvptx/fortran.c
index e69de29..58ca790 100644
--- libgomp/config/nvptx/fortran.c
+++ libgomp/config/nvptx/fortran.c
@@ -0,0 +1,40 @@
+/* OpenACC Runtime Fortran wrapper routines
+
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+   Contributed by Mentor Embedded.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Temporary hack; this will be provided by libgfortran.  */
+
+extern void _gfortran_abort (void);
+
+__asm__ ("// BEGIN GLOBAL FUNCTION DECL: _gfortran_abort\n"
+	 ".visible .func _gfortran_abort;\n"
+	 "// BEGIN GLOBAL FUNCTION DEF: _gfortran_abort\n"
+	 ".visible .func _gfortran_abort\n"
+	 "{\n"
+	 "trap;\n"
+	 "ret;\n"
+	 "}\n");
diff --git libgomp/config/nvptx/oacc-init.c libgomp/config/nvptx/oacc-init.c
index e69de29..c57a3f3 100644
--- libgomp/config/nvptx/oacc-init.c
+++ libgomp/config/nvptx/oacc-init.c
@@ -0,0 +1,42 @@
+/* OpenACC Runtime initialization routines
+
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+   Contributed by Mentor Embedded.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "openacc.h"
+
+/* For -O and higher, the compiler always attempts to expand acc_on_device, but
+   if the user disables the builtin, or calls it via a pointer, we'll need this
+   version.
+
+   Compile this with optimization, so that the compiler expands
+   this, rather than generating infinitely recursive code.  */
+
+int __attribute__ ((__optimize__ ("O2")))
+acc_on_device (acc_device_t dev)
+{
+  return __builtin_acc_on_device (dev);
+}
diff --git libgomp/config/nvptx/oacc-parallel.c libgomp/config/nvptx/oacc-parallel.c
index e69de29..b971256 100644
--- libgomp/config/nvptx/oacc-parallel.c
+++ libgomp/config/nvptx/oacc-parallel.c
@@ -0,0 +1,358 @@
+/* OpenACC constructs
+
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+   Contributed by Mentor Embedded.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "libgomp_g.h"
+
+__asm__ (".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n"
+	 ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n"
+	 ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n"
+	 ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n"
+	 "// BEGIN GLOBAL FUNCTION DECL: GOACC_get_num_threads\n"
+	 ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n"
+	 "// BEGIN GLOBAL FUNCTION DECL: GOACC_get_thread_num\n"
+	 ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n"
+	 "// BEGIN GLOBAL FUNCTION DECL: abort\n"
+	 ".extern .func abort;\n"
+	 ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n"
+	 "{\n"
+	 ".reg .u32 %ar1;\n"
+	 ".reg .u32 %retval;\n"
+	 ".reg .u64 %hr10;\n"
+	 ".reg .u32 %r22;\n"
+	 ".reg .u32 %r23;\n"
+	 ".reg .u32 %r24;\n"
+	 ".reg .u32 %r25;\n"
+	 ".reg .u32 %r26;\n"
+	 ".reg .u32 %r27;\n"
+	 ".reg .u32 %r28;\n"
+	 ".reg .u32 %r29;\n"
+	 ".reg .pred %r30;\n"
+	 ".reg .u32 %r31;\n"
+	 ".reg .pred %r32;\n"
+	 ".reg .u32 %r33;\n"
+	 ".reg .pred %r34;\n"
+	 ".local .align 8 .b8 %frame[4];\n"
+	 "ld.param.u32 %ar1,[%in_ar1];\n"
+	 "mov.u32 %r27,%ar1;\n"
+	 "st.local.u32 [%frame],%r27;\n"
+	 "ld.local.u32 %r28,[%frame];\n"
+	 "mov.u32 %r29,1;\n"
+	 "setp.eq.u32 %r30,%r28,%r29;\n"
+	 "@%r30 bra $L4;\n"
+	 "mov.u32 %r31,2;\n"
+	 "setp.eq.u32 %r32,%r28,%r31;\n"
+	 "@%r32 bra $L5;\n"
+	 "mov.u32 %r33,0;\n"
+	 "setp.eq.u32 %r34,%r28,%r33;\n"
+	 "@!%r34 bra $L8;\n"
+	 "mov.u32 %r23,%tid.x;\n"
+	 "mov.u32 %r22,%r23;\n"
+	 "bra $L7;\n"
+	 "$L4:\n"
+	 "mov.u32 %r24,%tid.y;\n"
+	 "mov.u32 %r22,%r24;\n"
+	 "bra $L7;\n"
+	 "$L5:\n"
+	 "mov.u32 %r25,%tid.z;\n"
+	 "mov.u32 %r22,%r25;\n"
+	 "bra $L7;\n"
+	 "$L8:\n"
+	 "{\n"
+	 "{\n"
+	 "call abort;\n"
+	 "}\n"
+	 "}\n"
+	 "$L7:\n"
+	 "mov.u32 %r26,%r22;\n"
+	 "mov.u32 %retval,%r26;\n"
+	 "st.param.u32 [%out_retval],%retval;\n"
+	 "ret;\n"
+	 "}\n"
+	 ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n"
+	 "{\n"
+	 ".reg .u32 %ar1;\n"
+	 ".reg .u32 %retval;\n"
+	 ".reg .u64 %hr10;\n"
+	 ".reg .u32 %r22;\n"
+	 ".reg .u32 %r23;\n"
+	 ".reg .u32 %r24;\n"
+	 ".reg .u32 %r25;\n"
+	 ".reg .u32 %r26;\n"
+	 ".reg .u32 %r27;\n"
+	 ".reg .u32 %r28;\n"
+	 ".reg .u32 %r29;\n"
+	 ".reg .pred %r30;\n"
+	 ".reg .u32 %r31;\n"
+	 ".reg .pred %r32;\n"
+	 ".reg .u32 %r33;\n"
+	 ".reg .pred %r34;\n"
+	 ".local .align 8 .b8 %frame[4];\n"
+	 "ld.param.u32 %ar1,[%in_ar1];\n"
+	 "mov.u32 %r27,%ar1;\n"
+	 "st.local.u32 [%frame],%r27;\n"
+	 "ld.local.u32 %r28,[%frame];\n"
+	 "mov.u32 %r29,1;\n"
+	 "setp.eq.u32 %r30,%r28,%r29;\n"
+	 "@%r30 bra $L11;\n"
+	 "mov.u32 %r31,2;\n"
+	 "setp.eq.u32 %r32,%r28,%r31;\n"
+	 "@%r32 bra $L12;\n"
+	 "mov.u32 %r33,0;\n"
+	 "setp.eq.u32 %r34,%r28,%r33;\n"
+	 "@!%r34 bra $L15;\n"
+	 "mov.u32 %r23,%ntid.x;\n"
+	 "mov.u32 %r22,%r23;\n"
+	 "bra $L14;\n"
+	 "$L11:\n"
+	 "mov.u32 %r24,%ntid.y;\n"
+	 "mov.u32 %r22,%r24;\n"
+	 "bra $L14;\n"
+	 "$L12:\n"
+	 "mov.u32 %r25,%ntid.z;\n"
+	 "mov.u32 %r22,%r25;\n"
+	 "bra $L14;\n"
+	 "$L15:\n"
+	 "{\n"
+	 "{\n"
+	 "call abort;\n"
+	 "}\n"
+	 "}\n"
+	 "$L14:\n"
+	 "mov.u32 %r26,%r22;\n"
+	 "mov.u32 %retval,%r26;\n"
+	 "st.param.u32 [%out_retval],%retval;\n"
+	 "ret;\n"
+	 "}\n"
+	 ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n"
+	 "{\n"
+	 ".reg .u32 %ar1;\n"
+	 ".reg .u32 %retval;\n"
+	 ".reg .u64 %hr10;\n"
+	 ".reg .u32 %r22;\n"
+	 ".reg .u32 %r23;\n"
+	 ".reg .u32 %r24;\n"
+	 ".reg .u32 %r25;\n"
+	 ".reg .u32 %r26;\n"
+	 ".reg .u32 %r27;\n"
+	 ".reg .u32 %r28;\n"
+	 ".reg .u32 %r29;\n"
+	 ".reg .pred %r30;\n"
+	 ".reg .u32 %r31;\n"
+	 ".reg .pred %r32;\n"
+	 ".reg .u32 %r33;\n"
+	 ".reg .pred %r34;\n"
+	 ".local .align 8 .b8 %frame[4];\n"
+	 "ld.param.u32 %ar1,[%in_ar1];\n"
+	 "mov.u32 %r27,%ar1;\n"
+	 "st.local.u32 [%frame],%r27;\n"
+	 "ld.local.u32 %r28,[%frame];\n"
+	 "mov.u32 %r29,1;\n"
+	 "setp.eq.u32 %r30,%r28,%r29;\n"
+	 "@%r30 bra $L18;\n"
+	 "mov.u32 %r31,2;\n"
+	 "setp.eq.u32 %r32,%r28,%r31;\n"
+	 "@%r32 bra $L19;\n"
+	 "mov.u32 %r33,0;\n"
+	 "setp.eq.u32 %r34,%r28,%r33;\n"
+	 "@!%r34 bra $L22;\n"
+	 "mov.u32 %r23,%ctaid.x;\n"
+	 "mov.u32 %r22,%r23;\n"
+	 "bra $L21;\n"
+	 "$L18:\n"
+	 "mov.u32 %r24,%ctaid.y;\n"
+	 "mov.u32 %r22,%r24;\n"
+	 "bra $L21;\n"
+	 "$L19:\n"
+	 "mov.u32 %r25,%ctaid.z;\n"
+	 "mov.u32 %r22,%r25;\n"
+	 "bra $L21;\n"
+	 "$L22:\n"
+	 "{\n"
+	 "{\n"
+	 "call abort;\n"
+	 "}\n"
+	 "}\n"
+	 "$L21:\n"
+	 "mov.u32 %r26,%r22;\n"
+	 "mov.u32 %retval,%r26;\n"
+	 "st.param.u32 [%out_retval],%retval;\n"
+	 "ret;\n"
+	 "}\n"
+	 ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n"
+	 "{\n"
+	 ".reg .u32 %ar1;\n"
+	 ".reg .u32 %retval;\n"
+	 ".reg .u64 %hr10;\n"
+	 ".reg .u32 %r22;\n"
+	 ".reg .u32 %r23;\n"
+	 ".reg .u32 %r24;\n"
+	 ".reg .u32 %r25;\n"
+	 ".reg .u32 %r26;\n"
+	 ".reg .u32 %r27;\n"
+	 ".reg .u32 %r28;\n"
+	 ".reg .u32 %r29;\n"
+	 ".reg .pred %r30;\n"
+	 ".reg .u32 %r31;\n"
+	 ".reg .pred %r32;\n"
+	 ".reg .u32 %r33;\n"
+	 ".reg .pred %r34;\n"
+	 ".local .align 8 .b8 %frame[4];\n"
+	 "ld.param.u32 %ar1,[%in_ar1];\n"
+	 "mov.u32 %r27,%ar1;\n"
+	 "st.local.u32 [%frame],%r27;\n"
+	 "ld.local.u32 %r28,[%frame];\n"
+	 "mov.u32 %r29,1;\n"
+	 "setp.eq.u32 %r30,%r28,%r29;\n"
+	 "@%r30 bra $L25;\n"
+	 "mov.u32 %r31,2;\n"
+	 "setp.eq.u32 %r32,%r28,%r31;\n"
+	 "@%r32 bra $L26;\n"
+	 "mov.u32 %r33,0;\n"
+	 "setp.eq.u32 %r34,%r28,%r33;\n"
+	 "@!%r34 bra $L29;\n"
+	 "mov.u32 %r23,%nctaid.x;\n"
+	 "mov.u32 %r22,%r23;\n"
+	 "bra $L28;\n"
+	 "$L25:\n"
+	 "mov.u32 %r24,%nctaid.y;\n"
+	 "mov.u32 %r22,%r24;\n"
+	 "bra $L28;\n"
+	 "$L26:\n"
+	 "mov.u32 %r25,%nctaid.z;\n"
+	 "mov.u32 %r22,%r25;\n"
+	 "bra $L28;\n"
+	 "$L29:\n"
+	 "{\n"
+	 "{\n"
+	 "call abort;\n"
+	 "}\n"
+	 "}\n"
+	 "$L28:\n"
+	 "mov.u32 %r26,%r22;\n"
+	 "mov.u32 %retval,%r26;\n"
+	 "st.param.u32 [%out_retval],%retval;\n"
+	 "ret;\n"
+	 "}\n"
+	 "// BEGIN GLOBAL FUNCTION DEF: GOACC_get_num_threads\n"
+	 ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n"
+	 "{\n"
+	 ".reg .u32 %retval;\n"
+	 ".reg .u64 %hr10;\n"
+	 ".reg .u32 %r22;\n"
+	 ".reg .u32 %r23;\n"
+	 ".reg .u32 %r24;\n"
+	 ".reg .u32 %r25;\n"
+	 ".reg .u32 %r26;\n"
+	 ".reg .u32 %r27;\n"
+	 ".reg .u32 %r28;\n"
+	 ".reg .u32 %r29;\n"
+	 "mov.u32 %r26,0;\n"
+	 "{\n"
+	 ".param .u32 %retval_in;\n"
+	 "{\n"
+	 ".param .u32 %out_arg0;\n"
+	 "st.param.u32 [%out_arg0],%r26;\n"
+	 "call (%retval_in),GOACC_ntid,(%out_arg0);\n"
+	 "}\n"
+	 "ld.param.u32 %r27,[%retval_in];\n"
+	 "}\n"
+	 "mov.u32 %r22,%r27;\n"
+	 "mov.u32 %r28,0;\n"
+	 "{\n"
+	 ".param .u32 %retval_in;\n"
+	 "{\n"
+	 ".param .u32 %out_arg0;\n"
+	 "st.param.u32 [%out_arg0],%r28;\n"
+	 "call (%retval_in),GOACC_nctaid,(%out_arg0);\n"
+	 "}\n"
+	 "ld.param.u32 %r29,[%retval_in];\n"
+	 "}\n"
+	 "mov.u32 %r23,%r29;\n"
+	 "mul.lo.u32 %r24,%r22,%r23;\n"
+	 "mov.u32 %r25,%r24;\n"
+	 "mov.u32 %retval,%r25;\n"
+	 "st.param.u32 [%out_retval],%retval;\n"
+	 "ret;\n"
+	 "}\n"
+	 "// BEGIN GLOBAL FUNCTION DEF: GOACC_get_thread_num\n"
+	 ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n"
+	 "{\n"
+	 ".reg .u32 %retval;\n"
+	 ".reg .u64 %hr10;\n"
+	 ".reg .u32 %r22;\n"
+	 ".reg .u32 %r23;\n"
+	 ".reg .u32 %r24;\n"
+	 ".reg .u32 %r25;\n"
+	 ".reg .u32 %r26;\n"
+	 ".reg .u32 %r27;\n"
+	 ".reg .u32 %r28;\n"
+	 ".reg .u32 %r29;\n"
+	 ".reg .u32 %r30;\n"
+	 ".reg .u32 %r31;\n"
+	 ".reg .u32 %r32;\n"
+	 ".reg .u32 %r33;\n"
+	 "mov.u32 %r28,0;\n"
+	 "{\n"
+	 ".param .u32 %retval_in;\n"
+	 "{\n"
+	 ".param .u32 %out_arg0;\n"
+	 "st.param.u32 [%out_arg0],%r28;\n"
+	 "call (%retval_in),GOACC_ntid,(%out_arg0);\n"
+	 "}\n"
+	 "ld.param.u32 %r29,[%retval_in];\n"
+	 "}\n"
+	 "mov.u32 %r22,%r29;\n"
+	 "mov.u32 %r30,0;\n"
+	 "{\n"
+	 ".param .u32 %retval_in;\n"
+	 "{\n"
+	 ".param .u32 %out_arg0;\n"
+	 "st.param.u32 [%out_arg0],%r30;\n"
+	 "call (%retval_in),GOACC_ctaid,(%out_arg0);\n"
+	 "}\n"
+	 "ld.param.u32 %r31,[%retval_in];\n"
+	 "}\n"
+	 "mov.u32 %r23,%r31;\n"
+	 "mul.lo.u32 %r24,%r22,%r23;\n"
+	 "mov.u32 %r32,0;\n"
+	 "{\n"
+	 ".param .u32 %retval_in;\n"
+	 "{\n"
+	 ".param .u32 %out_arg0;\n"
+	 "st.param.u32 [%out_arg0],%r32;\n"
+	 "call (%retval_in),GOACC_tid,(%out_arg0);\n"
+	 "}\n"
+	 "ld.param.u32 %r33,[%retval_in];\n"
+	 "}\n"
+	 "mov.u32 %r25,%r33;\n"
+	 "add.u32 %r26,%r24,%r25;\n"
+	 "mov.u32 %r27,%r26;\n"
+	 "mov.u32 %retval,%r27;\n"
+	 "st.param.u32 [%out_retval],%retval;\n"
+	 "ret;\n"
+	 "}\n");
diff --git libgomp/config/nvptx/openacc.f90 libgomp/config/nvptx/openacc.f90
new file mode 100644
index 0000000..d8b5c06
--- /dev/null
+++ libgomp/config/nvptx/openacc.f90
@@ -0,0 +1,102 @@
+!  OpenACC Runtime Library Definitions.
+
+!  Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+!  Contributed by Tobias Burnus <burnus@net-b.de>
+!              and Mentor Embedded.
+
+!  This file is part of the GNU Offloading and Multi Processing Library
+!  (libgomp).
+
+!  Libgomp is free software; you can redistribute it and/or modify it
+!  under the terms of the GNU General Public License as published by
+!  the Free Software Foundation; either version 3, or (at your option)
+!  any later version.
+
+!  Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+!  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+!  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+!  more details.
+
+!  Under Section 7 of GPL version 3, you are granted additional
+!  permissions described in the GCC Runtime Library Exception, version
+!  3.1, as published by the Free Software Foundation.
+
+!  You should have received a copy of the GNU General Public License and
+!  a copy of the GCC Runtime Library Exception along with this program;
+!  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+!  <http://www.gnu.org/licenses/>.
+
+! Wrapper functions will be built from openacc.f90.  We use a separate file
+! here, because for using ../../openacc.f90, implementations are required for
+! all the functions that it wraps, which we currently don't provide, so linking
+! would fail.
+
+module openacc_kinds
+  use iso_fortran_env, only: int32
+  implicit none
+
+  private :: int32
+  public :: acc_device_kind
+
+  integer, parameter :: acc_device_kind = int32
+
+  public :: acc_device_none, acc_device_default, acc_device_host
+  public :: acc_device_not_host, acc_device_nvidia
+
+  ! Keep in sync with include/gomp-constants.h.
+  integer (acc_device_kind), parameter :: acc_device_none = 0
+  integer (acc_device_kind), parameter :: acc_device_default = 1
+  integer (acc_device_kind), parameter :: acc_device_host = 2
+  ! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
+  integer (acc_device_kind), parameter :: acc_device_not_host = 4
+  integer (acc_device_kind), parameter :: acc_device_nvidia = 5
+
+end module
+
+module openacc_internal
+  use openacc_kinds
+  implicit none
+
+  interface
+    function acc_on_device_h (d)
+      import
+      integer (acc_device_kind) d
+      logical acc_on_device_h
+    end function
+  end interface
+
+  interface
+    function acc_on_device_l (d) &
+        bind (C, name = "acc_on_device")
+      use iso_c_binding, only: c_int
+      integer (c_int) :: acc_on_device_l
+      integer (c_int), value :: d
+    end function
+  end interface
+end module
+
+module openacc
+  use openacc_kinds
+  use openacc_internal
+  implicit none
+
+  public :: acc_on_device
+
+  interface acc_on_device
+    procedure :: acc_on_device_h
+  end interface
+
+end module openacc
+
+function acc_on_device_h (d)
+  use openacc_internal, only: acc_on_device_l
+  use openacc_kinds
+  integer (acc_device_kind) d
+  logical acc_on_device_h
+  if (acc_on_device_l (d) .eq. 1) then
+    acc_on_device_h = .TRUE.
+  else
+    acc_on_device_h = .FALSE.
+  end if
+end function
diff --git libgomp/oacc-init.c libgomp/oacc-init.c
index 28b9e7a..a0e62a4 100644
--- libgomp/oacc-init.c
+++ libgomp/oacc-init.c
@@ -620,7 +620,11 @@ acc_set_device_num (int ord, acc_device_t d)
 
 ialias (acc_set_device_num)
 
-/* Compile on_device with optimization, so that the compiler expands
+/* For -O and higher, the compiler always attempts to expand acc_on_device, but
+   if the user disables the builtin, or calls it via a pointer, we'll need this
+   version.
+
+   Compile this with optimization, so that the compiler expands
    this, rather than generating infinitely recursive code.  */
 
 int __attribute__ ((__optimize__ ("O2")))
diff --git libgomp/oacc-ptx.h libgomp/oacc-ptx.h
deleted file mode 100644
index 2419a46..0000000
--- libgomp/oacc-ptx.h
+++ /dev/null
@@ -1,426 +0,0 @@
-/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
-
-   Contributed by Mentor Embedded.
-
-   This file is part of the GNU Offloading and Multi Processing Library
-   (libgomp).
-
-   Libgomp is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 3, or (at your option)
-   any later version.
-
-   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
-   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-   more details.
-
-   Under Section 7 of GPL version 3, you are granted additional
-   permissions described in the GCC Runtime Library Exception, version
-   3.1, as published by the Free Software Foundation.
-
-   You should have received a copy of the GNU General Public License and
-   a copy of the GCC Runtime Library Exception along with this program;
-   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#define ABORT_PTX				\
-  ".version 3.1\n"				\
-  ".target sm_30\n"				\
-  ".address_size 64\n"				\
-  ".visible .func abort;\n"			\
-  ".visible .func abort\n"			\
-  "{\n"						\
-  "trap;\n"					\
-  "ret;\n"					\
-  "}\n"						\
-  ".visible .func _gfortran_abort;\n"		\
-  ".visible .func _gfortran_abort\n"		\
-  "{\n"						\
-  "trap;\n"					\
-  "ret;\n"					\
-  "}\n" \
-
-/* Generated with:
-
-   $ echo 'int acc_on_device(int d) { return __builtin_acc_on_device(d); } int acc_on_device_h_(int *d) { return acc_on_device(*d); }' | accel-gcc/xgcc -Baccel-gcc -x c - -o - -S -m64 -O3 -fno-builtin-acc_on_device -fno-inline
-*/
-#define ACC_ON_DEVICE_PTX						\
-  "        .version        3.1\n"					\
-  "        .target sm_30\n"						\
-  "        .address_size 64\n"						\
-  ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1);\n" \
-  ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1)\n" \
-  "{\n"									\
-  "        .reg.u32 %ar1;\n"						\
-  ".reg.u32 %retval;\n"							\
-  "        .reg.u64 %hr10;\n"						\
-  "        .reg.u32 %r24;\n"						\
-  "        .reg.u32 %r25;\n"						\
-  "        .reg.pred %r27;\n"						\
-  "        .reg.u32 %r30;\n"						\
-  "        ld.param.u32 %ar1, [%in_ar1];\n"				\
-  "                mov.u32 %r24, %ar1;\n"				\
-  "                setp.ne.u32 %r27,%r24,4;\n"				\
-  "                set.u32.eq.u32 %r30,%r24,5;\n"			\
-  "                neg.s32 %r25, %r30;\n"				\
-  "        @%r27   bra     $L3;\n"					\
-  "                mov.u32 %r25, 1;\n"					\
-  "$L3:\n"								\
-  "                mov.u32 %retval, %r25;\n"				\
-  "        st.param.u32    [%out_retval], %retval;\n"			\
-  "        ret;\n"							\
-  "        }\n"								\
-  ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1);\n" \
-  ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1)\n" \
-  "{\n"									\
-  "        .reg.u64 %ar1;\n"						\
-  ".reg.u32 %retval;\n"							\
-  "        .reg.u64 %hr10;\n"						\
-  "        .reg.u64 %r25;\n"						\
-  "        .reg.u32 %r26;\n"						\
-  "        .reg.u32 %r27;\n"						\
-  "        ld.param.u64 %ar1, [%in_ar1];\n"				\
-  "                mov.u64 %r25, %ar1;\n"				\
-  "                ld.u32  %r26, [%r25];\n"				\
-  "        {\n"								\
-  "                .param.u32 %retval_in;\n"				\
-  "        {\n"								\
-  "                .param.u32 %out_arg0;\n"				\
-  "                st.param.u32 [%out_arg0], %r26;\n"			\
-  "                call (%retval_in), acc_on_device, (%out_arg0);\n"	\
-  "        }\n"								\
-  "                ld.param.u32    %r27, [%retval_in];\n"		\
-  "}\n"									\
-  "                mov.u32 %retval, %r27;\n"				\
-  "        st.param.u32    [%out_retval], %retval;\n"			\
-  "        ret;\n"							\
-  "        }"
-
- #define GOACC_INTERNAL_PTX						\
-  ".version 3.1\n" \
-  ".target sm_30\n" \
-  ".address_size 64\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n" \
-  ".extern .func abort;\n" \
-  ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n" \
-  "{\n" \
-  ".reg .u32 %ar1;\n" \
-  ".reg .u32 %retval;\n" \
-  ".reg .u64 %hr10;\n" \
-  ".reg .u32 %r22;\n" \
-  ".reg .u32 %r23;\n" \
-  ".reg .u32 %r24;\n" \
-  ".reg .u32 %r25;\n" \
-  ".reg .u32 %r26;\n" \
-  ".reg .u32 %r27;\n" \
-  ".reg .u32 %r28;\n" \
-  ".reg .u32 %r29;\n" \
-  ".reg .pred %r30;\n" \
-  ".reg .u32 %r31;\n" \
-  ".reg .pred %r32;\n" \
-  ".reg .u32 %r33;\n" \
-  ".reg .pred %r34;\n" \
-  ".local .align 8 .b8 %frame[4];\n" \
-  "ld.param.u32 %ar1,[%in_ar1];\n" \
-  "mov.u32 %r27,%ar1;\n" \
-  "st.local.u32 [%frame],%r27;\n" \
-  "ld.local.u32 %r28,[%frame];\n" \
-  "mov.u32 %r29,1;\n"							\
-  "setp.eq.u32 %r30,%r28,%r29;\n"					\
-  "@%r30 bra $L4;\n"							\
-  "mov.u32 %r31,2;\n"							\
-  "setp.eq.u32 %r32,%r28,%r31;\n"					\
-  "@%r32 bra $L5;\n"							\
-  "mov.u32 %r33,0;\n"							\
-  "setp.eq.u32 %r34,%r28,%r33;\n"					\
-  "@!%r34 bra $L8;\n"							\
-  "mov.u32 %r23,%tid.x;\n"						\
-  "mov.u32 %r22,%r23;\n"						\
-  "bra $L7;\n"								\
-  "$L4:\n"								\
-  "mov.u32 %r24,%tid.y;\n"						\
-  "mov.u32 %r22,%r24;\n"						\
-  "bra $L7;\n"								\
-  "$L5:\n"								\
-  "mov.u32 %r25,%tid.z;\n"						\
-  "mov.u32 %r22,%r25;\n"						\
-  "bra $L7;\n"								\
-  "$L8:\n"								\
-  "{\n"									\
-  "{\n"									\
-  "call abort;\n"							\
-  "}\n"									\
-  "}\n"									\
-  "$L7:\n"								\
-  "mov.u32 %r26,%r22;\n"						\
-  "mov.u32 %retval,%r26;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"									\
-  ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n" \
-  "{\n"									\
-  ".reg .u32 %ar1;\n"							\
-  ".reg .u32 %retval;\n"						\
-  ".reg .u64 %hr10;\n"							\
-  ".reg .u32 %r22;\n"							\
-  ".reg .u32 %r23;\n"							\
-  ".reg .u32 %r24;\n"							\
-  ".reg .u32 %r25;\n"							\
-  ".reg .u32 %r26;\n"							\
-  ".reg .u32 %r27;\n"							\
-  ".reg .u32 %r28;\n"							\
-  ".reg .u32 %r29;\n"							\
-  ".reg .pred %r30;\n"							\
-  ".reg .u32 %r31;\n"							\
-  ".reg .pred %r32;\n"							\
-  ".reg .u32 %r33;\n"							\
-  ".reg .pred %r34;\n"							\
-  ".local .align 8 .b8 %frame[4];\n"					\
-  "ld.param.u32 %ar1,[%in_ar1];\n"					\
-  "mov.u32 %r27,%ar1;\n"						\
-  "st.local.u32 [%frame],%r27;\n"					\
-  "ld.local.u32 %r28,[%frame];\n"					\
-  "mov.u32 %r29,1;\n"							\
-  "setp.eq.u32 %r30,%r28,%r29;\n"					\
-  "@%r30 bra $L11;\n"							\
-  "mov.u32 %r31,2;\n"							\
-  "setp.eq.u32 %r32,%r28,%r31;\n"					\
-  "@%r32 bra $L12;\n"							\
-  "mov.u32 %r33,0;\n"							\
-  "setp.eq.u32 %r34,%r28,%r33;\n"					\
-  "@!%r34 bra $L15;\n"							\
-  "mov.u32 %r23,%ntid.x;\n"						\
-  "mov.u32 %r22,%r23;\n"						\
-  "bra $L14;\n"								\
-  "$L11:\n"								\
-  "mov.u32 %r24,%ntid.y;\n"						\
-  "mov.u32 %r22,%r24;\n"						\
-  "bra $L14;\n"								\
-  "$L12:\n"								\
-  "mov.u32 %r25,%ntid.z;\n"						\
-  "mov.u32 %r22,%r25;\n"						\
-  "bra $L14;\n"								\
-  "$L15:\n"								\
-  "{\n"									\
-  "{\n"									\
-  "call abort;\n"							\
-  "}\n"									\
-  "}\n"									\
-  "$L14:\n"								\
-  "mov.u32 %r26,%r22;\n"						\
-  "mov.u32 %retval,%r26;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"									\
-  ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n" \
-  "{\n"									\
-  ".reg .u32 %ar1;\n"							\
-  ".reg .u32 %retval;\n"						\
-  ".reg .u64 %hr10;\n"							\
-  ".reg .u32 %r22;\n"							\
-  ".reg .u32 %r23;\n"							\
-  ".reg .u32 %r24;\n"							\
-  ".reg .u32 %r25;\n"							\
-  ".reg .u32 %r26;\n"							\
-  ".reg .u32 %r27;\n"							\
-  ".reg .u32 %r28;\n"							\
-  ".reg .u32 %r29;\n"							\
-  ".reg .pred %r30;\n"							\
-  ".reg .u32 %r31;\n"							\
-  ".reg .pred %r32;\n"							\
-  ".reg .u32 %r33;\n"							\
-  ".reg .pred %r34;\n"							\
-  ".local .align 8 .b8 %frame[4];\n"					\
-  "ld.param.u32 %ar1,[%in_ar1];\n"					\
-  "mov.u32 %r27,%ar1;\n"						\
-  "st.local.u32 [%frame],%r27;\n"					\
-  "ld.local.u32 %r28,[%frame];\n"					\
-  "mov.u32 %r29,1;\n"							\
-  "setp.eq.u32 %r30,%r28,%r29;\n"					\
-  "@%r30 bra $L18;\n"							\
-  "mov.u32 %r31,2;\n"							\
-  "setp.eq.u32 %r32,%r28,%r31;\n"					\
-  "@%r32 bra $L19;\n"							\
-  "mov.u32 %r33,0;\n"							\
-  "setp.eq.u32 %r34,%r28,%r33;\n"					\
-  "@!%r34 bra $L22;\n"							\
-  "mov.u32 %r23,%ctaid.x;\n"						\
-  "mov.u32 %r22,%r23;\n"						\
-  "bra $L21;\n"								\
-  "$L18:\n"								\
-  "mov.u32 %r24,%ctaid.y;\n"						\
-  "mov.u32 %r22,%r24;\n"						\
-  "bra $L21;\n"								\
-  "$L19:\n"								\
-  "mov.u32 %r25,%ctaid.z;\n"						\
-  "mov.u32 %r22,%r25;\n"						\
-  "bra $L21;\n"								\
-  "$L22:\n"								\
-  "{\n"									\
-  "{\n"									\
-  "call abort;\n"							\
-  "}\n"									\
-  "}\n"									\
-  "$L21:\n"								\
-  "mov.u32 %r26,%r22;\n"						\
-  "mov.u32 %retval,%r26;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"									\
-  ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n" \
-  "{\n"									\
-  ".reg .u32 %ar1;\n"							\
-  ".reg .u32 %retval;\n"						\
-  ".reg .u64 %hr10;\n"							\
-  ".reg .u32 %r22;\n"							\
-  ".reg .u32 %r23;\n"							\
-  ".reg .u32 %r24;\n"							\
-  ".reg .u32 %r25;\n"							\
-  ".reg .u32 %r26;\n"							\
-  ".reg .u32 %r27;\n"							\
-  ".reg .u32 %r28;\n"							\
-  ".reg .u32 %r29;\n"							\
-  ".reg .pred %r30;\n"							\
-  ".reg .u32 %r31;\n"							\
-  ".reg .pred %r32;\n"							\
-  ".reg .u32 %r33;\n"							\
-  ".reg .pred %r34;\n"							\
-  ".local .align 8 .b8 %frame[4];\n"					\
-  "ld.param.u32 %ar1,[%in_ar1];\n"					\
-  "mov.u32 %r27,%ar1;\n"						\
-  "st.local.u32 [%frame],%r27;\n"					\
-  "ld.local.u32 %r28,[%frame];\n"					\
-  "mov.u32 %r29,1;\n"							\
-  "setp.eq.u32 %r30,%r28,%r29;\n"					\
-  "@%r30 bra $L25;\n"							\
-  "mov.u32 %r31,2;\n"							\
-  "setp.eq.u32 %r32,%r28,%r31;\n"					\
-  "@%r32 bra $L26;\n"							\
-  "mov.u32 %r33,0;\n"							\
-  "setp.eq.u32 %r34,%r28,%r33;\n"					\
-  "@!%r34 bra $L29;\n"							\
-  "mov.u32 %r23,%nctaid.x;\n"						\
-  "mov.u32 %r22,%r23;\n"						\
-  "bra $L28;\n"								\
-  "$L25:\n"								\
-  "mov.u32 %r24,%nctaid.y;\n"						\
-  "mov.u32 %r22,%r24;\n"						\
-  "bra $L28;\n"								\
-  "$L26:\n"								\
-  "mov.u32 %r25,%nctaid.z;\n"						\
-  "mov.u32 %r22,%r25;\n"						\
-  "bra $L28;\n"								\
-  "$L29:\n"								\
-  "{\n"									\
-  "{\n"									\
-  "call abort;\n"							\
-  "}\n"									\
-  "}\n"									\
-  "$L28:\n"								\
-  "mov.u32 %r26,%r22;\n"						\
-  "mov.u32 %retval,%r26;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"									\
-  ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n"	\
-  "{\n"									\
-  ".reg .u32 %retval;\n"						\
-  ".reg .u64 %hr10;\n"							\
-  ".reg .u32 %r22;\n"							\
-  ".reg .u32 %r23;\n"							\
-  ".reg .u32 %r24;\n"							\
-  ".reg .u32 %r25;\n"							\
-  ".reg .u32 %r26;\n"							\
-  ".reg .u32 %r27;\n"							\
-  ".reg .u32 %r28;\n"							\
-  ".reg .u32 %r29;\n"							\
-  "mov.u32 %r26,0;\n"							\
-  "{\n"									\
-  ".param .u32 %retval_in;\n"						\
-  "{\n"									\
-  ".param .u32 %out_arg0;\n"						\
-  "st.param.u32 [%out_arg0],%r26;\n"					\
-  "call (%retval_in),GOACC_ntid,(%out_arg0);\n"				\
-  "}\n"									\
-  "ld.param.u32 %r27,[%retval_in];\n"					\
-  "}\n"									\
-  "mov.u32 %r22,%r27;\n"						\
-  "mov.u32 %r28,0;\n"							\
-  "{\n"									\
-  ".param .u32 %retval_in;\n"						\
-  "{\n"									\
-  ".param .u32 %out_arg0;\n"						\
-  "st.param.u32 [%out_arg0],%r28;\n"					\
-  "call (%retval_in),GOACC_nctaid,(%out_arg0);\n"			\
-  "}\n"									\
-  "ld.param.u32 %r29,[%retval_in];\n"					\
-  "}\n"									\
-  "mov.u32 %r23,%r29;\n"						\
-  "mul.lo.u32 %r24,%r22,%r23;\n"					\
-  "mov.u32 %r25,%r24;\n"						\
-  "mov.u32 %retval,%r25;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"									\
-  ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n"	\
-  "{\n"									\
-  ".reg .u32 %retval;\n"						\
-  ".reg .u64 %hr10;\n"							\
-  ".reg .u32 %r22;\n"							\
-  ".reg .u32 %r23;\n"							\
-  ".reg .u32 %r24;\n"							\
-  ".reg .u32 %r25;\n"							\
-  ".reg .u32 %r26;\n"							\
-  ".reg .u32 %r27;\n"							\
-  ".reg .u32 %r28;\n"							\
-  ".reg .u32 %r29;\n"							\
-  ".reg .u32 %r30;\n"							\
-  ".reg .u32 %r31;\n"							\
-  ".reg .u32 %r32;\n"							\
-  ".reg .u32 %r33;\n"							\
-  "mov.u32 %r28,0;\n"							\
-  "{\n"									\
-  ".param .u32 %retval_in;\n"						\
-  "{\n"									\
-  ".param .u32 %out_arg0;\n"						\
-  "st.param.u32 [%out_arg0],%r28;\n"					\
-  "call (%retval_in),GOACC_ntid,(%out_arg0);\n"				\
-  "}\n"									\
-  "ld.param.u32 %r29,[%retval_in];\n"					\
-  "}\n"									\
-  "mov.u32 %r22,%r29;\n"						\
-  "mov.u32 %r30,0;\n"							\
-  "{\n"									\
-  ".param .u32 %retval_in;\n"						\
-  "{\n"									\
-  ".param .u32 %out_arg0;\n"						\
-  "st.param.u32 [%out_arg0],%r30;\n"					\
-  "call (%retval_in),GOACC_ctaid,(%out_arg0);\n"			\
-  "}\n"									\
-  "ld.param.u32 %r31,[%retval_in];\n"					\
-  "}\n"									\
-  "mov.u32 %r23,%r31;\n"						\
-  "mul.lo.u32 %r24,%r22,%r23;\n"					\
-  "mov.u32 %r32,0;\n"							\
-  "{\n"									\
-  ".param .u32 %retval_in;\n"						\
-  "{\n"									\
-  ".param .u32 %out_arg0;\n"						\
-  "st.param.u32 [%out_arg0],%r32;\n"					\
-  "call (%retval_in),GOACC_tid,(%out_arg0);\n"				\
-  "}\n"									\
-  "ld.param.u32 %r33,[%retval_in];\n"					\
-  "}\n"									\
-  "mov.u32 %r25,%r33;\n"						\
-  "add.u32 %r26,%r24,%r25;\n"						\
-  "mov.u32 %r27,%r26;\n"						\
-  "mov.u32 %retval,%r27;\n"						\
-  "st.param.u32 [%out_retval],%retval;\n"				\
-  "ret;\n"								\
-  "}\n"
diff --git libgomp/plugin/plugin-nvptx.c libgomp/plugin/plugin-nvptx.c
index cedcc59..9b84637 100644
--- libgomp/plugin/plugin-nvptx.c
+++ libgomp/plugin/plugin-nvptx.c
@@ -34,7 +34,6 @@
 #include "openacc.h"
 #include "config.h"
 #include "libgomp-plugin.h"
-#include "oacc-ptx.h"
 #include "oacc-plugin.h"
 #include "gomp-constants.h"
 
@@ -750,35 +749,6 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
   if (r != CUDA_SUCCESS)
     GOMP_PLUGIN_fatal ("cuLinkCreate error: %s", cuda_error (r));
 
-  char *abort_ptx = ABORT_PTX;
-  r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, abort_ptx,
-		     strlen (abort_ptx) + 1, 0, 0, 0, 0);
-  if (r != CUDA_SUCCESS)
-    {
-      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
-      GOMP_PLUGIN_fatal ("cuLinkAddData (abort) error: %s", cuda_error (r));
-    }
-
-  char *acc_on_device_ptx = ACC_ON_DEVICE_PTX;
-  r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, acc_on_device_ptx,
-		     strlen (acc_on_device_ptx) + 1, 0, 0, 0, 0);
-  if (r != CUDA_SUCCESS)
-    {
-      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
-      GOMP_PLUGIN_fatal ("cuLinkAddData (acc_on_device) error: %s",
-			 cuda_error (r));
-    }
-
-  char *goacc_internal_ptx = GOACC_INTERNAL_PTX;
-  r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, goacc_internal_ptx,
-		     strlen (goacc_internal_ptx) + 1, 0, 0, 0, 0);
-  if (r != CUDA_SUCCESS)
-    {
-      GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
-      GOMP_PLUGIN_fatal ("cuLinkAddData (goacc_internal_ptx) error: %s",
-			 cuda_error (r));
-    }
-
   for (; num_objs--; ptx_objs++)
     {
       /* cuLinkAddData's 'data' argument erroneously omits the const


Grüße,
 Thomas

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [gomp4] [nvptx] Don't explicitly pass "-lgomp" to the offload compiler (was: nvptx offloading linking)
  2015-05-13 20:19   ` [gomp4] nvptx offloading linking (was: [WIP] OpenMP 4 NVPTX support) Thomas Schwinge
  2015-10-02 19:46     ` nvptx offloading linking Thomas Schwinge
@ 2015-10-05 16:08     ` Thomas Schwinge
  1 sibling, 0 replies; 11+ messages in thread
From: Thomas Schwinge @ 2015-10-05 16:08 UTC (permalink / raw)
  To: Bernd Schmidt, Jakub Jelinek, gcc-patches, nathan
  Cc: Julian Brown, Tobias Burnus, Ilya Verbin

[-- Attachment #1: Type: text/plain, Size: 2857 bytes --]

Hi!

On Wed, 13 May 2015 22:11:36 +0200, I wrote:
> On Wed, 22 Apr 2015 17:08:26 +0200, Bernd Schmidt <bernds@codesourcery.com> wrote:
> > On 04/21/2015 05:58 PM, Jakub Jelinek wrote:
> > 
> > > suggests that while it is nice that when building nvptx accel compiler
> > > we build libgcc.a, libc.a, libm.a, libgfortran.a (and in the future hopefully libgomp.a),
> > > nothing attempts to link those in :(.
> > 
> > I have that fixed; I expect I'll get around to posting this at some 
> > point now that stage1 is open.
> 
> I have committed the following to gomp-4_0-branch in r223176.  [...]

>     	gcc/
>     	* config/nvptx/mkoffload.c [...]
>     	(main): [...] Add -lgomp.  [...]

> --- gcc/config/nvptx/mkoffload.c
> +++ gcc/config/nvptx/mkoffload.c

> @@ -983,47 +400,74 @@ main (int argc, char **argv)
>    obstack_ptr_grow (&argv_obstack, driver);
>    obstack_ptr_grow (&argv_obstack, "-xlto");
>    obstack_ptr_grow (&argv_obstack, target_ilp32 ? "-m32" : "-m64");
> -[...]
> +  obstack_ptr_grow (&argv_obstack, "-lgomp");

As argued in
<http://news.gmane.org/find-root.php?message_id=%3C87h9m9e1qj.fsf%40schwinge.name%3E>
(-fopenacc/-fopenmp in combination with the libgomp spec file), and now
verified, we don't actually need that (and I had omitted it from the
earlier trunk commit); now reflected on gomp-4_0-branch in r228495:

commit aee77cda31ea36c95020ea12da3d379d859a851b
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Mon Oct 5 16:04:23 2015 +0000

    [nvptx] Don't explicitly pass "-lgomp" to the offload compiler
    
    	gcc/
    	* config/nvptx/mkoffload.c (main): Don't explicitly pass "-lgomp"
    	to the offload compiler.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@228495 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.gomp           |    5 +++++
 gcc/config/nvptx/mkoffload.c |    1 -
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index 8a32190..a65e652 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,3 +1,8 @@
+2015-10-05  Thomas Schwinge  <thomas@codesourcery.com>
+
+	* config/nvptx/mkoffload.c (main): Don't explicitly pass "-lgomp"
+	to the offload compiler.
+
 2015-10-01  Nathan Sidwell  <nathan@codesourcery.com>
 
 	* builtins.c: Don't include gomp-constants.h.
diff --git gcc/config/nvptx/mkoffload.c gcc/config/nvptx/mkoffload.c
index c8ea8b1..e398b44 100644
--- gcc/config/nvptx/mkoffload.c
+++ gcc/config/nvptx/mkoffload.c
@@ -488,7 +488,6 @@ main (int argc, char **argv)
     default:
       gcc_unreachable ();
     }
-  obstack_ptr_grow (&argv_obstack, "-lgomp");
   char *collect_mkoffload_opts = getenv ("COLLECT_MKOFFLOAD_OPTIONS");
   if (collect_mkoffload_opts)
     {


Grüße,
 Thomas

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2015-10-05 16:08 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-21 15:58 [WIP] OpenMP 4 NVPTX support Jakub Jelinek
2015-04-22 15:08 ` Bernd Schmidt
2015-05-13 20:19   ` [gomp4] nvptx offloading linking (was: [WIP] OpenMP 4 NVPTX support) Thomas Schwinge
2015-10-02 19:46     ` nvptx offloading linking Thomas Schwinge
2015-10-05 16:08     ` [gomp4] [nvptx] Don't explicitly pass "-lgomp" to the offload compiler (was: nvptx offloading linking) Thomas Schwinge
2015-04-23 14:40 ` [WIP] OpenMP 4 NVPTX support Jakub Jelinek
2015-07-22 16:13 ` Thomas Schwinge
2015-07-22 16:47   ` Jakub Jelinek
2015-08-10 15:56     ` Empty libgomp for nvptx (was: [WIP] OpenMP 4 NVPTX support) Thomas Schwinge
2015-08-10 16:29       ` Empty libgomp for nvptx Thomas Schwinge
2015-08-10 16:31   ` [gomp4] [nvptx] Move GOMP stuff from libgcc to libgomp (was: [WIP] OpenMP 4 NVPTX support) Thomas Schwinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).