nvptx offloading: Global constructor, destructor support, via nvptx-tools 'ld' (was: nvptx: Support global constructors/destructors via 'collect2' for offloading)

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: Thomas Schwinge <tschwinge@baylibre.com>
To: gcc-patches@gcc.gnu.org
Cc: Tom de Vries <tdevries@suse.de>
Subject: nvptx offloading: Global constructor, destructor support, via nvptx-tools 'ld' (was: nvptx: Support global constructors/destructors via 'collect2' for offloading)
Date: Thu, 06 Jun 2024 14:02:05 +0200	[thread overview]
Message-ID: <87wmn2mg8y.fsf@euler.schwinge.ddns.net> (raw)
In-Reply-To: <87r0wqp7jf.fsf@euler.schwinge.homeip.net>

[-- Attachment #1: Type: text/plain, Size: 868 bytes --]

Hi!

On 2022-12-23T14:35:16+0100, I wrote:
> On 2022-12-02T14:35:35+0100, I wrote:
>> On 2022-12-01T22:13:38+0100, I wrote:
>>> I'm working on support for global constructors/destructors with
>>> GCC/nvptx
>>
>> See "nvptx: Support global constructors/destructors via 'collect2'"
>> [posted before]

..., which I then recently revised; see
commit d9c90c82d900fdae95df4499bf5f0a4ecb903b53
"nvptx target: Global constructor, destructor support, via nvptx-tools 'ld'".

> Building on that, attached is now the additional "for offloading" piece:
> "nvptx: Support global constructors/destructors via 'collect2' for offloading".

Similarly revised, I've now pushed to trunk branch
commit 5bbe5350a0932c78d4ffce292ba4104a6fe6ef96
"nvptx offloading: Global constructor, destructor support, via nvptx-tools 'ld'",
see attached.


Grüße
 Thomas



[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-nvptx-offloading-Global-constructor-destructor-suppo.patch --]
[-- Type: text/x-diff, Size: 7460 bytes --]

From 5bbe5350a0932c78d4ffce292ba4104a6fe6ef96 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <tschwinge@baylibre.com>
Date: Wed, 5 Jun 2024 12:40:50 +0200
Subject: [PATCH] nvptx offloading: Global constructor, destructor support, via
 nvptx-tools 'ld'

This extends commit d9c90c82d900fdae95df4499bf5f0a4ecb903b53
"nvptx target: Global constructor, destructor support, via nvptx-tools 'ld'"
for offloading.

	libgcc/
	* config/nvptx/gbl-ctors.c ["mgomp"]
	(__do_global_ctors__entry__mgomp)
	(__do_global_dtors__entry__mgomp): New.
	[!"mgomp"] (__do_global_ctors__entry, __do_global_dtors__entry):
	New.
	libgomp/
	* plugin/plugin-nvptx.c (nvptx_do_global_cdtors): New.
	(nvptx_close_device, GOMP_OFFLOAD_load_image)
	(GOMP_OFFLOAD_unload_image): Call it.
---
 libgcc/config/nvptx/gbl-ctors.c |  55 +++++++++++++++
 libgomp/plugin/plugin-nvptx.c   | 117 +++++++++++++++++++++++++++++++-
 2 files changed, 171 insertions(+), 1 deletion(-)

diff --git a/libgcc/config/nvptx/gbl-ctors.c b/libgcc/config/nvptx/gbl-ctors.c
index a2ca053e5e3..a56d64f8ef8 100644
--- a/libgcc/config/nvptx/gbl-ctors.c
+++ b/libgcc/config/nvptx/gbl-ctors.c
@@ -68,6 +68,61 @@ __gbl_ctors (void)
 }
 
 
+/* For nvptx offloading configurations, need '.entry' wrappers.  */
+
+# if defined(__nvptx_softstack__) && defined(__nvptx_unisimt__)
+
+/* OpenMP ("mgomp": soft stack, uni-SIMT): '__mgomp'-suffixed entry points.  */
+
+/* See 'crt0.c', 'mgomp.c'.  */
+extern void *__nvptx_stacks[32] __attribute__((shared,nocommon));
+extern unsigned __nvptx_uni[32] __attribute__((shared,nocommon));
+
+__attribute__((kernel)) void __do_global_ctors__entry__mgomp (void *);
+
+void
+__do_global_ctors__entry__mgomp (void *nvptx_stacks_0)
+{
+  __nvptx_stacks[0] = nvptx_stacks_0;  /* From kernel argument.  */
+  __nvptx_uni[0] = 0;  /* Presumably resets uni-SIMT state -- confirm.  */
+
+  __static_do_global_ctors ();
+}
+
+__attribute__((kernel)) void __do_global_dtors__entry__mgomp (void *);
+
+void
+__do_global_dtors__entry__mgomp (void *nvptx_stacks_0)
+{
+  __nvptx_stacks[0] = nvptx_stacks_0;  /* From kernel argument.  */
+  __nvptx_uni[0] = 0;  /* Presumably resets uni-SIMT state -- confirm.  */
+
+  __static_do_global_dtors ();
+}
+
+# else
+
+/* OpenACC: no soft stack to set up, so the entry points take no arguments.  */
+
+__attribute__((kernel)) void __do_global_ctors__entry (void);
+
+void
+__do_global_ctors__entry (void)
+{
+  __static_do_global_ctors ();
+}
+
+__attribute__((kernel)) void __do_global_dtors__entry (void);
+
+void
+__do_global_dtors__entry (void)
+{
+  __static_do_global_dtors ();
+}
+
+# endif
+
+
 /* The following symbol just provides a means for the nvptx-tools 'ld' to
    trigger linking in this file.  */
 
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 4cedc5390a3..0f3a3be1898 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -346,6 +346,11 @@ static struct ptx_device **ptx_devices;
    default is set here.  */
 static unsigned lowlat_pool_size = 8 * 1024;
 
+static bool nvptx_do_global_cdtors (CUmodule, struct ptx_device *,
+				    const char *);
+static size_t nvptx_stacks_size ();
+static void *nvptx_stacks_acquire (struct ptx_device *, size_t, int);
+
 static inline struct nvptx_thread *
 nvptx_thread (void)
 {
@@ -565,6 +570,18 @@ nvptx_close_device (struct ptx_device *ptx_dev)
   if (!ptx_dev)
     return true;
 
+  bool ret = true;
+
+  for (struct ptx_image_data *image = ptx_dev->images;
+       image != NULL;
+       image = image->next)
+    {
+      if (!nvptx_do_global_cdtors (image->module, ptx_dev,
+				   "__do_global_dtors__entry"
+				   /* or "__do_global_dtors__entry__mgomp" */))
+	ret = false;
+    }
+
   for (struct ptx_free_block *b = ptx_dev->free_blocks; b;)
     {
       struct ptx_free_block *b_next = b->next;
@@ -585,7 +602,8 @@ nvptx_close_device (struct ptx_device *ptx_dev)
     CUDA_CALL (cuCtxDestroy, ptx_dev->ctx);
 
   free (ptx_dev);
-  return true;
+
+  return ret;
 }
 
 static int
@@ -1317,6 +1335,93 @@ nvptx_set_clocktick (CUmodule module, struct ptx_device *dev)
     GOMP_PLUGIN_fatal ("cuMemcpyHtoD error: %s", cuda_error (r));
 }
 
+/* Run MODULE's cdtor kernel FUNCNAME[__mgomp], if any; false on CUDA error.  */
+
+static bool
+nvptx_do_global_cdtors (CUmodule module, struct ptx_device *ptx_dev,
+			const char *funcname)
+{
+  bool ret = true;
+  char *funcname_mgomp = NULL;
+  CUresult r;
+  CUfunction funcptr;
+  r = CUDA_CALL_NOCHECK (cuModuleGetFunction,
+			 &funcptr, module, funcname);
+  GOMP_PLUGIN_debug (0, "cuModuleGetFunction (%s): %s\n",
+		     funcname, cuda_error (r));
+  if (r == CUDA_ERROR_NOT_FOUND)
+    {
+      /* Not found: retry with '[funcname]__mgomp' (the OpenMP variant).  */
+
+      size_t funcname_len = strlen (funcname);
+      const char *mgomp_suffix = "__mgomp";
+      size_t mgomp_suffix_len = strlen (mgomp_suffix);
+      funcname_mgomp
+	= GOMP_PLUGIN_malloc (funcname_len + mgomp_suffix_len + 1);
+      memcpy (funcname_mgomp, funcname, funcname_len);
+      memcpy (funcname_mgomp + funcname_len,
+	      mgomp_suffix, mgomp_suffix_len + 1);  /* Incl. NUL.  */
+      funcname = funcname_mgomp;
+
+      r = CUDA_CALL_NOCHECK (cuModuleGetFunction,
+			     &funcptr, module, funcname);
+      GOMP_PLUGIN_debug (0, "cuModuleGetFunction (%s): %s\n",
+			 funcname, cuda_error (r));
+    }
+  if (r == CUDA_ERROR_NOT_FOUND)
+    ;  /* Neither entry point exists: nothing to run, not an error.  */
+  else if (r != CUDA_SUCCESS)
+    {
+      GOMP_PLUGIN_error ("cuModuleGetFunction (%s) error: %s",
+			 funcname, cuda_error (r));
+      ret = false;
+    }
+  else
+    {
+      /* If necessary (the '__mgomp' variant), set up soft stack.  */
+      void *nvptx_stacks_0;
+      void *kargs[1];
+      if (funcname_mgomp)
+	{
+	  size_t stack_size = nvptx_stacks_size ();
+	  pthread_mutex_lock (&ptx_dev->omp_stacks.lock);
+	  nvptx_stacks_0 = nvptx_stacks_acquire (ptx_dev, stack_size, 1);
+	  nvptx_stacks_0 += stack_size;  /* Now one past the block's end.  */
+	  kargs[0] = &nvptx_stacks_0;
+	}
+      r = CUDA_CALL_NOCHECK (cuLaunchKernel,
+			     funcptr,
+			     1, 1, 1, 1, 1, 1,
+			     /* sharedMemBytes */ 0,
+			     /* hStream */ NULL,
+			     /* kernelParams */ funcname_mgomp ? kargs : NULL,
+			     /* extra */ NULL);
+      if (r != CUDA_SUCCESS)
+	{
+	  GOMP_PLUGIN_error ("cuLaunchKernel (%s) error: %s",
+			     funcname, cuda_error (r));
+	  ret = false;
+	}
+
+      r = CUDA_CALL_NOCHECK (cuStreamSynchronize,  /* Even if launch failed.  */
+			     NULL);
+      if (r != CUDA_SUCCESS)
+	{
+	  GOMP_PLUGIN_error ("cuStreamSynchronize (%s) error: %s",
+			     funcname, cuda_error (r));
+	  ret = false;
+	}
+
+      if (funcname_mgomp)  /* Lock was taken for the soft stack above.  */
+	pthread_mutex_unlock (&ptx_dev->omp_stacks.lock);
+    }
+
+  if (funcname_mgomp)
+    free (funcname_mgomp);
+
+  return ret;
+}
+
 /* Load the (partial) program described by TARGET_DATA to device
    number ORD.  Allocate and return TARGET_TABLE.  If not NULL, REV_FN_TABLE
    will contain the on-device addresses of the functions for reverse offload.
@@ -1546,6 +1651,11 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
 
   nvptx_set_clocktick (module, dev);
 
+  if (!nvptx_do_global_cdtors (module, dev,
+			       "__do_global_ctors__entry"
+			       /* or "__do_global_ctors__entry__mgomp" */))
+    return -1;
+
   return fn_entries + var_entries + other_entries;
 }
 
@@ -1571,6 +1681,11 @@ GOMP_OFFLOAD_unload_image (int ord, unsigned version, const void *target_data)
   for (prev_p = &dev->images; (image = *prev_p) != 0; prev_p = &image->next)
     if (image->target_data == target_data)
       {
+	if (!nvptx_do_global_cdtors (image->module, dev,
+				     "__do_global_dtors__entry"
+				     /* or "__do_global_dtors__entry__mgomp" */))
+	  ret = false;
+
 	*prev_p = image->next;
 	if (CUDA_CALL_NOCHECK (cuModuleUnload, image->module) != CUDA_SUCCESS)
 	  ret = false;
-- 
2.34.1

next prev parent reply	other threads:[~2024-06-06 12:02 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <878rjqaku5.fsf@dem-tschwing-1.ger.mentorg.com>
2022-12-02 13:35 ` nvptx: Support global constructors/destructors via 'collect2' Thomas Schwinge
2022-12-20  8:03   ` [PING] " Thomas Schwinge
2023-01-11 11:48     ` [PING^2] " Thomas Schwinge
2023-01-24  9:01     ` Make 'libgcc/config/nvptx/crt0.c' build '--without-headers' (was: [PING] nvptx: Support global constructors/destructors via 'collect2') Thomas Schwinge
2022-12-23 13:35   ` nvptx: Support global constructors/destructors via 'collect2' for offloading (was: " Thomas Schwinge
2022-12-23 13:37     ` Thomas Schwinge
2023-01-11 11:49       ` [PING] " Thomas Schwinge
2023-01-20 20:46     ` [og12] " Thomas Schwinge
2024-06-06 12:02     ` Thomas Schwinge [this message]
2023-01-20 20:41   ` [og12] nvptx: Support global constructors/destructors via 'collect2' Thomas Schwinge
2023-01-20 20:45     ` Thomas Schwinge
2024-05-31 13:15   ` nvptx target: Global constructor, destructor support, via nvptx-tools 'ld' (was: nvptx: Support global constructors/destructors via 'collect2') Thomas Schwinge
2024-06-03  7:28     ` [patch] install.texi (nvptx): Recommend nvptx-tools 2024-05-30 (was: Re: nvptx target: Global constructor, destructor support, via nvptx-tools 'ld') Tobias Burnus
2024-06-03  8:23       ` Richard Biener
2024-06-03  8:37         ` [patch] install.texi (nvptx): Recommend nvptx-tools 2024-05-30 Tobias Burnus
2024-06-03  9:09           ` Richard Biener
2024-06-03 10:26             ` Tobias Burnus
2024-06-03 11:16               ` Richard Biener
2024-06-08 23:01             ` Gerald Pfeifer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87wmn2mg8y.fsf@euler.schwinge.ddns.net \
    --to=tschwinge@baylibre.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=tdevries@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).