public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* fix gomp offload routine unloading
@ 2015-07-20 23:24 Nathan Sidwell
  2015-07-21  9:59 ` Jakub Jelinek
  0 siblings, 1 reply; 5+ messages in thread
From: Nathan Sidwell @ 2015-07-20 23:24 UTC (permalink / raw)
  To: Jakub Jelinek, Thomas Schwinge; +Cc: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 916 bytes --]

I've posted versions of this patch before, but muddled up with other changes. 
Hopefully this version is sufficiently self-contained to be understandable.

When a dynamic object is unregistered, we unload the offloaded routines it 
registed.  We walk the splay tree mapping host-side functions and vars to 
device-side versions.  Unfortunately we do this after having destroyed the splay 
tree itself.  thus we never find anything, and this ends up leaking memory (the 
vector containing the mappings inserted into the tree).

this patch breaks out a new function, gomp_unload_image_from_device, which IHO 
makes the code more understandable in its own right.  But we also arrange to 
call this function before destroying the splay tree.

I rename gomp_offload_image_to_device to gomp_load_image_to_device  for 
consistency -- gomp_offload_unload_image_from_device seemed a bit of a mouthful.

ok for trunk?

nathan

[-- Attachment #2: trunk-unload.patch --]
[-- Type: text/x-patch, Size: 10541 bytes --]

2015-07-20  Nathan Sidwell  <nathan@codesourcery.com>

	libgomp/
	* target.c (gomp_offload_image_to_device): Rename to ...
	(gomp_load_image_to_device): ... here.
	(GOMP_offload_register): Adjust call.
	(gomp_init_device): Likewise.
	(gomp_unload_image_from_device): New.  Broken out of ...
	(GOMP_offload_unregister): ... here.  Call it.
	(gomp_unload_device): New.
	* libgomp.h (gomp_unload_device): Declare.
	* oacc-init.c (acc_shutdown_1): Unload from device before deleting
	mem maps.

	gcc/
	* config/nvptx/mkoffload.c (process): Add destructor call.

Index: gcc/config/nvptx/mkoffload.c
===================================================================
--- gcc/config/nvptx/mkoffload.c	(revision 226013)
+++ gcc/config/nvptx/mkoffload.c	(working copy)
@@ -880,18 +880,29 @@ process (FILE *in, FILE *out)
   fprintf (out, "#ifdef __cplusplus\n"
 	   "extern \"C\" {\n"
 	   "#endif\n");
+
   fprintf (out, "extern void GOMP_offload_register"
 	   " (const void *, int, const void *);\n");
+  fprintf (out, "extern void GOMP_offload_unregister"
+	   " (const void *, int, const void *);\n");
+
   fprintf (out, "#ifdef __cplusplus\n"
 	   "}\n"
 	   "#endif\n");
 
   fprintf (out, "extern const void *const __OFFLOAD_TABLE__[];\n\n");
-  fprintf (out, "static __attribute__((constructor)) void init (void)\n{\n");
-  fprintf (out, "  GOMP_offload_register (__OFFLOAD_TABLE__, %d,\n",
-	   GOMP_DEVICE_NVIDIA_PTX);
-  fprintf (out, "                         &target_data);\n");
-  fprintf (out, "};\n");
+
+  fprintf (out, "static __attribute__((constructor)) void init (void)\n"
+	   "{\n"
+	   "  GOMP_offload_register (__OFFLOAD_TABLE__, %d/*NVIDIA_PTX*/,\n"
+	   "                         &target_data);\n"
+	   "};\n", GOMP_DEVICE_NVIDIA_PTX);
+
+  fprintf (out, "static __attribute__((destructor)) void fini (void)\n"
+	   "{\n"
+	   "  GOMP_offload_unregister (__OFFLOAD_TABLE__, %d/*NVIDIA_PTX*/,\n"
+	   "                           &target_data);\n"
+	   "};\n", GOMP_DEVICE_NVIDIA_PTX);
 }
 
 static void
Index: libgomp/libgomp.h
===================================================================
--- libgomp/libgomp.h	(revision 226013)
+++ libgomp/libgomp.h	(working copy)
@@ -782,6 +782,7 @@ extern void gomp_unmap_vars (struct targ
 extern void gomp_init_device (struct gomp_device_descr *);
 extern void gomp_free_memmap (struct splay_tree_s *);
 extern void gomp_fini_device (struct gomp_device_descr *);
+extern void gomp_unload_device (struct gomp_device_descr *);
 
 /* work.c */
 
Index: libgomp/oacc-init.c
===================================================================
--- libgomp/oacc-init.c	(revision 226013)
+++ libgomp/oacc-init.c	(working copy)
@@ -252,6 +252,18 @@ acc_shutdown_1 (acc_device_t d)
   /* Get the base device for this device type.  */
   base_dev = resolve_device (d, true);
 
+  ndevs = base_dev->get_num_devices_func ();
+
+  /* Unload all the devices of this type that have been opened.  */
+  for (i = 0; i < ndevs; i++)
+    {
+      struct gomp_device_descr *acc_dev = &base_dev[i];
+
+      gomp_mutex_lock (&acc_dev->lock);
+      gomp_unload_device (acc_dev);
+      gomp_mutex_unlock (&acc_dev->lock);
+    }
+  
   gomp_mutex_lock (&goacc_thread_lock);
 
   /* Free target-specific TLS data and close all devices.  */
@@ -290,7 +302,6 @@ acc_shutdown_1 (acc_device_t d)
 
   gomp_mutex_unlock (&goacc_thread_lock);
 
-  ndevs = base_dev->get_num_devices_func ();
 
   /* Close all the devices of this type that have been opened.  */
   for (i = 0; i < ndevs; i++)
Index: libgomp/target.c
===================================================================
--- libgomp/target.c	(revision 226013)
+++ libgomp/target.c	(working copy)
@@ -638,12 +638,13 @@ gomp_update (struct gomp_device_descr *d
 
 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
    And insert to splay tree the mapping between addresses from HOST_TABLE and
-   from loaded target image.  */
+   from loaded target image.  We rely in the host and device compiler
+   emitting variable and functions in the same order.  */
 
 static void
-gomp_offload_image_to_device (struct gomp_device_descr *devicep,
-			      const void *host_table, const void *target_data,
-			      bool is_register_lock)
+gomp_load_image_to_device (struct gomp_device_descr *devicep,
+			   const void *host_table, const void *target_data,
+			   bool is_register_lock)
 {
   void **host_func_table = ((void ***) host_table)[0];
   void **host_funcs_end  = ((void ***) host_table)[1];
@@ -658,7 +659,8 @@ gomp_offload_image_to_device (struct gom
   /* Load image to device and get target addresses for the image.  */
   struct addr_pair *target_table = NULL;
   int i, num_target_entries
-    = devicep->load_image_func (devicep->target_id, target_data, &target_table);
+    = devicep->load_image_func (devicep->target_id, target_data,
+				&target_table);
 
   if (num_target_entries != num_funcs + num_vars)
     {
@@ -725,6 +727,59 @@ gomp_offload_image_to_device (struct gom
   free (target_table);
 }
 
+/* Unload the mappings described by target_data from device DEVICE_P.
+   The device must be locked.   */
+
+static void
+gomp_unload_image_from_device (struct gomp_device_descr *devicep,
+			       const void *host_table, const void *target_data)
+{
+  void **host_func_table = ((void ***) host_table)[0];
+  void **host_funcs_end  = ((void ***) host_table)[1];
+  void **host_var_table  = ((void ***) host_table)[2];
+  void **host_vars_end   = ((void ***) host_table)[3];
+
+  /* The func table contains only addresses, the var table contains addresses
+     and corresponding sizes.  */
+  int num_funcs = host_funcs_end - host_func_table;
+  int num_vars  = (host_vars_end - host_var_table) / 2;
+
+  unsigned j;
+  struct splay_tree_key_s k;
+  splay_tree_key node = NULL;
+
+  /* Find mapping at start of node array */
+  if (num_funcs || num_vars)
+    {
+      k.host_start = num_funcs ? (uintptr_t) host_func_table[0] : (uintptr_t) host_var_table[0];
+      k.host_end = k.host_start + 1;
+      node = splay_tree_lookup (&devicep->mem_map, &k);
+    }
+  
+  devicep->unload_image_func (devicep->target_id, target_data);
+
+  /* Remove mappings from splay tree.  */
+  for (j = 0; j < num_funcs; j++)
+    {
+      k.host_start = (uintptr_t) host_func_table[j];
+      k.host_end = k.host_start + 1;
+      splay_tree_remove (&devicep->mem_map, &k);
+    }
+
+  for (j = 0; j < num_vars; j++)
+    {
+      k.host_start = (uintptr_t) host_var_table[j * 2];
+      k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
+      splay_tree_remove (&devicep->mem_map, &k);
+    }
+
+  if (node)
+    {
+      free (node->tgt);
+      free (node);
+    }
+}
+
 /* This function should be called from every offload image while loading.
    It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
    the target, and TARGET_DATA needed by target plugin.  */
@@ -742,7 +797,7 @@ GOMP_offload_register (const void *host_
       struct gomp_device_descr *devicep = &devices[i];
       gomp_mutex_lock (&devicep->lock);
       if (devicep->type == target_type && devicep->is_initialized)
-	gomp_offload_image_to_device (devicep, host_table, target_data, true);
+	gomp_load_image_to_device (devicep, host_table, target_data, true);
       gomp_mutex_unlock (&devicep->lock);
     }
 
@@ -767,69 +822,17 @@ void
 GOMP_offload_unregister (const void *host_table, int target_type,
 			 const void *target_data)
 {
-  void **host_func_table = ((void ***) host_table)[0];
-  void **host_funcs_end  = ((void ***) host_table)[1];
-  void **host_var_table  = ((void ***) host_table)[2];
-  void **host_vars_end   = ((void ***) host_table)[3];
   int i;
 
-  /* The func table contains only addresses, the var table contains addresses
-     and corresponding sizes.  */
-  int num_funcs = host_funcs_end - host_func_table;
-  int num_vars  = (host_vars_end - host_var_table) / 2;
-
   gomp_mutex_lock (&register_lock);
 
   /* Unload image from all initialized devices.  */
   for (i = 0; i < num_devices; i++)
     {
-      int j;
       struct gomp_device_descr *devicep = &devices[i];
       gomp_mutex_lock (&devicep->lock);
-      if (devicep->type != target_type || !devicep->is_initialized)
-	{
-	  gomp_mutex_unlock (&devicep->lock);
-	  continue;
-	}
-
-      devicep->unload_image_func (devicep->target_id, target_data);
-
-      /* Remove mapping from splay tree.  */
-      struct splay_tree_key_s k;
-      splay_tree_key node = NULL;
-      if (num_funcs > 0)
-	{
-	  k.host_start = (uintptr_t) host_func_table[0];
-	  k.host_end = k.host_start + 1;
-	  node = splay_tree_lookup (&devicep->mem_map, &k);
-	}
-      else if (num_vars > 0)
-	{
-	  k.host_start = (uintptr_t) host_var_table[0];
-	  k.host_end = k.host_start + (uintptr_t) host_var_table[1];
-	  node = splay_tree_lookup (&devicep->mem_map, &k);
-	}
-
-      for (j = 0; j < num_funcs; j++)
-	{
-	  k.host_start = (uintptr_t) host_func_table[j];
-	  k.host_end = k.host_start + 1;
-	  splay_tree_remove (&devicep->mem_map, &k);
-	}
-
-      for (j = 0; j < num_vars; j++)
-	{
-	  k.host_start = (uintptr_t) host_var_table[j * 2];
-	  k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
-	  splay_tree_remove (&devicep->mem_map, &k);
-	}
-
-      if (node)
-	{
-	  free (node->tgt);
-	  free (node);
-	}
-
+      if (devicep->type == target_type && devicep->is_initialized)
+	gomp_unload_image_from_device(devicep, host_table, target_data);
       gomp_mutex_unlock (&devicep->lock);
     }
 
@@ -858,13 +861,31 @@ gomp_init_device (struct gomp_device_des
     {
       struct offload_image_descr *image = &offload_images[i];
       if (image->type == devicep->type)
-	gomp_offload_image_to_device (devicep, image->host_table,
-				      image->target_data, false);
+	gomp_load_image_to_device (devicep, image->host_table,
+				   image->target_data, false);
     }
 
   devicep->is_initialized = true;
 }
 
+attribute_hidden void
+gomp_unload_device (struct gomp_device_descr *devicep)
+{
+  if (devicep->is_initialized)
+    {
+      unsigned i;
+      
+      /* Unload from device all images registered at the moment.  */
+      for (i = 0; i < num_offload_images; i++)
+	{
+	  struct offload_image_descr *image = &offload_images[i];
+	  if (image->type == devicep->type)
+	    gomp_unload_image_from_device (devicep, image->host_table,
+					   image->target_data);
+	}
+    }
+}
+
 /* Free address mapping tables.  MM must be locked on entry, and remains locked
    on return.  */
 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: fix gomp offload routine unloading
  2015-07-20 23:24 fix gomp offload routine unloading Nathan Sidwell
@ 2015-07-21  9:59 ` Jakub Jelinek
  2015-07-21 12:49   ` Nathan Sidwell
  0 siblings, 1 reply; 5+ messages in thread
From: Jakub Jelinek @ 2015-07-21  9:59 UTC (permalink / raw)
  To: Nathan Sidwell, Ilya Verbin; +Cc: Thomas Schwinge, GCC Patches

On Mon, Jul 20, 2015 at 07:08:55PM -0400, Nathan Sidwell wrote:
> 2015-07-20  Nathan Sidwell  <nathan@codesourcery.com>
> 
> 	libgomp/
> 	* target.c (gomp_offload_image_to_device): Rename to ...
> 	(gomp_load_image_to_device): ... here.
> 	(GOMP_offload_register): Adjust call.
> 	(gomp_init_device): Likewise.
> 	(gomp_unload_image_from_device): New.  Broken out of ...
> 	(GOMP_offload_unregister): ... here.  Call it.
> 	(gomp_unload_device): New.
> 	* libgomp.h (gomp_unload_device): Declare.
> 	* oacc-init.c (acc_shutdown_1): Unload from device before deleting
> 	mem maps.
> 
> 	gcc/
> 	* config/nvptx/mkoffload.c (process): Add destructor call.

Ok if also tested on Intel MIC, with a few changes:

> +  /* Find mapping at start of node array */
> +  if (num_funcs || num_vars)
> +    {
> +      k.host_start = num_funcs ? (uintptr_t) host_func_table[0] : (uintptr_t) host_var_table[0];

Too long line, please wrap it.

> +      if (devicep->type == target_type && devicep->is_initialized)
> +	gomp_unload_image_from_device(devicep, host_table, target_data);

Space before (.

	Jakub

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: fix gomp offload routine unloading
  2015-07-21  9:59 ` Jakub Jelinek
@ 2015-07-21 12:49   ` Nathan Sidwell
  2015-07-21 13:25     ` Ilya Verbin
  0 siblings, 1 reply; 5+ messages in thread
From: Nathan Sidwell @ 2015-07-21 12:49 UTC (permalink / raw)
  To: Jakub Jelinek, Ilya Verbin; +Cc: Thomas Schwinge, GCC Patches

On 07/21/15 05:51, Jakub Jelinek wrote:
> On Mon, Jul 20, 2015 at 07:08:55PM -0400, Nathan Sidwell wrote:
>> 2015-07-20  Nathan Sidwell  <nathan@codesourcery.com>
>>
>> 	libgomp/
>> 	* target.c (gomp_offload_image_to_device): Rename to ...
>> 	(gomp_load_image_to_device): ... here.
>> 	(GOMP_offload_register): Adjust call.
>> 	(gomp_init_device): Likewise.
>> 	(gomp_unload_image_from_device): New.  Broken out of ...
>> 	(GOMP_offload_unregister): ... here.  Call it.
>> 	(gomp_unload_device): New.
>> 	* libgomp.h (gomp_unload_device): Declare.
>> 	* oacc-init.c (acc_shutdown_1): Unload from device before deleting
>> 	mem maps.
>>
>> 	gcc/
>> 	* config/nvptx/mkoffload.c (process): Add destructor call.
>
> Ok if also tested on Intel MIC, with a few changes:

Ilya, are you able to test Intel MIC for me?

nathan

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: fix gomp offload routine unloading
  2015-07-21 12:49   ` Nathan Sidwell
@ 2015-07-21 13:25     ` Ilya Verbin
  2015-07-21 13:39       ` Nathan Sidwell
  0 siblings, 1 reply; 5+ messages in thread
From: Ilya Verbin @ 2015-07-21 13:25 UTC (permalink / raw)
  To: Nathan Sidwell; +Cc: Jakub Jelinek, Thomas Schwinge, GCC Patches

On Tue, Jul 21, 2015 at 08:15:41 -0400, Nathan Sidwell wrote:
> On 07/21/15 05:51, Jakub Jelinek wrote:
> >On Mon, Jul 20, 2015 at 07:08:55PM -0400, Nathan Sidwell wrote:
> >>2015-07-20  Nathan Sidwell  <nathan@codesourcery.com>
> >>
> >>	libgomp/
> >>	* target.c (gomp_offload_image_to_device): Rename to ...
> >>	(gomp_load_image_to_device): ... here.
> >>	(GOMP_offload_register): Adjust call.
> >>	(gomp_init_device): Likewise.
> >>	(gomp_unload_image_from_device): New.  Broken out of ...
> >>	(GOMP_offload_unregister): ... here.  Call it.
> >>	(gomp_unload_device): New.
> >>	* libgomp.h (gomp_unload_device): Declare.
> >>	* oacc-init.c (acc_shutdown_1): Unload from device before deleting
> >>	mem maps.
> >>
> >>	gcc/
> >>	* config/nvptx/mkoffload.c (process): Add destructor call.
> >
> >Ok if also tested on Intel MIC, with a few changes:
> 
> Ilya, are you able to test Intel MIC for me?

I don't see any regressions on MIC.

  -- Ilya

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: fix gomp offload routine unloading
  2015-07-21 13:25     ` Ilya Verbin
@ 2015-07-21 13:39       ` Nathan Sidwell
  0 siblings, 0 replies; 5+ messages in thread
From: Nathan Sidwell @ 2015-07-21 13:39 UTC (permalink / raw)
  To: Ilya Verbin; +Cc: Jakub Jelinek, Thomas Schwinge, GCC Patches

On 07/21/15 09:20, Ilya Verbin wrote:
> On Tue, Jul 21, 2015 at 08:15:41 -0400, Nathan Sidwell wrote:
>> On 07/21/15 05:51, Jakub Jelinek wrote:
>>> On Mon, Jul 20, 2015 at 07:08:55PM -0400, Nathan Sidwell wrote:
>>>> 2015-07-20  Nathan Sidwell  <nathan@codesourcery.com>
>>>>
>>>> 	libgomp/
>>>> 	* target.c (gomp_offload_image_to_device): Rename to ...
>>>> 	(gomp_load_image_to_device): ... here.
>>>> 	(GOMP_offload_register): Adjust call.
>>>> 	(gomp_init_device): Likewise.
>>>> 	(gomp_unload_image_from_device): New.  Broken out of ...
>>>> 	(GOMP_offload_unregister): ... here.  Call it.
>>>> 	(gomp_unload_device): New.
>>>> 	* libgomp.h (gomp_unload_device): Declare.
>>>> 	* oacc-init.c (acc_shutdown_1): Unload from device before deleting
>>>> 	mem maps.
>>>>
>>>> 	gcc/
>>>> 	* config/nvptx/mkoffload.c (process): Add destructor call.
>>>
>>> Ok if also tested on Intel MIC, with a few changes:
>>
>> Ilya, are you able to test Intel MIC for me?
>
> I don't see any regressions on MIC.

thanks!

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-07-21 13:25 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-20 23:24 fix gomp offload routine unloading Nathan Sidwell
2015-07-21  9:59 ` Jakub Jelinek
2015-07-21 12:49   ` Nathan Sidwell
2015-07-21 13:25     ` Ilya Verbin
2015-07-21 13:39       ` Nathan Sidwell

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).