public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Tidy up locking for libgomp OpenACC entry points
@ 2015-04-22 18:43 Julian Brown
  2015-04-23  8:32 ` Jakub Jelinek
  2015-04-23 16:42 ` Thomas Schwinge
  0 siblings, 2 replies; 5+ messages in thread
From: Julian Brown @ 2015-04-22 18:43 UTC (permalink / raw)
  To: gcc-patches, Thomas Schwinge

[-- Attachment #1: Type: text/plain, Size: 967 bytes --]

Hi,

This patch is an attempt to fix some potential race conditions with
accesses to shared data structures from multiple concurrent threads in
libgomp's OpenACC entry points. The main change is to move locking out
of lookup_host and lookup_dev in oacc-mem.c and into their callers
(which can then hold the locks for the whole operation that they are
performing).

Also missing locking has been added for gomp_acc_insert_pointer.

Tests look OK (with offloading to NVidia PTX).

OK? (For the gomp4 branch, maybe, if trunk's not suitable at the
moment?)

Thanks,

Julian

ChangeLog

    libgomp/
    * oacc-mem.c (lookup_host): Remove locking from function. Note
    locking requirement for caller in function comment.
    (lookup_dev): Likewise.
    (acc_free, acc_deviceptr, acc_hostptr, acc_is_present)
    (acc_map_data, acc_unmap_data, present_create_copy, delete_copyout)
    (update_dev_host, gomp_acc_insert_pointer, gomp_acc_remove_pointer):
    Add locking.

[-- Attachment #2: more-locking-2.diff --]
[-- Type: text/x-patch, Size: 11460 bytes --]

commit 983e08e46be24380a52095851cd9c6eb481eb47c
Author: Julian Brown <julian@codesourcery.com>
Date:   Tue Apr 21 12:42:17 2015 -0700

    More locking in oacc-mem.c

diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 89ef5fc..d53af4b 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -35,7 +35,8 @@
 #include <stdint.h>
 #include <assert.h>
 
-/* Return block containing [H->S), or NULL if not contained.  */
+/* Return block containing [H->S), or NULL if not contained.  The device lock
+   for DEV must be locked on entry, and remains locked on exit.  */
 
 static splay_tree_key
 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
@@ -46,9 +47,7 @@ lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
   node.host_start = (uintptr_t) h;
   node.host_end = (uintptr_t) h + s;
 
-  gomp_mutex_lock (&dev->lock);
   key = splay_tree_lookup (&dev->mem_map, &node);
-  gomp_mutex_unlock (&dev->lock);
 
   return key;
 }
@@ -56,7 +55,8 @@ lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
 /* Return block containing [D->S), or NULL if not contained.
    The list isn't ordered by device address, so we have to iterate
    over the whole array.  This is not expected to be a common
-   operation.  */
+   operation.  The device lock associated with TGT must be locked on entry, and
+   remains locked on exit.  */
 
 static splay_tree_key
 lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
@@ -67,16 +67,12 @@ lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
   if (!tgt)
     return NULL;
 
-  gomp_mutex_lock (&tgt->device_descr->lock);
-
   for (t = tgt; t != NULL; t = t->prev)
     {
       if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
         break;
     }
 
-  gomp_mutex_unlock (&tgt->device_descr->lock);
-
   if (!t)
     return NULL;
 
@@ -120,25 +116,32 @@ acc_free (void *d)
 {
   splay_tree_key k;
   struct goacc_thread *thr = goacc_thread ();
+  struct gomp_device_descr *acc_dev = thr->dev;
 
   if (!d)
     return;
 
   assert (thr && thr->dev);
 
+  gomp_mutex_lock (&acc_dev->lock);
+
   /* We don't have to call lazy open here, as the ptr value must have
      been returned by acc_malloc.  It's not permitted to pass NULL in
      (unless you got that null from acc_malloc).  */
-  if ((k = lookup_dev (thr->dev->openacc.data_environ, d, 1)))
-   {
-     void *offset;
+  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
+    {
+      void *offset;
+
+      offset = d - k->tgt->tgt_start + k->tgt_offset;
 
-     offset = d - k->tgt->tgt_start + k->tgt_offset;
+      gomp_mutex_unlock (&acc_dev->lock);
 
-     acc_unmap_data ((void *)(k->host_start + offset));
-   }
+      acc_unmap_data ((void *)(k->host_start + offset));
+    }
+  else
+    gomp_mutex_unlock (&acc_dev->lock);
 
-  thr->dev->free_func (thr->dev->target_id, d);
+  acc_dev->free_func (acc_dev->target_id, d);
 }
 
 void
@@ -178,16 +181,24 @@ acc_deviceptr (void *h)
   goacc_lazy_initialize ();
 
   struct goacc_thread *thr = goacc_thread ();
+  struct gomp_device_descr *dev = thr->dev;
+
+  gomp_mutex_lock (&dev->lock);
 
-  n = lookup_host (thr->dev, h, 1);
+  n = lookup_host (dev, h, 1);
 
   if (!n)
-    return NULL;
+    {
+      gomp_mutex_unlock (&dev->lock);
+      return NULL;
+    }
 
   offset = h - n->host_start;
 
   d = n->tgt->tgt_start + n->tgt_offset + offset;
 
+  gomp_mutex_unlock (&dev->lock);
+
   return d;
 }
 
@@ -204,16 +215,24 @@ acc_hostptr (void *d)
   goacc_lazy_initialize ();
 
   struct goacc_thread *thr = goacc_thread ();
+  struct gomp_device_descr *acc_dev = thr->dev;
 
-  n = lookup_dev (thr->dev->openacc.data_environ, d, 1);
+  gomp_mutex_lock (&acc_dev->lock);
+
+  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);
 
   if (!n)
-    return NULL;
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      return NULL;
+    }
 
   offset = d - n->tgt->tgt_start + n->tgt_offset;
 
   h = n->host_start + offset;
 
+  gomp_mutex_unlock (&acc_dev->lock);
+
   return h;
 }
 
@@ -232,6 +251,8 @@ acc_is_present (void *h, size_t s)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
+  gomp_mutex_lock (&acc_dev->lock);
+
   n = lookup_host (acc_dev, h, s);
 
   if (n && ((uintptr_t)h < n->host_start
@@ -239,6 +260,8 @@ acc_is_present (void *h, size_t s)
 	    || s > n->host_end - n->host_start))
     n = NULL;
 
+  gomp_mutex_unlock (&acc_dev->lock);
+
   return n != NULL;
 }
 
@@ -274,20 +297,32 @@ acc_map_data (void *h, void *d, size_t s)
 	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
                     (void *)h, (int)s, (void *)d, (int)s);
 
+      gomp_mutex_lock (&acc_dev->lock);
+
       if (lookup_host (acc_dev, h, s))
-	gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
-		    (int)s);
+        {
+	  gomp_mutex_unlock (&acc_dev->lock);
+	  gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
+		      (int)s);
+	}
 
       if (lookup_dev (thr->dev->openacc.data_environ, d, s))
-	gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
-		    (int)s);
+        {
+	  gomp_mutex_unlock (&acc_dev->lock);
+	  gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
+		      (int)s);
+	}
+
+      gomp_mutex_unlock (&acc_dev->lock);
 
       tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
 			   &kinds, true, false);
     }
 
+  gomp_mutex_lock (&acc_dev->lock);
   tgt->prev = acc_dev->openacc.data_environ;
   acc_dev->openacc.data_environ = tgt;
+  gomp_mutex_unlock (&acc_dev->lock);
 }
 
 void
@@ -299,17 +334,26 @@ acc_unmap_data (void *h)
   /* No need to call lazy open, as the address must have been mapped.  */
 
   size_t host_size;
+
+  gomp_mutex_lock (&acc_dev->lock);
+
   splay_tree_key n = lookup_host (acc_dev, h, 1);
   struct target_mem_desc *t;
 
   if (!n)
-    gomp_fatal ("%p is not a mapped block", (void *)h);
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      gomp_fatal ("%p is not a mapped block", (void *)h);
+    }
 
   host_size = n->host_end - n->host_start;
 
   if (n->host_start != (uintptr_t) h)
-    gomp_fatal ("[%p,%d] surrounds1 %p",
-		(void *) n->host_start, (int) host_size, (void *) h);
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      gomp_fatal ("[%p,%d] surrounds %p",
+		  (void *) n->host_start, (int) host_size, (void *) h);
+    }
 
   t = n->tgt;
 
@@ -323,8 +367,6 @@ acc_unmap_data (void *h)
       t->tgt_end = 0;
       t->to_free = 0;
 
-      gomp_mutex_lock (&acc_dev->lock);
-
       for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
 	   tp = t, t = t->prev)
 	if (n->tgt == t)
@@ -336,10 +378,10 @@ acc_unmap_data (void *h)
 
 	    break;
 	  }
-
-      gomp_mutex_unlock (&acc_dev->lock);
     }
 
+  gomp_mutex_unlock (&acc_dev->lock);
+
   gomp_unmap_vars (t, true);
 }
 
@@ -361,6 +403,8 @@ present_create_copy (unsigned f, void *h, size_t s)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
+  gomp_mutex_lock (&acc_dev->lock);
+
   n = lookup_host (acc_dev, h, s);
   if (n)
     {
@@ -368,13 +412,22 @@ present_create_copy (unsigned f, void *h, size_t s)
       d = (void *) (n->tgt->tgt_start + n->tgt_offset);
 
       if (!(f & FLAG_PRESENT))
-        gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
-            (void *)h, (int)s, (void *)d, (int)s);
+        {
+	  gomp_mutex_unlock (&acc_dev->lock);
+          gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
+        	      (void *)h, (int)s, (void *)d, (int)s);
+	}
       if ((h + s) > (void *)n->host_end)
-        gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
+	{
+	  gomp_mutex_unlock (&acc_dev->lock);
+	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
+	}
+
+      gomp_mutex_unlock (&acc_dev->lock);
     }
   else if (!(f & FLAG_CREATE))
     {
+      gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
     }
   else
@@ -389,6 +442,8 @@ present_create_copy (unsigned f, void *h, size_t s)
       else
 	kinds = GOMP_MAP_ALLOC;
 
+      gomp_mutex_unlock (&acc_dev->lock);
+
       tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
 			   false);
 
@@ -439,25 +494,35 @@ delete_copyout (unsigned f, void *h, size_t s)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
+  gomp_mutex_lock (&acc_dev->lock);
+
   n = lookup_host (acc_dev, h, s);
 
   /* No need to call lazy open, as the data must already have been
      mapped.  */
 
   if (!n)
-    gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
+    }
 
   d = (void *) (n->tgt->tgt_start + n->tgt_offset);
 
   host_size = n->host_end - n->host_start;
 
   if (n->host_start != (uintptr_t) h || host_size != s)
-    gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
-		(void *) n->host_start, (int) host_size, (void *) h, (int) s);
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
+		  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
+    }
 
   if (f & FLAG_COPYOUT)
     acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
 
+  gomp_mutex_unlock (&acc_dev->lock);
+
   acc_unmap_data (h);
 
   acc_dev->free_func (acc_dev->target_id, d);
@@ -482,20 +547,27 @@ update_dev_host (int is_dev, void *h, size_t s)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
+  gomp_mutex_lock (&acc_dev->lock);
+
   n = lookup_host (acc_dev, h, s);
 
   /* No need to call lazy open, as the data must already have been
      mapped.  */
 
   if (!n)
-    gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
+    }
 
   d = (void *) (n->tgt->tgt_start + n->tgt_offset);
 
+  gomp_mutex_unlock (&acc_dev->lock);
+
   if (is_dev)
     acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
   else
-    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
+    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);  
 }
 
 void
@@ -522,8 +594,11 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
 		       NULL, sizes, kinds, true, false);
   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
+
+  gomp_mutex_lock (&acc_dev->lock);
   tgt->prev = acc_dev->openacc.data_environ;
   acc_dev->openacc.data_environ = tgt;
+  gomp_mutex_unlock (&acc_dev->lock);
 }
 
 void
@@ -535,10 +610,15 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
   struct target_mem_desc *t;
   int minrefs = (mapnum == 1) ? 2 : 3;
 
+  gomp_mutex_lock (&acc_dev->lock);
+
   n = lookup_host (acc_dev, h, 1);
 
   if (!n)
-    gomp_fatal ("%p is not a mapped block", (void *)h);
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      gomp_fatal ("%p is not a mapped block", (void *)h);
+    }
 
   gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
 
@@ -546,8 +626,6 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
 
   struct target_mem_desc *tp;
 
-  gomp_mutex_lock (&acc_dev->lock);
-
   if (t->refcount == minrefs)
     {
       /* This is the last reference, so pull the descriptor off the

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Tidy up locking for libgomp OpenACC entry points
  2015-04-22 18:43 [PATCH] Tidy up locking for libgomp OpenACC entry points Julian Brown
@ 2015-04-23  8:32 ` Jakub Jelinek
  2015-04-23 16:42 ` Thomas Schwinge
  1 sibling, 0 replies; 5+ messages in thread
From: Jakub Jelinek @ 2015-04-23  8:32 UTC (permalink / raw)
  To: Julian Brown; +Cc: gcc-patches, Thomas Schwinge

On Wed, Apr 22, 2015 at 07:42:43PM +0100, Julian Brown wrote:
> @@ -120,25 +116,32 @@ acc_free (void *d)
>  {
>    splay_tree_key k;
>    struct goacc_thread *thr = goacc_thread ();
> +  struct gomp_device_descr *acc_dev = thr->dev;

IMHO you want to move this line after:

>  
>    if (!d)
>      return;
>  
>    assert (thr && thr->dev);

the assert.  Supposedly also the thr = line after the return; line,
no need to do it if returning early.  As gcc now defaults to -std=gnu11
for C, libgomp is written in C11 and you don't need to limit to C89 (and
even back then, it was compiled by gcc and thus you could use GNU
extensions, such as mixed declarations and code).

Otherwise, I have no problem with this going to trunk, if Thomas is ok with
it.

	Jakub

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Tidy up locking for libgomp OpenACC entry points
  2015-04-22 18:43 [PATCH] Tidy up locking for libgomp OpenACC entry points Julian Brown
  2015-04-23  8:32 ` Jakub Jelinek
@ 2015-04-23 16:42 ` Thomas Schwinge
  2015-04-24 17:13   ` Julian Brown
  1 sibling, 1 reply; 5+ messages in thread
From: Thomas Schwinge @ 2015-04-23 16:42 UTC (permalink / raw)
  To: Julian Brown; +Cc: gcc-patches, Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 4918 bytes --]

Hi!

On Wed, 22 Apr 2015 19:42:43 +0100, Julian Brown <julian@codesourcery.com> wrote:
> This patch is an attempt to fix some potential race conditions with
> accesses to shared data structures from multiple concurrent threads in
> libgomp's OpenACC entry points. The main change is to move locking out
> of lookup_host and lookup_dev in oacc-mem.c and into their callers
> (which can then hold the locks for the whole operation that they are
> performing).

Yeah, that makes sense I guess.

> Also missing locking has been added for gomp_acc_insert_pointer.
> 
> Tests look OK (with offloading to NVidia PTX).

How did you test to get some confidence in the locking being sufficient?

I just did a very cursory review, but it looks good with a few review
comments below.

Going further (separate patch?), a few more comments:

Is it OK that oacc-init.c:cached_base_dev is accessed without locking?

Generally, we have to keep in mind that the same device may be accessed
in parallel through both OpenACC and OpenMP interfaces.  For this, for
example, in oacc-init.c, even though acc_device_lock is held, is it OK to
call gomp_init_device(D) without D->lock being locked?  (Compare to
target.c code.)

Please document what exactly oacc-init.c:acc_device_lock is to guard.
I'm not sure I'm understanding this correctly.

Should oacc-init.c:acc_shutdown_1 release goacc_thread_lock before any
gomp_fatal calls?  (That seems to be the general policy in libgomp.)


> --- a/libgomp/oacc-mem.c
> +++ b/libgomp/oacc-mem.c

> @@ -120,25 +116,32 @@ acc_free (void *d)
>  {
>    splay_tree_key k;
>    struct goacc_thread *thr = goacc_thread ();
> +  struct gomp_device_descr *acc_dev = thr->dev;
>  
>    if (!d)
>      return;
>  
>    assert (thr && thr->dev);
>  
> +  gomp_mutex_lock (&acc_dev->lock);
> +
>    /* We don't have to call lazy open here, as the ptr value must have
>       been returned by acc_malloc.  It's not permitted to pass NULL in
>       (unless you got that null from acc_malloc).  */
> -  if ((k = lookup_dev (thr->dev->openacc.data_environ, d, 1)))
> -   {
> -     void *offset;
> +  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
> +    {
> +      void *offset;
> +
> +      offset = d - k->tgt->tgt_start + k->tgt_offset;
>  
> -     offset = d - k->tgt->tgt_start + k->tgt_offset;
> +      gomp_mutex_unlock (&acc_dev->lock);
>  
> -     acc_unmap_data ((void *)(k->host_start + offset));
> -   }
> +      acc_unmap_data ((void *)(k->host_start + offset));
> +    }
> +  else
> +    gomp_mutex_unlock (&acc_dev->lock);

Does it make sense to make the unlock unconditional, and move the
acc_unmap_data after it, guarded by »if (k)«?

> -  thr->dev->free_func (thr->dev->target_id, d);
> +  acc_dev->free_func (acc_dev->target_id, d);
>  }
>  
>  void
> @@ -178,16 +181,24 @@ acc_deviceptr (void *h)
>    goacc_lazy_initialize ();
>  
>    struct goacc_thread *thr = goacc_thread ();
> +  struct gomp_device_descr *dev = thr->dev;
> +
> +  gomp_mutex_lock (&dev->lock);
>  
> -  n = lookup_host (thr->dev, h, 1);
> +  n = lookup_host (dev, h, 1);
>  
>    if (!n)
> -    return NULL;
> +    {
> +      gomp_mutex_unlock (&dev->lock);
> +      return NULL;
> +    }
>  
>    offset = h - n->host_start;
>  
>    d = n->tgt->tgt_start + n->tgt_offset + offset;
>  
> +  gomp_mutex_unlock (&dev->lock);
> +
>    return d;
>  }

Do we need to retain the lock while working with n?  If not, the unlock
could be placed right after the lookup_host, unconditionally.  I'm
confused -- it's commonly being done (retained) in target.c code, but not
in the tgt_fn lookup in target.c:GOMP_target.

> @@ -204,16 +215,24 @@ acc_hostptr (void *d)
>    goacc_lazy_initialize ();
>  
>    struct goacc_thread *thr = goacc_thread ();
> +  struct gomp_device_descr *acc_dev = thr->dev;
>  
> -  n = lookup_dev (thr->dev->openacc.data_environ, d, 1);
> +  gomp_mutex_lock (&acc_dev->lock);
> +
> +  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);
>  
>    if (!n)
> -    return NULL;
> +    {
> +      gomp_mutex_unlock (&acc_dev->lock);
> +      return NULL;
> +    }
>  
>    offset = d - n->tgt->tgt_start + n->tgt_offset;
>  
>    h = n->host_start + offset;
>  
> +  gomp_mutex_unlock (&acc_dev->lock);
> +
>    return h;
>  }

Similar, and also for other code later on.

> @@ -439,25 +494,35 @@ delete_copyout (unsigned f, void *h, size_t s)

>    if (f & FLAG_COPYOUT)
>      acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
>  
> +  gomp_mutex_unlock (&acc_dev->lock);
> +
>    acc_unmap_data (h);
>  
>    acc_dev->free_func (acc_dev->target_id, d);

Should external functions (dev2host_func) be called without any locks
held (to avoid issues with any call-backs); so move the unlocking before
that?


Grüße,
 Thomas

[-- Attachment #2: Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Tidy up locking for libgomp OpenACC entry points
  2015-04-23 16:42 ` Thomas Schwinge
@ 2015-04-24 17:13   ` Julian Brown
  2015-04-29 17:47     ` Thomas Schwinge
  0 siblings, 1 reply; 5+ messages in thread
From: Julian Brown @ 2015-04-24 17:13 UTC (permalink / raw)
  To: Thomas Schwinge; +Cc: gcc-patches, Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 6736 bytes --]

On Thu, 23 Apr 2015 18:41:34 +0200
Thomas Schwinge <thomas@codesourcery.com> wrote:

> Hi!
> 
> On Wed, 22 Apr 2015 19:42:43 +0100, Julian Brown
> <julian@codesourcery.com> wrote:
> > This patch is an attempt to fix some potential race conditions with
> > accesses to shared data structures from multiple concurrent threads
> > in libgomp's OpenACC entry points. The main change is to move
> > locking out of lookup_host and lookup_dev in oacc-mem.c and into
> > their callers (which can then hold the locks for the whole
> > operation that they are performing).
> 
> Yeah, that makes sense I guess.
> 
> > Also missing locking has been added for gomp_acc_insert_pointer.
> > 
> > Tests look OK (with offloading to NVidia PTX).
> 
> How did you test to get some confidence in the locking being
> sufficient?

Merely by running the existing tests and via inspection, sadly. I'm not
sure how much value we'd get from implementing an exhaustive threading
testsuite at this stage: I guess testcases will be easier to come by in
the future if/when people start to use e.g. OpenMP and OpenACC together.

> Going further (separate patch?), a few more comments:
> 
> Is it OK that oacc-init.c:cached_base_dev is accessed without locking?
> 
> Generally, we have to keep in mind that the same device may be
> accessed in parallel through both OpenACC and OpenMP interfaces.  For
> this, for example, in oacc-init.c, even though acc_device_lock is
> held, is it OK to call gomp_init_device(D) without D->lock being
> locked?  (Compare to target.c code.)
> 
> Please document what exactly oacc-init.c:acc_device_lock is to guard.
> I'm not sure I'm understanding this correctly.

I've attached a follow-on patch that documents the purpose of
acc_device_lock -- and also fixes some places that should have been
holding the lock, but were not.

I've also added locking (with dev->lock) when calling gomp_init_device
and gomp_fini_device from the OpenACC initialisation/finalisation code.

> Should oacc-init.c:acc_shutdown_1 release goacc_thread_lock before any
> gomp_fatal calls?  (That seems to be the general policy in libgomp.)

I added this to the first patch.

> > --- a/libgomp/oacc-mem.c
> > +++ b/libgomp/oacc-mem.c
> 
> > @@ -120,25 +116,32 @@ acc_free (void *d)
> >  {
> >    splay_tree_key k;
> >    struct goacc_thread *thr = goacc_thread ();
> > +  struct gomp_device_descr *acc_dev = thr->dev;
> >  
> >    if (!d)
> >      return;
> >  
> >    assert (thr && thr->dev);
> >  
> > +  gomp_mutex_lock (&acc_dev->lock);
> > +
> >    /* We don't have to call lazy open here, as the ptr value must
> > have been returned by acc_malloc.  It's not permitted to pass NULL
> > in (unless you got that null from acc_malloc).  */
> > -  if ((k = lookup_dev (thr->dev->openacc.data_environ, d, 1)))
> > -   {
> > -     void *offset;
> > +  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
> > +    {
> > +      void *offset;
> > +
> > +      offset = d - k->tgt->tgt_start + k->tgt_offset;
> >  
> > -     offset = d - k->tgt->tgt_start + k->tgt_offset;
> > +      gomp_mutex_unlock (&acc_dev->lock);
> >  
> > -     acc_unmap_data ((void *)(k->host_start + offset));
> > -   }
> > +      acc_unmap_data ((void *)(k->host_start + offset));
> > +    }
> > +  else
> > +    gomp_mutex_unlock (&acc_dev->lock);
> 
> Does it make sense to make the unlock unconditional, and move the
> acc_unmap_data after it, guarded by »if (k)«?

I've left this one -- just a stylistic tweak, but I think it's fine
as-is.

> > -  thr->dev->free_func (thr->dev->target_id, d);
> > +  acc_dev->free_func (acc_dev->target_id, d);
> >  }
> >  
> >  void
> > @@ -178,16 +181,24 @@ acc_deviceptr (void *h)
> >    goacc_lazy_initialize ();
> >  
> >    struct goacc_thread *thr = goacc_thread ();
> > +  struct gomp_device_descr *dev = thr->dev;
> > +
> > +  gomp_mutex_lock (&dev->lock);
> >  
> > -  n = lookup_host (thr->dev, h, 1);
> > +  n = lookup_host (dev, h, 1);
> >  
> >    if (!n)
> > -    return NULL;
> > +    {
> > +      gomp_mutex_unlock (&dev->lock);
> > +      return NULL;
> > +    }
> >  
> >    offset = h - n->host_start;
> >  
> >    d = n->tgt->tgt_start + n->tgt_offset + offset;
> >  
> > +  gomp_mutex_unlock (&dev->lock);
> > +
> >    return d;
> >  }
> 
> Do we need to retain the lock while working with n?  If not, the
> unlock could be placed right after the lookup_host, unconditionally.
> I'm confused -- it's commonly being done (retained) in target.c code,
> but not in the tgt_fn lookup in target.c:GOMP_target.

I think the difference can be explained as follows: a given mapping
(splay_key_tree_s) is essentially immutable after it is created (apart
from the refcounts). Thus it can be safely accessed *so long as we know
it will not be deallocated*.

Now, in some parts of target.c, we have an "active" target_mem_desc,
corresponding to a set of host-target mappings. So long as we are
holding that target_mem_desc (e.g. as we are in GOMP_target_data), we
know that none of the associated mappings' refcounts will fall to zero:
so, we can access them (read only) safely without explicitly holding the
lock.

But, that's *not* the case for e.g. acc_deviceptr: that can be called
at any point, in particular partway through deallocation of a set of
mappings. So we need the lock for that.

I didn't write the code of course, so E&OE :-).

> Should external functions (dev2host_func) be called without any locks
> held (to avoid issues with any call-backs); so move the unlocking
> before that?

I fixed this in the first patch.

Re-tested (libgomp), results look OK.

OK to apply now?

Julian

ChangeLog

    libgomp/
    * oacc-init.c (acc_shutdown_1): Call gomp_mutex_unlock for
    goacc_thread_lock on error paths.
    * oacc-mem.c (lookup_host): Remove locking from function. Note
    locking requirement for caller in function comment.
    (lookup_dev): Likewise.
    (acc_free, acc_deviceptr, acc_hostptr, acc_is_present)
    (acc_map_data, acc_unmap_data, present_create_copy, delete_copyout)
    (update_dev_host, gomp_acc_insert_pointer, gomp_acc_remove_pointer):
    Add locking.

ChangeLog

    libgomp/
    * oacc-init.c (acc_device_lock): Add explanatory comment.
    (resolve_device): Add comment about locking requirement.
    (acc_init_1, acc_shutdown_1): Likewise. Add locking around
    gomp_init_device and gomp_fini_device calls.
    (acc_get_num_devices, acc_set_device_type, acc_get_device_type)
    (acc_get_device_num, acc_set_device_num): Add locking around
    resolve_device and gomp_init_device calls.

[-- Attachment #2: acc-device-lock-tweaks-1.diff --]
[-- Type: text/x-patch, Size: 3703 bytes --]

commit 5009a4a24f15150976a9ad717f81c3436a082cea
Author: Julian Brown <julian@codesourcery.com>
Date:   Fri Apr 24 03:49:14 2015 -0700

    Fix usage of acc_device_lock

diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index 9d47a23..503f8b8 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -36,6 +36,9 @@
 #include <stdbool.h>
 #include <string.h>
 
+/* This lock is used to protect access to cached_base_dev, dispatchers and
+   the (abstract) initialisation state of attached offloading devices.  */
+
 static gomp_mutex_t acc_device_lock;
 
 /* A cached version of the dispatcher for the global "current" accelerator type,
@@ -106,6 +109,8 @@ name_of_acc_device_t (enum acc_device_t type)
     }
 }
 
+/* ACC_DEVICE_LOCK should be held before calling this function.  */
+
 static struct gomp_device_descr *
 resolve_device (acc_device_t d)
 {
@@ -166,7 +171,8 @@ resolve_device (acc_device_t d)
 
 /* This is called when plugins have been initialized, and serves to call
    (indirectly) the target's device_init hook.  Calling multiple times without
-   an intervening acc_shutdown_1 call is an error.  */
+   an intervening acc_shutdown_1 call is an error.  ACC_DEVICE_LOCK should be
+   held before calling this function.  */
 
 static struct gomp_device_descr *
 acc_init_1 (acc_device_t d)
@@ -183,14 +189,21 @@ acc_init_1 (acc_device_t d)
 
   acc_dev = &base_dev[goacc_device_num];
 
+  gomp_mutex_lock (&acc_dev->lock);
   if (acc_dev->is_initialized)
-    gomp_fatal ("device already active");
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      gomp_fatal ("device already active");
+    }
 
   gomp_init_device (acc_dev);
+  gomp_mutex_unlock (&acc_dev->lock);
 
   return base_dev;
 }
 
+/* ACC_DEVICE_LOCK should be held before calling this function.  */
+
 static void
 acc_shutdown_1 (acc_device_t d)
 {
@@ -249,11 +262,13 @@ acc_shutdown_1 (acc_device_t d)
   for (i = 0; i < ndevs; i++)
     {
       struct gomp_device_descr *acc_dev = &base_dev[i];
+      gomp_mutex_lock (&acc_dev->lock);
       if (acc_dev->is_initialized)
         {
 	  devices_active = true;
 	  gomp_fini_device (acc_dev);
 	}
+      gomp_mutex_unlock (&acc_dev->lock);
     }
 
   if (!devices_active)
@@ -410,7 +425,10 @@ acc_get_num_devices (acc_device_t d)
 
   gomp_init_targets_once ();
 
+  gomp_mutex_lock (&acc_device_lock);
   acc_dev = resolve_device (d);
+  gomp_mutex_unlock (&acc_device_lock);
+
   if (!acc_dev)
     return 0;
 
@@ -441,8 +459,10 @@ acc_set_device_type (acc_device_t d)
   cached_base_dev = base_dev = resolve_device (d);
   acc_dev = &base_dev[goacc_device_num];
 
+  gomp_mutex_lock (&acc_dev->lock);
   if (!acc_dev->is_initialized)
     gomp_init_device (acc_dev);
+  gomp_mutex_unlock (&acc_dev->lock);
 
   gomp_mutex_unlock (&acc_device_lock);
 
@@ -473,7 +493,9 @@ acc_get_device_type (void)
     {
       gomp_init_targets_once ();
 
+      gomp_mutex_lock (&acc_device_lock);
       dev = resolve_device (acc_device_default);
+      gomp_mutex_unlock (&acc_device_lock);
       res = acc_device_type (dev->type);
     }
 
@@ -497,7 +519,9 @@ acc_get_device_num (acc_device_t d)
   if (!cached_base_dev)
     gomp_init_targets_once ();
 
+  gomp_mutex_lock (&acc_device_lock);
   dev = resolve_device (d);
+  gomp_mutex_unlock (&acc_device_lock);
   if (!dev)
     gomp_fatal ("device %s not supported", name_of_acc_device_t (d));
 
@@ -539,8 +563,10 @@ acc_set_device_num (int ord, acc_device_t d)
 
       acc_dev = &base_dev[ord];
 
+      gomp_mutex_lock (&acc_dev->lock);
       if (!acc_dev->is_initialized)
         gomp_init_device (acc_dev);
+      gomp_mutex_unlock (&acc_dev->lock);
 
       gomp_mutex_unlock (&acc_device_lock);
 

[-- Attachment #3: more-locking-4.diff --]
[-- Type: text/x-patch, Size: 12124 bytes --]

commit 8ae37f2c789e3a61fe4c62101fc13ea56cc1ee9f
Author: Julian Brown <julian@codesourcery.com>
Date:   Tue Apr 21 12:42:17 2015 -0700

    More locking in oacc-mem.c

diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index 03db8ee..9d47a23 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -218,11 +218,17 @@ acc_shutdown_1 (acc_device_t d)
       /* This would mean the user is shutting down OpenACC in the middle of an
          "acc data" pragma.  Likely not intentional.  */
       if (walk->mapped_data)
-	gomp_fatal ("shutdown in 'acc data' region");
+	{
+	  gomp_mutex_unlock (&goacc_thread_lock);
+	  gomp_fatal ("shutdown in 'acc data' region");
+	}
 
       /* Similarly, if this happens then user code has done something weird.  */
       if (walk->saved_bound_dev)
-        gomp_fatal ("shutdown during host fallback");
+	{
+	  gomp_mutex_unlock (&goacc_thread_lock);
+	  gomp_fatal ("shutdown during host fallback");
+	}
 
       if (walk->dev)
 	{
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 89ef5fc..90d43eb 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -35,7 +35,8 @@
 #include <stdint.h>
 #include <assert.h>
 
-/* Return block containing [H->S), or NULL if not contained.  */
+/* Return block containing [H->S), or NULL if not contained.  The device lock
+   for DEV must be locked on entry, and remains locked on exit.  */
 
 static splay_tree_key
 lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
@@ -46,9 +47,7 @@ lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
   node.host_start = (uintptr_t) h;
   node.host_end = (uintptr_t) h + s;
 
-  gomp_mutex_lock (&dev->lock);
   key = splay_tree_lookup (&dev->mem_map, &node);
-  gomp_mutex_unlock (&dev->lock);
 
   return key;
 }
@@ -56,7 +55,8 @@ lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
 /* Return block containing [D->S), or NULL if not contained.
    The list isn't ordered by device address, so we have to iterate
    over the whole array.  This is not expected to be a common
-   operation.  */
+   operation.  The device lock associated with TGT must be locked on entry, and
+   remains locked on exit.  */
 
 static splay_tree_key
 lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
@@ -67,16 +67,12 @@ lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
   if (!tgt)
     return NULL;
 
-  gomp_mutex_lock (&tgt->device_descr->lock);
-
   for (t = tgt; t != NULL; t = t->prev)
     {
       if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
         break;
     }
 
-  gomp_mutex_unlock (&tgt->device_descr->lock);
-
   if (!t)
     return NULL;
 
@@ -119,26 +115,35 @@ void
 acc_free (void *d)
 {
   splay_tree_key k;
-  struct goacc_thread *thr = goacc_thread ();
 
   if (!d)
     return;
 
+  struct goacc_thread *thr = goacc_thread ();
+
   assert (thr && thr->dev);
 
+  struct gomp_device_descr *acc_dev = thr->dev;
+
+  gomp_mutex_lock (&acc_dev->lock);
+
   /* We don't have to call lazy open here, as the ptr value must have
      been returned by acc_malloc.  It's not permitted to pass NULL in
      (unless you got that null from acc_malloc).  */
-  if ((k = lookup_dev (thr->dev->openacc.data_environ, d, 1)))
-   {
-     void *offset;
+  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
+    {
+      void *offset;
 
-     offset = d - k->tgt->tgt_start + k->tgt_offset;
+      offset = d - k->tgt->tgt_start + k->tgt_offset;
 
-     acc_unmap_data ((void *)(k->host_start + offset));
-   }
+      gomp_mutex_unlock (&acc_dev->lock);
 
-  thr->dev->free_func (thr->dev->target_id, d);
+      acc_unmap_data ((void *)(k->host_start + offset));
+    }
+  else
+    gomp_mutex_unlock (&acc_dev->lock);
+
+  acc_dev->free_func (acc_dev->target_id, d);
 }
 
 void
@@ -178,16 +183,24 @@ acc_deviceptr (void *h)
   goacc_lazy_initialize ();
 
   struct goacc_thread *thr = goacc_thread ();
+  struct gomp_device_descr *dev = thr->dev;
+
+  gomp_mutex_lock (&dev->lock);
 
-  n = lookup_host (thr->dev, h, 1);
+  n = lookup_host (dev, h, 1);
 
   if (!n)
-    return NULL;
+    {
+      gomp_mutex_unlock (&dev->lock);
+      return NULL;
+    }
 
   offset = h - n->host_start;
 
   d = n->tgt->tgt_start + n->tgt_offset + offset;
 
+  gomp_mutex_unlock (&dev->lock);
+
   return d;
 }
 
@@ -204,16 +217,24 @@ acc_hostptr (void *d)
   goacc_lazy_initialize ();
 
   struct goacc_thread *thr = goacc_thread ();
+  struct gomp_device_descr *acc_dev = thr->dev;
 
-  n = lookup_dev (thr->dev->openacc.data_environ, d, 1);
+  gomp_mutex_lock (&acc_dev->lock);
+
+  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);
 
   if (!n)
-    return NULL;
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      return NULL;
+    }
 
   offset = d - n->tgt->tgt_start + n->tgt_offset;
 
   h = n->host_start + offset;
 
+  gomp_mutex_unlock (&acc_dev->lock);
+
   return h;
 }
 
@@ -232,6 +253,8 @@ acc_is_present (void *h, size_t s)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
+  gomp_mutex_lock (&acc_dev->lock);
+
   n = lookup_host (acc_dev, h, s);
 
   if (n && ((uintptr_t)h < n->host_start
@@ -239,6 +262,8 @@ acc_is_present (void *h, size_t s)
 	    || s > n->host_end - n->host_start))
     n = NULL;
 
+  gomp_mutex_unlock (&acc_dev->lock);
+
   return n != NULL;
 }
 
@@ -274,20 +299,32 @@ acc_map_data (void *h, void *d, size_t s)
 	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
                     (void *)h, (int)s, (void *)d, (int)s);
 
+      gomp_mutex_lock (&acc_dev->lock);
+
       if (lookup_host (acc_dev, h, s))
-	gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
-		    (int)s);
+        {
+	  gomp_mutex_unlock (&acc_dev->lock);
+	  gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
+		      (int)s);
+	}
 
       if (lookup_dev (thr->dev->openacc.data_environ, d, s))
-	gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
-		    (int)s);
+        {
+	  gomp_mutex_unlock (&acc_dev->lock);
+	  gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
+		      (int)s);
+	}
+
+      gomp_mutex_unlock (&acc_dev->lock);
 
       tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
 			   &kinds, true, false);
     }
 
+  gomp_mutex_lock (&acc_dev->lock);
   tgt->prev = acc_dev->openacc.data_environ;
   acc_dev->openacc.data_environ = tgt;
+  gomp_mutex_unlock (&acc_dev->lock);
 }
 
 void
@@ -299,17 +336,26 @@ acc_unmap_data (void *h)
   /* No need to call lazy open, as the address must have been mapped.  */
 
   size_t host_size;
+
+  gomp_mutex_lock (&acc_dev->lock);
+
   splay_tree_key n = lookup_host (acc_dev, h, 1);
   struct target_mem_desc *t;
 
   if (!n)
-    gomp_fatal ("%p is not a mapped block", (void *)h);
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      gomp_fatal ("%p is not a mapped block", (void *)h);
+    }
 
   host_size = n->host_end - n->host_start;
 
   if (n->host_start != (uintptr_t) h)
-    gomp_fatal ("[%p,%d] surrounds1 %p",
-		(void *) n->host_start, (int) host_size, (void *) h);
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      gomp_fatal ("[%p,%d] surrounds %p",
+		  (void *) n->host_start, (int) host_size, (void *) h);
+    }
 
   t = n->tgt;
 
@@ -323,8 +369,6 @@ acc_unmap_data (void *h)
       t->tgt_end = 0;
       t->to_free = 0;
 
-      gomp_mutex_lock (&acc_dev->lock);
-
       for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
 	   tp = t, t = t->prev)
 	if (n->tgt == t)
@@ -336,10 +380,10 @@ acc_unmap_data (void *h)
 
 	    break;
 	  }
-
-      gomp_mutex_unlock (&acc_dev->lock);
     }
 
+  gomp_mutex_unlock (&acc_dev->lock);
+
   gomp_unmap_vars (t, true);
 }
 
@@ -361,6 +405,8 @@ present_create_copy (unsigned f, void *h, size_t s)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
+  gomp_mutex_lock (&acc_dev->lock);
+
   n = lookup_host (acc_dev, h, s);
   if (n)
     {
@@ -368,13 +414,22 @@ present_create_copy (unsigned f, void *h, size_t s)
       d = (void *) (n->tgt->tgt_start + n->tgt_offset);
 
       if (!(f & FLAG_PRESENT))
-        gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
-            (void *)h, (int)s, (void *)d, (int)s);
+        {
+	  gomp_mutex_unlock (&acc_dev->lock);
+          gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
+        	      (void *)h, (int)s, (void *)d, (int)s);
+	}
       if ((h + s) > (void *)n->host_end)
-        gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
+	{
+	  gomp_mutex_unlock (&acc_dev->lock);
+	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
+	}
+
+      gomp_mutex_unlock (&acc_dev->lock);
     }
   else if (!(f & FLAG_CREATE))
     {
+      gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
     }
   else
@@ -389,6 +444,8 @@ present_create_copy (unsigned f, void *h, size_t s)
       else
 	kinds = GOMP_MAP_ALLOC;
 
+      gomp_mutex_unlock (&acc_dev->lock);
+
       tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
 			   false);
 
@@ -439,21 +496,31 @@ delete_copyout (unsigned f, void *h, size_t s)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
+  gomp_mutex_lock (&acc_dev->lock);
+
   n = lookup_host (acc_dev, h, s);
 
   /* No need to call lazy open, as the data must already have been
      mapped.  */
 
   if (!n)
-    gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
+    }
 
   d = (void *) (n->tgt->tgt_start + n->tgt_offset);
 
   host_size = n->host_end - n->host_start;
 
   if (n->host_start != (uintptr_t) h || host_size != s)
-    gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
-		(void *) n->host_start, (int) host_size, (void *) h, (int) s);
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
+		  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
+    }
+
+  gomp_mutex_unlock (&acc_dev->lock);
 
   if (f & FLAG_COPYOUT)
     acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
@@ -482,16 +549,23 @@ update_dev_host (int is_dev, void *h, size_t s)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
+  gomp_mutex_lock (&acc_dev->lock);
+
   n = lookup_host (acc_dev, h, s);
 
   /* No need to call lazy open, as the data must already have been
      mapped.  */
 
   if (!n)
-    gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
+    }
 
   d = (void *) (n->tgt->tgt_start + n->tgt_offset);
 
+  gomp_mutex_unlock (&acc_dev->lock);
+
   if (is_dev)
     acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
   else
@@ -522,8 +596,11 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
 		       NULL, sizes, kinds, true, false);
   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
+
+  gomp_mutex_lock (&acc_dev->lock);
   tgt->prev = acc_dev->openacc.data_environ;
   acc_dev->openacc.data_environ = tgt;
+  gomp_mutex_unlock (&acc_dev->lock);
 }
 
 void
@@ -535,10 +612,15 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
   struct target_mem_desc *t;
   int minrefs = (mapnum == 1) ? 2 : 3;
 
+  gomp_mutex_lock (&acc_dev->lock);
+
   n = lookup_host (acc_dev, h, 1);
 
   if (!n)
-    gomp_fatal ("%p is not a mapped block", (void *)h);
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      gomp_fatal ("%p is not a mapped block", (void *)h);
+    }
 
   gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
 
@@ -546,8 +628,6 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
 
   struct target_mem_desc *tp;
 
-  gomp_mutex_lock (&acc_dev->lock);
-
   if (t->refcount == minrefs)
     {
       /* This is the last reference, so pull the descriptor off the

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Tidy up locking for libgomp OpenACC entry points
  2015-04-24 17:13   ` Julian Brown
@ 2015-04-29 17:47     ` Thomas Schwinge
  0 siblings, 0 replies; 5+ messages in thread
From: Thomas Schwinge @ 2015-04-29 17:47 UTC (permalink / raw)
  To: Julian Brown; +Cc: gcc-patches, Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 1150 bytes --]

Hi Julian!

On Fri, 24 Apr 2015 18:13:08 +0100, Julian Brown <julian@codesourcery.com> wrote:
> On Thu, 23 Apr 2015 18:41:34 +0200
> Thomas Schwinge <thomas@codesourcery.com> wrote:
> > On Wed, 22 Apr 2015 19:42:43 +0100, Julian Brown
> > <julian@codesourcery.com> wrote:
> > > This patch is an attempt to fix some potential race conditions with
> > > accesses to shared data structures from multiple concurrent threads
> > > in libgomp's OpenACC entry points.

> > > Tests look OK (with offloading to NVidia PTX).
> > 
> > How did you test to get some confidence in the locking being
> > sufficient?
> 
> Merely by running the existing tests and via inspection, sadly. I'm not
> sure how much value we'd get from implementing an exhaustive threading
> testsuite at this stage: I guess testcases will be easier to come by in
> the future if/when people start to use e.g. OpenMP and OpenACC together.

;-) The poor person to be the first to actually exercise all that code...

Thanks for your explanations and the rework.

> Re-tested (libgomp), results look OK.
> 
> OK to apply now?

OK, thanks!


Grüße,
 Thomas

[-- Attachment #2: Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-04-29 17:19 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-22 18:43 [PATCH] Tidy up locking for libgomp OpenACC entry points Julian Brown
2015-04-23  8:32 ` Jakub Jelinek
2015-04-23 16:42 ` Thomas Schwinge
2015-04-24 17:13   ` Julian Brown
2015-04-29 17:47     ` Thomas Schwinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).