public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Thomas Schwinge <Thomas_Schwinge@mentor.com>
To: Chung-Lin Tang <cltang@codesourcery.com>
Cc: <gcc-patches@gcc.gnu.org>
Subject: Re: [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts
Date: Fri, 14 Dec 2018 14:56:00 -0000	[thread overview]
Message-ID: <yxfpefakupg7.fsf@hertz.schwinge.homeip.net> (raw)
In-Reply-To: <12319572-dd02-c946-f2b9-9d047be9c707@mentor.com>

Hi Chung-Lin!

On Tue, 25 Sep 2018 21:10:47 +0800, Chung-Lin Tang <chunglin_tang@mentor.com> wrote:
> --- a/libgomp/oacc-async.c
> +++ b/libgomp/oacc-async.c

> +attribute_hidden struct goacc_asyncqueue *
> +lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
> +{
> +  /* The special value acc_async_noval (-1) maps to the thread-specific
> +     default async stream.  */
> +  if (async == acc_async_noval)
> +    async = thr->default_async;
> +
> +  if (async == acc_async_sync)
> +    return NULL;
> +
> +  if (async < 0)
> +    gomp_fatal ("bad async %d", async);
> +
> +  struct gomp_device_descr *dev = thr->dev;
> +
> +  if (!create
> +      && (async >= dev->openacc.async.nasyncqueue
> +	  || !dev->openacc.async.asyncqueue[async]))
> +    return NULL;
> +

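Doesn't this last block also have to be included in the lock you're
taking below?  As posted, it reads "nasyncqueue" and "asyncqueue[async]"
without the lock held, while another thread may concurrently be inside
the "gomp_realloc" further down.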

> +  gomp_mutex_lock (&dev->openacc.async.lock);
> +  if (async >= dev->openacc.async.nasyncqueue)
> +    {
> +      int diff = async + 1 - dev->openacc.async.nasyncqueue;
> +      dev->openacc.async.asyncqueue
> +	= gomp_realloc (dev->openacc.async.asyncqueue,
> +			sizeof (goacc_aq) * (async + 1));
> +      memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
> +	      0, sizeof (goacc_aq) * diff);
> +      dev->openacc.async.nasyncqueue = async + 1;
> +    }
> +
> +  if (!dev->openacc.async.asyncqueue[async])
> +    {
> +      dev->openacc.async.asyncqueue[async] = dev->openacc.async.construct_func ();
> +
> +      /* Link new async queue into active list.  */
> +      goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
> +      n->aq = dev->openacc.async.asyncqueue[async];
> +      n->next = dev->openacc.async.active;
> +      dev->openacc.async.active = n;
> +    }
> +  gomp_mutex_unlock (&dev->openacc.async.lock);
> +  return dev->openacc.async.asyncqueue[async];
> +}

And then, some more concerns, as encoded in the following patch (but
please also continue reading below):

commit d2d6aaeca840debbec14e421be705ef56d444ac7
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Wed Dec 12 15:57:30 2018 +0100

    into async re-work: locking concerns
---
 libgomp/oacc-async.c          | 18 +++++++++++++++---
 libgomp/plugin/plugin-nvptx.c |  6 ++++++
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git libgomp/oacc-async.c libgomp/oacc-async.c
index 89a405ebcdb1..68e4e65e8182 100644
--- libgomp/oacc-async.c
+++ libgomp/oacc-async.c
@@ -84,17 +84,21 @@ lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
   if (id < 0)
     return NULL;
 
+  struct goacc_asyncqueue *ret = NULL;
+
   struct gomp_device_descr *dev = thr->dev;
 
+  gomp_mutex_lock (&dev->openacc.async.lock);
+
   if (!create
       && (id >= dev->openacc.async.nasyncqueue
 	  || !dev->openacc.async.asyncqueue[id]))
-    return NULL;
+    goto out;
 
-  gomp_mutex_lock (&dev->openacc.async.lock);
   if (id >= dev->openacc.async.nasyncqueue)
     {
       int diff = id + 1 - dev->openacc.async.nasyncqueue;
+      // TODO gomp_realloc might call "gomp_fatal" with "&dev->openacc.async.lock" locked.  Might cause deadlock?
       dev->openacc.async.asyncqueue
 	= gomp_realloc (dev->openacc.async.asyncqueue,
 			sizeof (goacc_aq) * (id + 1));
@@ -105,16 +109,23 @@ lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
 
   if (!dev->openacc.async.asyncqueue[id])
     {
+      //TODO We have "&dev->openacc.async.lock" locked here, and if "openacc.async.construct_func" calls "GOMP_PLUGIN_fatal" (via "CUDA_CALL_ASSERT", for example), that might cause deadlock?
+      //TODO Change the interface to emit an error in the plugin, but then "return NULL", and we catch that here, unlock, and bail out?
       dev->openacc.async.asyncqueue[id] = dev->openacc.async.construct_func ();
 
       /* Link new async queue into active list.  */
+      // TODO gomp_malloc might call "gomp_fatal" with "&dev->openacc.async.lock" locked.  Might cause deadlock?
       goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
       n->aq = dev->openacc.async.asyncqueue[id];
       n->next = dev->openacc.async.active;
       dev->openacc.async.active = n;
     }
+  ret = dev->openacc.async.asyncqueue[id];
+
+ out:
   gomp_mutex_unlock (&dev->openacc.async.lock);
-  return dev->openacc.async.asyncqueue[id];
+
+  return ret;
 }
 
 /* Return the asyncqueue to be used for OpenACC async-argument ASYNC.  This
@@ -305,6 +316,7 @@ goacc_fini_asyncqueues (struct gomp_device_descr *devicep)
       goacc_aq_list next;
       for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
 	{
+	  //TODO Can/should/must we "synchronize" here (how?), so as to make sure that no other operation on this asyncqueue is going on while/after we've destructed it here?
 	  ret &= devicep->openacc.async.destruct_func (l->aq);
 	  next = l->next;
 	  free (l);
diff --git libgomp/plugin/plugin-nvptx.c libgomp/plugin/plugin-nvptx.c
index 577ed39ef3f6..872e91f05e78 100644
--- libgomp/plugin/plugin-nvptx.c
+++ libgomp/plugin/plugin-nvptx.c
@@ -1389,6 +1389,7 @@ GOMP_OFFLOAD_openacc_async_test (struct goacc_asyncqueue *aq)
   if (r == CUDA_ERROR_NOT_READY)
     return 0;
 
+  //TODO Is this safe to call, or might this cause deadlock if something's locked?
   GOMP_PLUGIN_error ("cuStreamQuery error: %s", cuda_error (r));
   return -1;
 }
@@ -1396,6 +1397,7 @@ GOMP_OFFLOAD_openacc_async_test (struct goacc_asyncqueue *aq)
 void
 GOMP_OFFLOAD_openacc_async_synchronize (struct goacc_asyncqueue *aq)
 {
+  //TODO Is this safe to call, or might this cause deadlock if something's locked?
   CUDA_CALL_ASSERT (cuStreamSynchronize, aq->cuda_stream);
 }
 
@@ -1404,6 +1406,7 @@ GOMP_OFFLOAD_openacc_async_serialize (struct goacc_asyncqueue *aq1,
 				      struct goacc_asyncqueue *aq2)
 {
   CUevent e;
+  //TODO Are these safe to call, or might this cause deadlock if something's locked?
   CUDA_CALL_ASSERT (cuEventCreate, &e, CU_EVENT_DISABLE_TIMING);
   CUDA_CALL_ASSERT (cuEventRecord, e, aq1->cuda_stream);
   CUDA_CALL_ASSERT (cuStreamWaitEvent, aq2->cuda_stream, e, 0);
@@ -1413,6 +1416,7 @@ static void
 cuda_callback_wrapper (CUstream stream, CUresult res, void *ptr)
 {
   if (res != CUDA_SUCCESS)
+    //TODO Is this safe to call, or might this cause deadlock if something's locked?
     GOMP_PLUGIN_fatal ("%s error: %s", __FUNCTION__, cuda_error (res));
   struct nvptx_callback *cb = (struct nvptx_callback *) ptr;
   cb->fn (cb->ptr);
@@ -1424,10 +1428,12 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq,
 					   void (*callback_fn)(void *),
 					   void *userptr)
 {
+  //TODO Is this safe to call, or might this cause deadlock if something's locked?
   struct nvptx_callback *b = GOMP_PLUGIN_malloc (sizeof (*b));
   b->fn = callback_fn;
   b->ptr = userptr;
   b->aq = aq;
+  //TODO Is this safe to call, or might this cause deadlock if something's locked?
   CUDA_CALL_ASSERT (cuStreamAddCallback, aq->cuda_stream,
 		    cuda_callback_wrapper, (void *) b, 0);
 }


But then, I wonder if we couldn't skip all that locking, if we moved the
"asyncqueue"s from "acc_dispatch_t" into "goacc_thread"?

commit c9282e058f67cb8f8ca1720d7f9e3fe0c04b6c89
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Thu Dec 13 18:00:16 2018 +0100

    [TODO] into async re-work: move "asyncqueue"s from "acc_dispatch_t" into "goacc_thread"?
---
 libgomp/libgomp.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git libgomp/libgomp.h libgomp/libgomp.h
index 574fcd1ee4ad..09852589d2f1 100644
--- libgomp/libgomp.h
+++ libgomp/libgomp.h
@@ -949,6 +949,11 @@ typedef struct acc_dispatch_t
   __typeof (GOMP_OFFLOAD_openacc_exec) *exec_func;
 
   struct {
+    //TODO Why do these live in the "device" data structure, and not in the "per-thread" data structure?
+    //TODO Aren't they meant to be separate per thread?
+    //TODO That is, as far as I remember right now, OpenACC explicitly states that an asyncqueue doesn't entail any synchronization between different host threads.
+    //TODO Verify OpenACC.
+    //TODO With that moved into "goacc_thread", we could get rid of all the locking needed here?
     /* Once created and put into the "active" list, asyncqueues are then never
        destructed and removed from the "active" list, other than if the TODO
        device is shut down.  */

At this point, I will again (as in that other email) state that my
understanding of OpenACC is that an async queue does not entail any
inter-thread synchronization, so it would seem reasonable that all async
queues are separate per thread.


Grüße
 Thomas

  parent reply	other threads:[~2018-12-14 14:56 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-25 13:11 Chung-Lin Tang
2018-12-07 11:33 ` Thomas Schwinge
2018-12-07 14:19   ` Chung-Lin Tang
2018-12-14 14:11     ` Thomas Schwinge
2018-12-14 14:17 ` Thomas Schwinge
2018-12-14 14:52   ` Chung-Lin Tang
2018-12-17 13:52     ` Thomas Schwinge
2018-12-18  9:35       ` Chung-Lin Tang
2018-12-14 14:32 ` Thomas Schwinge
2018-12-14 14:42   ` Chung-Lin Tang
2018-12-17 13:56     ` Thomas Schwinge
2018-12-14 14:54 ` Thomas Schwinge
2018-12-14 15:01   ` Chung-Lin Tang
2018-12-17 14:11     ` Thomas Schwinge
2018-12-14 14:56 ` Thomas Schwinge [this message]
2018-12-17 11:03   ` Chung-Lin Tang
2018-12-17 14:32     ` Thomas Schwinge
2018-12-18 10:03       ` Chung-Lin Tang
2018-12-18 11:44         ` Thomas Schwinge
2018-12-18 15:06 ` [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts (revised, v2) Chung-Lin Tang
2018-12-18 21:04   ` Thomas Schwinge
2018-12-21 16:25     ` [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts (revised, v3) Chung-Lin Tang
2018-12-28 14:52       ` Thomas Schwinge
2019-01-02 12:46     ` [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts (revised, v2) Chung-Lin Tang
2019-01-05  9:47       ` [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts (revised, v4) Chung-Lin Tang
2019-01-07 14:16         ` Thomas Schwinge
2019-01-08 14:04           ` Chung-Lin Tang
2019-01-07 14:15       ` [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts (revised, v2) Thomas Schwinge

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=yxfpefakupg7.fsf@hertz.schwinge.homeip.net \
    --to=thomas_schwinge@mentor.com \
    --cc=cltang@codesourcery.com \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).