From: Thomas Schwinge <Thomas_Schwinge@mentor.com>
To: Chung-Lin Tang <cltang@codesourcery.com>
Cc: <gcc-patches@gcc.gnu.org>
Subject: Re: [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts
Date: Fri, 14 Dec 2018 14:56:00 -0000 [thread overview]
Message-ID: <yxfpefakupg7.fsf@hertz.schwinge.homeip.net> (raw)
In-Reply-To: <12319572-dd02-c946-f2b9-9d047be9c707@mentor.com>
Hi Chung-Lin!
On Tue, 25 Sep 2018 21:10:47 +0800, Chung-Lin Tang <chunglin_tang@mentor.com> wrote:
> --- a/libgomp/oacc-async.c
> +++ b/libgomp/oacc-async.c
> +attribute_hidden struct goacc_asyncqueue *
> +lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
> +{
> + /* The special value acc_async_noval (-1) maps to the thread-specific
> + default async stream. */
> + if (async == acc_async_noval)
> + async = thr->default_async;
> +
> + if (async == acc_async_sync)
> + return NULL;
> +
> + if (async < 0)
> + gomp_fatal ("bad async %d", async);
> +
> + struct gomp_device_descr *dev = thr->dev;
> +
> + if (!create
> + && (async >= dev->openacc.async.nasyncqueue
> + || !dev->openacc.async.asyncqueue[async]))
> + return NULL;
> +
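Doesn't this last block (the "!create" early-return check, which reads
"nasyncqueue" and "asyncqueue[async]") also have to be done under the
lock you're taking below?  Otherwise it can race with another thread
that is reallocating "asyncqueue" and updating "nasyncqueue" while
holding that lock.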
> + gomp_mutex_lock (&dev->openacc.async.lock);
> + if (async >= dev->openacc.async.nasyncqueue)
> + {
> + int diff = async + 1 - dev->openacc.async.nasyncqueue;
> + dev->openacc.async.asyncqueue
> + = gomp_realloc (dev->openacc.async.asyncqueue,
> + sizeof (goacc_aq) * (async + 1));
> + memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
> + 0, sizeof (goacc_aq) * diff);
> + dev->openacc.async.nasyncqueue = async + 1;
> + }
> +
> + if (!dev->openacc.async.asyncqueue[async])
> + {
> + dev->openacc.async.asyncqueue[async] = dev->openacc.async.construct_func ();
> +
> + /* Link new async queue into active list. */
> + goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
> + n->aq = dev->openacc.async.asyncqueue[async];
> + n->next = dev->openacc.async.active;
> + dev->openacc.async.active = n;
> + }
> + gomp_mutex_unlock (&dev->openacc.async.lock);
> + return dev->openacc.async.asyncqueue[async];
> +}
And then, some more concerns, as encoded in the following patch (but
please also continue reading below):
commit d2d6aaeca840debbec14e421be705ef56d444ac7
Author: Thomas Schwinge <thomas@codesourcery.com>
Date: Wed Dec 12 15:57:30 2018 +0100
into async re-work: locking concerns
---
libgomp/oacc-async.c | 18 +++++++++++++++---
libgomp/plugin/plugin-nvptx.c | 6 ++++++
2 files changed, 21 insertions(+), 3 deletions(-)
diff --git libgomp/oacc-async.c libgomp/oacc-async.c
index 89a405ebcdb1..68e4e65e8182 100644
--- libgomp/oacc-async.c
+++ libgomp/oacc-async.c
@@ -84,17 +84,21 @@ lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
if (id < 0)
return NULL;
+ struct goacc_asyncqueue *ret = NULL;
+
struct gomp_device_descr *dev = thr->dev;
+ gomp_mutex_lock (&dev->openacc.async.lock);
+
if (!create
&& (id >= dev->openacc.async.nasyncqueue
|| !dev->openacc.async.asyncqueue[id]))
- return NULL;
+ goto out;
- gomp_mutex_lock (&dev->openacc.async.lock);
if (id >= dev->openacc.async.nasyncqueue)
{
int diff = id + 1 - dev->openacc.async.nasyncqueue;
+ // TODO gomp_realloc might call "gomp_fatal" with "&dev->openacc.async.lock" locked. Might cause deadlock?
dev->openacc.async.asyncqueue
= gomp_realloc (dev->openacc.async.asyncqueue,
sizeof (goacc_aq) * (id + 1));
@@ -105,16 +109,23 @@ lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
if (!dev->openacc.async.asyncqueue[id])
{
+ //TODO We have "&dev->openacc.async.lock" locked here, and if "openacc.async.construct_func" calls "GOMP_PLUGIN_fatal" (via "CUDA_CALL_ASSERT", for example), that might cause deadlock?
+ //TODO Change the interface to emit an error in the plugin, but then "return NULL", and we catch that here, unlock, and bail out?
dev->openacc.async.asyncqueue[id] = dev->openacc.async.construct_func ();
/* Link new async queue into active list. */
+ // TODO gomp_malloc might call "gomp_fatal" with "&dev->openacc.async.lock" locked. Might cause deadlock?
goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
n->aq = dev->openacc.async.asyncqueue[id];
n->next = dev->openacc.async.active;
dev->openacc.async.active = n;
}
+ ret = dev->openacc.async.asyncqueue[id];
+
+ out:
gomp_mutex_unlock (&dev->openacc.async.lock);
- return dev->openacc.async.asyncqueue[id];
+
+ return ret;
}
/* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This
@@ -305,6 +316,7 @@ goacc_fini_asyncqueues (struct gomp_device_descr *devicep)
goacc_aq_list next;
for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
{
+ //TODO Can/should/must we "synchronize" here (how?), so as to make sure that no other operation on this asyncqueue is going on while/after we've destructed it here?
ret &= devicep->openacc.async.destruct_func (l->aq);
next = l->next;
free (l);
diff --git libgomp/plugin/plugin-nvptx.c libgomp/plugin/plugin-nvptx.c
index 577ed39ef3f6..872e91f05e78 100644
--- libgomp/plugin/plugin-nvptx.c
+++ libgomp/plugin/plugin-nvptx.c
@@ -1389,6 +1389,7 @@ GOMP_OFFLOAD_openacc_async_test (struct goacc_asyncqueue *aq)
if (r == CUDA_ERROR_NOT_READY)
return 0;
+ //TODO Is this safe to call, or might this cause deadlock if something's locked?
GOMP_PLUGIN_error ("cuStreamQuery error: %s", cuda_error (r));
return -1;
}
@@ -1396,6 +1397,7 @@ GOMP_OFFLOAD_openacc_async_test (struct goacc_asyncqueue *aq)
void
GOMP_OFFLOAD_openacc_async_synchronize (struct goacc_asyncqueue *aq)
{
+ //TODO Is this safe to call, or might this cause deadlock if something's locked?
CUDA_CALL_ASSERT (cuStreamSynchronize, aq->cuda_stream);
}
@@ -1404,6 +1406,7 @@ GOMP_OFFLOAD_openacc_async_serialize (struct goacc_asyncqueue *aq1,
struct goacc_asyncqueue *aq2)
{
CUevent e;
+ //TODO Are these safe to call, or might this cause deadlock if something's locked?
CUDA_CALL_ASSERT (cuEventCreate, &e, CU_EVENT_DISABLE_TIMING);
CUDA_CALL_ASSERT (cuEventRecord, e, aq1->cuda_stream);
CUDA_CALL_ASSERT (cuStreamWaitEvent, aq2->cuda_stream, e, 0);
@@ -1413,6 +1416,7 @@ static void
cuda_callback_wrapper (CUstream stream, CUresult res, void *ptr)
{
if (res != CUDA_SUCCESS)
+ //TODO Is this safe to call, or might this cause deadlock if something's locked?
GOMP_PLUGIN_fatal ("%s error: %s", __FUNCTION__, cuda_error (res));
struct nvptx_callback *cb = (struct nvptx_callback *) ptr;
cb->fn (cb->ptr);
@@ -1424,10 +1428,12 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq,
void (*callback_fn)(void *),
void *userptr)
{
+ //TODO Is this safe to call, or might this cause deadlock if something's locked?
struct nvptx_callback *b = GOMP_PLUGIN_malloc (sizeof (*b));
b->fn = callback_fn;
b->ptr = userptr;
b->aq = aq;
+ //TODO Is this safe to call, or might this cause deadlock if something's locked?
CUDA_CALL_ASSERT (cuStreamAddCallback, aq->cuda_stream,
cuda_callback_wrapper, (void *) b, 0);
}
But then, I wonder whether we could avoid all that locking by moving the
"asyncqueue"s from "acc_dispatch_t" into "goacc_thread"?
commit c9282e058f67cb8f8ca1720d7f9e3fe0c04b6c89
Author: Thomas Schwinge <thomas@codesourcery.com>
Date: Thu Dec 13 18:00:16 2018 +0100
[TODO] into async re-work: move "asyncqueue"s from "acc_dispatch_t" into "goacc_thread"?
---
libgomp/libgomp.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git libgomp/libgomp.h libgomp/libgomp.h
index 574fcd1ee4ad..09852589d2f1 100644
--- libgomp/libgomp.h
+++ libgomp/libgomp.h
@@ -949,6 +949,11 @@ typedef struct acc_dispatch_t
__typeof (GOMP_OFFLOAD_openacc_exec) *exec_func;
struct {
+ //TODO Why do these live in the "device" data structure, and not in the "per-thread" data structure?
+ //TODO Aren't they meant to be separate per thread?
+ //TODO That is, as far as I remember right now, OpenACC explicitly states that an asyncqueue doesn't entail any synchronization between different host threads.
+ //TODO Verify OpenACC.
+ //TODO With that moved into "goacc_thread", we could get rid of all the locking needed here?
/* Once created and put into the "active" list, asyncqueues are then never
destructed and removed from the "active" list, other than if the TODO
device is shut down. */
At this point, I will state again (as in that other email) that, as I
understand OpenACC, an async queue does not entail any inter-thread
synchronization, so it would seem reasonable for all async queues to be
separate per thread.
Grüße
Thomas
next prev parent reply other threads:[~2018-12-14 14:56 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-09-25 13:11 Chung-Lin Tang
2018-12-07 11:33 ` Thomas Schwinge
2018-12-07 14:19 ` Chung-Lin Tang
2018-12-14 14:11 ` Thomas Schwinge
2018-12-14 14:17 ` Thomas Schwinge
2018-12-14 14:52 ` Chung-Lin Tang
2018-12-17 13:52 ` Thomas Schwinge
2018-12-18 9:35 ` Chung-Lin Tang
2018-12-14 14:32 ` Thomas Schwinge
2018-12-14 14:42 ` Chung-Lin Tang
2018-12-17 13:56 ` Thomas Schwinge
2018-12-14 14:54 ` Thomas Schwinge
2018-12-14 15:01 ` Chung-Lin Tang
2018-12-17 14:11 ` Thomas Schwinge
2018-12-14 14:56 ` Thomas Schwinge [this message]
2018-12-17 11:03 ` Chung-Lin Tang
2018-12-17 14:32 ` Thomas Schwinge
2018-12-18 10:03 ` Chung-Lin Tang
2018-12-18 11:44 ` Thomas Schwinge
2018-12-18 15:06 ` [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts (revised, v2) Chung-Lin Tang
2018-12-18 21:04 ` Thomas Schwinge
2018-12-21 16:25 ` [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts (revised, v3) Chung-Lin Tang
2018-12-28 14:52 ` Thomas Schwinge
2019-01-02 12:46 ` [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts (revised, v2) Chung-Lin Tang
2019-01-05 9:47 ` [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts (revised, v4) Chung-Lin Tang
2019-01-07 14:16 ` Thomas Schwinge
2019-01-08 14:04 ` Chung-Lin Tang
2019-01-07 14:15 ` [PATCH 2/6, OpenACC, libgomp] Async re-work, oacc-* parts (revised, v2) Thomas Schwinge
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=yxfpefakupg7.fsf@hertz.schwinge.homeip.net \
--to=thomas_schwinge@mentor.com \
--cc=cltang@codesourcery.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).