public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/omp/gcc-12] OpenACC profiling-interface fixes for asynchronous operations
@ 2022-06-29 14:36 Kwok Yeung
  0 siblings, 0 replies; only message in thread
From: Kwok Yeung @ 2022-06-29 14:36 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:719f93c8618a134f90b5b661ab70c918d659ad05

commit 719f93c8618a134f90b5b661ab70c918d659ad05
Author: Julian Brown <julian@codesourcery.com>
Date:   Tue Sep 10 20:34:45 2019 -0700

    OpenACC profiling-interface fixes for asynchronous operations
    
            libgomp/
            * oacc-host.c (host_openacc_async_queue_callback): Invoke callback
            function immediately.
            * oacc-parallel.c (struct async_prof_callback_info, async_prof_dispatch,
            queue_async_prof_dispatch): New.
            (GOACC_parallel_keyed): Call queue_async_prof_dispatch for asynchronous
            profile-event dispatches.
            (GOACC_update): Likewise.
            * oacc-mem.c (GOACC_enter_exit_data): Call queue_async_prof_dispatch
            for asynchronous profile-event dispatches.
            * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
            (cb_compute_construct_start): Remove/fix TODO.
            * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
            (cb_exit_data_start): Tweak expected state values.
            (cb_exit_data_end): Likewise.
            (cb_compute_construct_start): Remove/fix TODO.
            (cb_compute_construct_end): Don't do adjustments for
            acc_ev_enqueue_launch_start/acc_ev_enqueue_launch_end callbacks.
            (cb_compute_construct_end): Tweak expected state values.
            (cb_enqueue_launch_start, cb_enqueue_launch_end): Don't expect
            launch-enqueue operations to happen synchronously with respect to
            profiling events on async streams.
            (main): Tweak expected state values.
            * testsuite/libgomp.oacc-c-c++-common/lib-94.c (main): Reorder
            operations for async-safety.

Diff:
---
 libgomp/ChangeLog.omp                              |  27 +++
 libgomp/oacc-host.c                                |   5 +-
 libgomp/oacc-mem.c                                 |  32 +++-
 libgomp/oacc-parallel.c                            | 192 ++++++++++++++++-----
 .../libgomp.oacc-c-c++-common/acc_prof-init-1.c    |   5 +-
 .../acc_prof-parallel-1.c                          |  64 ++-----
 6 files changed, 221 insertions(+), 104 deletions(-)

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index c25000dd33a..fd89e921512 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,30 @@
+2019-09-17  Julian Brown  <julian@codesourcery.com>
+
+	* oacc-host.c (host_openacc_async_queue_callback): Invoke callback
+	function immediately.
+	* oacc-parallel.c (struct async_prof_callback_info, async_prof_dispatch,
+	queue_async_prof_dispatch): New.
+	(GOACC_parallel_keyed): Call queue_async_prof_dispatch for asynchronous
+	profile-event dispatches.
+	(GOACC_update): Likewise.
+	* oacc-mem.c (GOACC_enter_exit_data): Call queue_async_prof_dispatch
+	for asynchronous profile-event dispatches.
+	* testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
+	(cb_compute_construct_start): Remove/fix TODO.
+	* testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
+	(cb_exit_data_start): Tweak expected state values.
+	(cb_exit_data_end): Likewise.
+	(cb_compute_construct_start): Remove/fix TODO.
+	(cb_compute_construct_end): Don't do adjustments for
+	acc_ev_enqueue_launch_start/acc_ev_enqueue_launch_end callbacks.
+	(cb_compute_construct_end): Tweak expected state values.
+	(cb_enqueue_launch_start, cb_enqueue_launch_end): Don't expect
+	launch-enqueue operations to happen synchronously with respect to
+	profiling events on async streams.
+	(main): Tweak expected state values.
+	* testsuite/libgomp.oacc-c-c++-common/lib-94.c (main): Reorder
+	operations for async-safety.
+
 2019-09-05  Julian Brown  <julian@codesourcery.com>
 
 	* testsuite/libgomp.oacc-fortran/lib-13.f90: End data region after
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 5bb889926d3..8c8c524599d 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -204,10 +204,9 @@ host_openacc_async_dev2host (int ord __attribute__ ((unused)),
 static void
 host_openacc_async_queue_callback (struct goacc_asyncqueue *aq
 				   __attribute__ ((unused)),
-				   void (*callback_fn)(void *)
-				   __attribute__ ((unused)),
-				   void *userptr __attribute__ ((unused)))
+				   void (*callback_fn)(void *), void *userptr)
 {
+  callback_fn (userptr);
 }
 
 static struct goacc_asyncqueue *
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 73b2710c2b8..9ea625837c5 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -1318,6 +1318,12 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
   gomp_mutex_unlock (&acc_dev->lock);
 }
 
+struct async_prof_callback_info *
+queue_async_prof_dispatch (struct gomp_device_descr *devicep, goacc_aq aq,
+			   acc_prof_info *prof_info, acc_event_info *event_info,
+			   acc_api_info *api_info,
+			   struct async_prof_callback_info *prev_info);
+
 static void
 goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs,
 				size_t *sizes, unsigned short *kinds,
@@ -1328,6 +1334,7 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs,
 
   struct goacc_thread *thr;
   struct gomp_device_descr *acc_dev;
+  struct async_prof_callback_info *data_start_info = NULL;
 
   goacc_lazy_initialize ();
 
@@ -1383,9 +1390,19 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs,
       api_info.async_handle = NULL;
     }
 
+  goacc_aq aq = get_goacc_asyncqueue (async);
+
   if (profiling_p)
-    goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-			      &api_info);
+    {
+      if (aq)
+	data_start_info
+	  = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+				       &enter_exit_data_event_info, &api_info,
+				       NULL);
+      else
+	goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+				  &api_info);
+    }
 
   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
       || (flags & GOACC_FLAG_HOST_FALLBACK))
@@ -1399,8 +1416,6 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs,
   if (num_waits)
     goacc_wait (async, num_waits, ap);
 
-  goacc_aq aq = get_goacc_asyncqueue (async);
-
   if (data_enter)
     goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
   else
@@ -1412,8 +1427,13 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs,
       prof_info.event_type
 	= data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
       enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
-      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-				&api_info);
+      if (aq)
+	queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+				   &enter_exit_data_event_info, &api_info,
+				   data_start_info);
+      else
+	goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+				  &api_info);
 
       thr->prof_info = NULL;
       thr->api_info = NULL;
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index d66bc882a5f..81e8eba4225 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -259,6 +259,62 @@ handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
 }
 
 
+struct async_prof_callback_info {
+  acc_prof_info prof_info;
+  acc_event_info event_info;
+  acc_api_info api_info;
+  struct async_prof_callback_info *start_info;
+};
+
+static void
+async_prof_dispatch (void *ptr)
+{
+  struct async_prof_callback_info *info
+    = (struct async_prof_callback_info *) ptr;
+
+  if (info->start_info)
+    {
+      /* The TOOL_INFO must be preserved from a start event to the
+	 corresponding end event.  Copy that here.  */
+      void *tool_info = info->start_info->event_info.other_event.tool_info;
+      info->event_info.other_event.tool_info = tool_info;
+    }
+
+  goacc_profiling_dispatch (&info->prof_info, &info->event_info,
+			    &info->api_info);
+
+  /* The async_prof_dispatch function is (so far) always used for start/end
+     profiling event pairs: the start and end parts are queued, then each is
+     dispatched (or the dispatches might be interleaved before the end part is
+     queued).
+     In any case, it's not safe to delete either info structure before the
+     whole bracketed event is complete.  */
+
+  if (info->start_info)
+    {
+      free (info->start_info);
+      free (info);
+    }
+}
+
+struct async_prof_callback_info *
+queue_async_prof_dispatch (struct gomp_device_descr *devicep, goacc_aq aq,
+			   acc_prof_info *prof_info, acc_event_info *event_info,
+			   acc_api_info *api_info,
+			   struct async_prof_callback_info *prev_info)
+{
+  struct async_prof_callback_info *info = malloc (sizeof (*info));
+
+  info->prof_info = *prof_info;
+  info->event_info = *event_info;
+  info->api_info = *api_info;
+  info->start_info = prev_info;
+
+  devicep->openacc.async.queue_callback_func (aq, async_prof_dispatch,
+					      (void *) info);
+  return info;
+}
+
 /* Launch a possibly offloaded function with FLAGS.  FN is the host fn
    address.  MAPNUM, HOSTADDRS, SIZES & KINDS  describe the memory
    blocks to be copied to/from the device.  Varadic arguments are
@@ -284,6 +340,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
   unsigned dims[GOMP_DIM_MAX];
   unsigned tag;
   struct goacc_ncarray_info *nca_info = NULL;
+  struct async_prof_callback_info *comp_start_info = NULL,
+				  *data_start_info = NULL;
 
 #ifdef HAVE_INTTYPES_H
   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
@@ -345,31 +403,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
       api_info.async_handle = NULL;
     }
 
-  if (profiling_p)
-    goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
-			      &api_info);
-
   handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
 
-  /* Host fallback if "if" clause is false or if the current device is set to
-     the host.  */
-  if (flags & GOACC_FLAG_HOST_FALLBACK)
-    {
-      prof_info.device_type = acc_device_host;
-      api_info.device_type = prof_info.device_type;
-      goacc_save_and_set_bind (acc_device_host);
-      fn (hostaddrs);
-      goacc_restore_bind ();
-      goto out_prof;
-    }
-  else if (acc_device_type (acc_dev->type) == acc_device_host)
-    {
-      fn (hostaddrs);
-      goto out_prof;
-    }
-  else if (profiling_p)
-    api_info.device_api = acc_device_api_cuda;
-
   /* Default: let the runtime choose.  */
   for (i = 0; i != GOMP_DIM_MAX; i++)
     dims[i] = 0;
@@ -402,11 +437,12 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
 	    if (async == GOMP_LAUNCH_OP_MAX)
 	      async = va_arg (ap, unsigned);
 
-	    if (profiling_p)
-	      {
-		prof_info.async = async;
-		prof_info.async_queue = prof_info.async;
-	      }
+	    /* Set async number in profiling data, unless the device is the
+	       host or we're doing host fallback.  */
+	    if (profiling_p
+	        && !(flags & GOACC_FLAG_HOST_FALLBACK)
+		&& acc_device_type (acc_dev->type) != acc_device_host)
+	      prof_info.async = prof_info.async_queue = async;
 
 	    break;
 	  }
@@ -434,6 +470,39 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
 
   va_end (ap);
 
+  goacc_aq aq = get_goacc_asyncqueue (async);
+
+  if (profiling_p)
+    {
+      if (aq)
+	comp_start_info
+	  = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+				       &compute_construct_event_info,
+				       &api_info, NULL);
+      else
+	goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
+				  &api_info);
+    }
+
+  /* Host fallback if "if" clause is false or if the current device is set to
+     the host.  */
+  if (flags & GOACC_FLAG_HOST_FALLBACK)
+    {
+      prof_info.device_type = acc_device_host;
+      api_info.device_type = prof_info.device_type;
+      goacc_save_and_set_bind (acc_device_host);
+      fn (hostaddrs);
+      goacc_restore_bind ();
+      goto out_prof;
+    }
+  else if (acc_device_type (acc_dev->type) == acc_device_host)
+    {
+      fn (hostaddrs);
+      goto out_prof;
+    }
+  else if (profiling_p)
+    api_info.device_api = acc_device_api_cuda;
+
   if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
     {
       k.host_start = (uintptr_t) fn;
@@ -462,12 +531,16 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
 	= compute_construct_event_info.other_event.parent_construct;
       enter_exit_data_event_info.other_event.implicit = 1;
       enter_exit_data_event_info.other_event.tool_info = NULL;
-      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-				&api_info);
+      if (aq)
+	data_start_info
+	  = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+				       &enter_exit_data_event_info, &api_info,
+				       NULL);
+      else
+	goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+				  &api_info);
     }
 
-  goacc_aq aq = get_goacc_asyncqueue (async);
-
   tgt = gomp_map_vars_openacc (acc_dev, aq, mapnum, hostaddrs, sizes, kinds,
 			       nca_info);
   free (nca_info);
@@ -477,8 +550,13 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
       prof_info.event_type = acc_ev_enter_data_end;
       enter_exit_data_event_info.other_event.event_type
 	= prof_info.event_type;
-      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-				&api_info);
+      if (aq)
+	queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+				   &enter_exit_data_event_info, &api_info,
+				   data_start_info);
+      else
+	goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+				  &api_info);
     }
 
   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
@@ -497,8 +575,14 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
       prof_info.event_type = acc_ev_exit_data_start;
       enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
       enter_exit_data_event_info.other_event.tool_info = NULL;
-      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-				&api_info);
+      if (aq)
+	data_start_info
+	  = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+				       &enter_exit_data_event_info, &api_info,
+				       NULL);
+      else
+	goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+				  &api_info);
     }
 
   /* If running synchronously (aq == NULL), this will unmap immediately.  */
@@ -508,8 +592,13 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
     {
       prof_info.event_type = acc_ev_exit_data_end;
       enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
-      goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
-				&api_info);
+      if (aq)
+	queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+				   &enter_exit_data_event_info, &api_info,
+				   data_start_info);
+      else
+	goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+				  &api_info);
     }
 
  out_prof:
@@ -518,8 +607,13 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
       prof_info.event_type = acc_ev_compute_construct_end;
       compute_construct_event_info.other_event.event_type
 	= prof_info.event_type;
-      goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
-				&api_info);
+      if (aq)
+	queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+				   &compute_construct_event_info, &api_info,
+				   comp_start_info);
+      else
+	goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
+				  &api_info);
 
       thr->prof_info = NULL;
       thr->api_info = NULL;
@@ -757,6 +851,8 @@ GOACC_update (int flags_m, size_t mapnum,
 
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
+  goacc_aq aq = NULL;
+  struct async_prof_callback_info *update_start_info = NULL;
 
   bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
 
@@ -806,7 +902,15 @@ GOACC_update (int flags_m, size_t mapnum,
     }
 
   if (profiling_p)
-    goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
+    {
+      aq = get_goacc_asyncqueue (async);
+      if (aq)
+	update_start_info
+	  = queue_async_prof_dispatch (acc_dev, aq, &prof_info,
+				       &update_event_info, &api_info, NULL);
+      else
+	goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
+    }
 
   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
       || (flags & GOACC_FLAG_HOST_FALLBACK))
@@ -893,7 +997,11 @@ GOACC_update (int flags_m, size_t mapnum,
     {
       prof_info.event_type = acc_ev_update_end;
       update_event_info.other_event.event_type = prof_info.event_type;
-      goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
+      if (aq)
+	queue_async_prof_dispatch (acc_dev, aq, &prof_info, &update_event_info,
+				   &api_info, update_start_info);
+      else
+	goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
 
       thr->prof_info = NULL;
       thr->api_info = NULL;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
index 91b373216c9..a33fac7556c 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
@@ -172,7 +172,10 @@ static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info
   assert (prof_info->device_type == acc_device_type);
   assert (prof_info->device_number == acc_device_num);
   assert (prof_info->thread_id == -1);
-  assert (prof_info->async == /* TODO acc_async */ acc_async_sync);
+  if (acc_device_type == acc_device_host)
+    assert (prof_info->async == acc_async_sync);
+  else
+    assert (prof_info->async == acc_async);
   assert (prof_info->async_queue == prof_info->async);
   assert (prof_info->src_file == NULL);
   assert (prof_info->func_name == NULL);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
index 28a47ccc27d..663f7f724d5 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
@@ -316,9 +316,9 @@ static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_
 {
   DEBUG_printf ("%s\n", __FUNCTION__);
 
-  assert (state == 7
+  assert (state == 5
 #if ASYNC_EXIT_DATA
-	  || state == 107
+	  || state == 105
 #endif
 	  );
   STATE_OP (state, ++);
@@ -372,9 +372,9 @@ static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_in
 {
   DEBUG_printf ("%s\n", __FUNCTION__);
 
-  assert (state == 8
+  assert (state == 6
 #if ASYNC_EXIT_DATA
-	  || state == 108
+	  || state == 106
 #endif
 	  );
   STATE_OP (state, ++);
@@ -458,7 +458,10 @@ static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info
   assert (prof_info->device_type == acc_device_type);
   assert (prof_info->device_number == acc_device_num);
   assert (prof_info->thread_id == -1);
-  assert (prof_info->async == /* TODO acc_async */ acc_async_sync);
+  if (acc_device_type == acc_device_host)
+    assert (prof_info->async == acc_async_sync);
+  else
+    assert (prof_info->async == acc_async);
   assert (prof_info->async_queue == prof_info->async);
   assert (prof_info->src_file == NULL);
   assert (prof_info->func_name == NULL);
@@ -499,9 +502,6 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
     {
       /* Compensate for the missing 'acc_ev_enter_data_end'.  */
       state += 1;
-      /* Compensate for the missing 'acc_ev_enqueue_launch_start' and
-	 'acc_ev_enqueue_launch_end'.  */
-      state += 2;
       /* Compensate for the missing 'acc_ev_exit_data_start' and
 	 'acc_ev_exit_data_end'.  */
       state += 2;
@@ -514,8 +514,8 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
       state += 2;
     }
 #endif
-  assert (state == 9
-	  || state == 109);
+  assert (state == 7
+	  || state == 107);
   STATE_OP (state, ++);
 
   assert (tool_info != NULL);
@@ -569,17 +569,6 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
 
   assert (acc_device_type != acc_device_host);
 
-  assert (state == 5
-	  || state == 105);
-  STATE_OP (state, ++);
-
-  assert (tool_info != NULL);
-  assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
-  assert (tool_info->nested == NULL);
-  tool_info->nested = (struct tool_info *) malloc(sizeof *tool_info);
-  assert (tool_info->nested != NULL);
-  tool_info->nested->nested = NULL;
-
   assert (prof_info->event_type == acc_ev_enqueue_launch_start);
   assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
   assert (prof_info->version == _ACC_PROF_INFO_VERSION);
@@ -623,13 +612,6 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
   assert (api_info->device_handle == NULL);
   assert (api_info->context_handle == NULL);
   assert (api_info->async_handle == NULL);
-
-  tool_info->nested->event_info.launch_event.event_type = event_info->launch_event.event_type;
-  tool_info->nested->event_info.launch_event.kernel_name = strdup (event_info->launch_event.kernel_name);
-  tool_info->nested->event_info.launch_event.num_gangs = event_info->launch_event.num_gangs;
-  tool_info->nested->event_info.launch_event.num_workers = event_info->launch_event.num_workers;
-  tool_info->nested->event_info.launch_event.vector_length = event_info->launch_event.vector_length;
-  event_info->other_event.tool_info = tool_info->nested;
 }
 
 static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
@@ -638,19 +620,6 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
 
   assert (acc_device_type != acc_device_host);
 
-  assert (state == 6
-	  || state == 106);
-  STATE_OP (state, ++);
-
-  assert (tool_info != NULL);
-  assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
-  assert (tool_info->nested != NULL);
-  assert (tool_info->nested->event_info.launch_event.event_type == acc_ev_enqueue_launch_start);
-  assert (tool_info->nested->event_info.launch_event.kernel_name != NULL);
-  assert (tool_info->nested->event_info.launch_event.num_gangs >= 1);
-  assert (tool_info->nested->event_info.launch_event.num_workers >= 1);
-  assert (tool_info->nested->event_info.launch_event.vector_length >= 1);
-
   assert (prof_info->event_type == acc_ev_enqueue_launch_end);
   assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES);
   assert (prof_info->version == _ACC_PROF_INFO_VERSION);
@@ -670,12 +639,7 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
   assert (event_info->launch_event.valid_bytes == _ACC_LAUNCH_EVENT_INFO_VALID_BYTES);
   assert (event_info->launch_event.parent_construct == acc_construct_parallel);
   assert (event_info->launch_event.implicit == 1);
-  assert (event_info->launch_event.tool_info == tool_info->nested);
   assert (event_info->launch_event.kernel_name != NULL);
-  assert (strcmp (event_info->launch_event.kernel_name, tool_info->nested->event_info.launch_event.kernel_name) == 0);
-  assert (event_info->launch_event.num_gangs == tool_info->nested->event_info.launch_event.num_gangs);
-  assert (event_info->launch_event.num_workers == tool_info->nested->event_info.launch_event.num_workers);
-  assert (event_info->launch_event.vector_length == tool_info->nested->event_info.launch_event.vector_length);
 
   if (acc_device_type == acc_device_host)
     assert (api_info->device_api == acc_device_api_none);
@@ -689,10 +653,6 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
   assert (api_info->device_handle == NULL);
   assert (api_info->context_handle == NULL);
   assert (api_info->async_handle == NULL);
-
-  free ((void *) tool_info->nested->event_info.launch_event.kernel_name);
-  free (tool_info->nested);
-  tool_info->nested = NULL;
 }
 
 
@@ -725,7 +685,7 @@ int main()
     }
     assert (state_init == 4);
   }
-  assert (state == 10);
+  assert (state == 8);
 
   STATE_OP (state, = 100);
 
@@ -742,7 +702,7 @@ int main()
 #pragma acc wait
     assert (state_init == 104);
   }
-  assert (state == 110);
+  assert (state == 108);
 
   return 0;
 }


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-06-29 14:36 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-29 14:36 [gcc/devel/omp/gcc-12] OpenACC profiling-interface fixes for asynchronous operations Kwok Yeung

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).