From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1643) id DD7F03858D3C; Fri, 10 Mar 2023 11:32:11 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org DD7F03858D3C DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1678447931; bh=3+mQUSOPzth93gUEUD5xFvmTjVp47a4Iegoap9NGY/w=; h=From:To:Subject:Date:From; b=vbc6oCliNmhIU6HUoasG6XpTBZwUP3eN61BS0nOlblmMWlJ68O2SnY6tlY68wE1lF gmJHSoEXJ1afu032BL+4SL/rVqz6MQJaDn8mf5SB6Ae+OeM2yipLuigNEJt9izQDmQ I3GZmpNy+o+A/zerOuudTdfyeFs1xtoJ+cKko3Ro= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Thomas Schwinge To: gcc-cvs@gcc.gnu.org Subject: [gcc/devel/omp/gcc-12] Revert "OpenACC profiling-interface fixes for asynchronous operations" X-Act-Checkin: gcc X-Git-Author: Thomas Schwinge X-Git-Refname: refs/heads/devel/omp/gcc-12 X-Git-Oldrev: 1818bab2ce9f11d8dde5b378f580971b87a5c4ff X-Git-Newrev: b8beaa8447ed3c1637e8f93a08c0e47b5709290f Message-Id: <20230310113211.DD7F03858D3C@sourceware.org> Date: Fri, 10 Mar 2023 11:32:11 +0000 (GMT) List-Id: https://gcc.gnu.org/g:b8beaa8447ed3c1637e8f93a08c0e47b5709290f commit b8beaa8447ed3c1637e8f93a08c0e47b5709290f Author: Thomas Schwinge Date: Thu Mar 2 11:28:24 2023 +0100 Revert "OpenACC profiling-interface fixes for asynchronous operations" There is occasional execution failure; these changes need to be reviewed. This reverts og12 commit 719f93c8618a134f90b5b661ab70c918d659ad05. libgomp/ * oacc-host.c: Revert "OpenACC profiling-interface fixes for asynchronous operations" changes. * oacc-mem.c: Likewise. * oacc-parallel.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. Diff: --- libgomp/ChangeLog.omp | 9 + libgomp/oacc-host.c | 5 +- libgomp/oacc-mem.c | 32 +--- libgomp/oacc-parallel.c | 192 +++++---------------- .../libgomp.oacc-c-c++-common/acc_prof-init-1.c | 5 +- .../acc_prof-parallel-1.c | 64 +++++-- 6 files changed, 113 insertions(+), 194 deletions(-) diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index d55b0503920..0e984754bb0 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,5 +1,14 @@ 2023-03-10 Thomas Schwinge + * oacc-host.c: Revert + "OpenACC profiling-interface fixes for asynchronous operations" + changes. + * oacc-mem.c: Likewise. + * oacc-parallel.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Revert "Revert changes to acc_prof-init-1.c and acc_prof-parallel-1.c" changes. diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c index afc99c5a374..94792abe5ce 100644 --- a/libgomp/oacc-host.c +++ b/libgomp/oacc-host.c @@ -215,9 +215,10 @@ host_openacc_async_dev2host (int ord __attribute__ ((unused)), static void host_openacc_async_queue_callback (struct goacc_asyncqueue *aq __attribute__ ((unused)), - void (*callback_fn)(void *), void *userptr) + void (*callback_fn)(void *) + __attribute__ ((unused)), + void *userptr __attribute__ ((unused))) { - callback_fn (userptr); } static struct goacc_asyncqueue * diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 4b7d306f402..6fb8be98542 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -1431,12 +1431,6 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, gomp_mutex_unlock (&acc_dev->lock); } -struct async_prof_callback_info * -queue_async_prof_dispatch (struct gomp_device_descr *devicep, goacc_aq aq, - acc_prof_info *prof_info, acc_event_info *event_info, - acc_api_info *api_info, - struct async_prof_callback_info *prev_info); - static void goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, @@ -1447,7 +1441,6 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs, struct goacc_thread *thr; struct gomp_device_descr *acc_dev; - struct async_prof_callback_info *data_start_info = NULL; goacc_lazy_initialize (); @@ -1503,19 +1496,9 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs, api_info.async_handle = NULL; } - goacc_aq aq = get_goacc_asyncqueue (async); - if (profiling_p) - { - if (aq) - data_start_info - = queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &enter_exit_data_event_info, &api_info, - NULL); - else - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); - } + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || (flags & GOACC_FLAG_HOST_FALLBACK)) @@ -1529,6 +1512,8 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs, if (num_waits) goacc_wait (async, num_waits, ap); + goacc_aq aq = get_goacc_asyncqueue (async); + if (data_enter) goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq); else @@ -1540,13 +1525,8 @@ goacc_enter_exit_data_internal (int flags_m, size_t mapnum, void **hostaddrs, prof_info.event_type = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; - if (aq) - queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &enter_exit_data_event_info, &api_info, - data_start_info); - else - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); thr->prof_info = NULL; thr->api_info = NULL; diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 81e8eba4225..d66bc882a5f 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -259,62 +259,6 @@ handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes, } -struct async_prof_callback_info { - acc_prof_info prof_info; - acc_event_info event_info; - acc_api_info api_info; - struct async_prof_callback_info *start_info; -}; - -static void -async_prof_dispatch (void *ptr) -{ - struct async_prof_callback_info *info - = (struct async_prof_callback_info *) ptr; - - if (info->start_info) - { - /* The TOOL_INFO must be preserved from a start event to the - corresponding end event. Copy that here. */ - void *tool_info = info->start_info->event_info.other_event.tool_info; - info->event_info.other_event.tool_info = tool_info; - } - - goacc_profiling_dispatch (&info->prof_info, &info->event_info, - &info->api_info); - - /* The async_prof_dispatch function is (so far) always used for start/end - profiling event pairs: the start and end parts are queued, then each is - dispatched (or the dispatches might be interleaved before the end part is - queued). - In any case, it's not safe to delete either info structure before the - whole bracketed event is complete. */ - - if (info->start_info) - { - free (info->start_info); - free (info); - } -} - -struct async_prof_callback_info * -queue_async_prof_dispatch (struct gomp_device_descr *devicep, goacc_aq aq, - acc_prof_info *prof_info, acc_event_info *event_info, - acc_api_info *api_info, - struct async_prof_callback_info *prev_info) -{ - struct async_prof_callback_info *info = malloc (sizeof (*info)); - - info->prof_info = *prof_info; - info->event_info = *event_info; - info->api_info = *api_info; - info->start_info = prev_info; - - devicep->openacc.async.queue_callback_func (aq, async_prof_dispatch, - (void *) info); - return info; -} - /* Launch a possibly offloaded function with FLAGS. FN is the host fn address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory blocks to be copied to/from the device. Varadic arguments are @@ -340,8 +284,6 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), unsigned dims[GOMP_DIM_MAX]; unsigned tag; struct goacc_ncarray_info *nca_info = NULL; - struct async_prof_callback_info *comp_start_info = NULL, - *data_start_info = NULL; #ifdef HAVE_INTTYPES_H gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", @@ -403,8 +345,31 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), api_info.async_handle = NULL; } + if (profiling_p) + goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, + &api_info); + handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds); + /* Host fallback if "if" clause is false or if the current device is set to + the host. */ + if (flags & GOACC_FLAG_HOST_FALLBACK) + { + prof_info.device_type = acc_device_host; + api_info.device_type = prof_info.device_type; + goacc_save_and_set_bind (acc_device_host); + fn (hostaddrs); + goacc_restore_bind (); + goto out_prof; + } + else if (acc_device_type (acc_dev->type) == acc_device_host) + { + fn (hostaddrs); + goto out_prof; + } + else if (profiling_p) + api_info.device_api = acc_device_api_cuda; + /* Default: let the runtime choose. */ for (i = 0; i != GOMP_DIM_MAX; i++) dims[i] = 0; @@ -437,12 +402,11 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), if (async == GOMP_LAUNCH_OP_MAX) async = va_arg (ap, unsigned); - /* Set async number in profiling data, unless the device is the - host or we're doing host fallback. */ - if (profiling_p - && !(flags & GOACC_FLAG_HOST_FALLBACK) - && acc_device_type (acc_dev->type) != acc_device_host) - prof_info.async = prof_info.async_queue = async; + if (profiling_p) + { + prof_info.async = async; + prof_info.async_queue = prof_info.async; + } break; } @@ -470,39 +434,6 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), va_end (ap); - goacc_aq aq = get_goacc_asyncqueue (async); - - if (profiling_p) - { - if (aq) - comp_start_info - = queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &compute_construct_event_info, - &api_info, NULL); - else - goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, - &api_info); - } - - /* Host fallback if "if" clause is false or if the current device is set to - the host. */ - if (flags & GOACC_FLAG_HOST_FALLBACK) - { - prof_info.device_type = acc_device_host; - api_info.device_type = prof_info.device_type; - goacc_save_and_set_bind (acc_device_host); - fn (hostaddrs); - goacc_restore_bind (); - goto out_prof; - } - else if (acc_device_type (acc_dev->type) == acc_device_host) - { - fn (hostaddrs); - goto out_prof; - } - else if (profiling_p) - api_info.device_api = acc_device_api_cuda; - if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) { k.host_start = (uintptr_t) fn; @@ -531,16 +462,12 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), = compute_construct_event_info.other_event.parent_construct; enter_exit_data_event_info.other_event.implicit = 1; enter_exit_data_event_info.other_event.tool_info = NULL; - if (aq) - data_start_info - = queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &enter_exit_data_event_info, &api_info, - NULL); - else - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); } + goacc_aq aq = get_goacc_asyncqueue (async); + tgt = gomp_map_vars_openacc (acc_dev, aq, mapnum, hostaddrs, sizes, kinds, nca_info); free (nca_info); @@ -550,13 +477,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), prof_info.event_type = acc_ev_enter_data_end; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; - if (aq) - queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &enter_exit_data_event_info, &api_info, - data_start_info); - else - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); } devaddrs = gomp_alloca (sizeof (void *) * mapnum); @@ -575,14 +497,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), prof_info.event_type = acc_ev_exit_data_start; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; enter_exit_data_event_info.other_event.tool_info = NULL; - if (aq) - data_start_info - = queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &enter_exit_data_event_info, &api_info, - NULL); - else - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); } /* If running synchronously (aq == NULL), this will unmap immediately. */ @@ -592,13 +508,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), { prof_info.event_type = acc_ev_exit_data_end; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; - if (aq) - queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &enter_exit_data_event_info, &api_info, - data_start_info); - else - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); } out_prof: @@ -607,13 +518,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), prof_info.event_type = acc_ev_compute_construct_end; compute_construct_event_info.other_event.event_type = prof_info.event_type; - if (aq) - queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &compute_construct_event_info, &api_info, - comp_start_info); - else - goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, - &api_info); + goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, + &api_info); thr->prof_info = NULL; thr->api_info = NULL; @@ -851,8 +757,6 @@ GOACC_update (int flags_m, size_t mapnum, struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; - goacc_aq aq = NULL; - struct async_prof_callback_info *update_start_info = NULL; bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); @@ -902,15 +806,7 @@ GOACC_update (int flags_m, size_t mapnum, } if (profiling_p) - { - aq = get_goacc_asyncqueue (async); - if (aq) - update_start_info - = queue_async_prof_dispatch (acc_dev, aq, &prof_info, - &update_event_info, &api_info, NULL); - else - goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); - } + goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || (flags & GOACC_FLAG_HOST_FALLBACK)) @@ -997,11 +893,7 @@ GOACC_update (int flags_m, size_t mapnum, { prof_info.event_type = acc_ev_update_end; update_event_info.other_event.event_type = prof_info.event_type; - if (aq) - queue_async_prof_dispatch (acc_dev, aq, &prof_info, &update_event_info, - &api_info, update_start_info); - else - goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); + goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); thr->prof_info = NULL; thr->api_info = NULL; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c index a33fac7556c..91b373216c9 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c @@ -172,10 +172,7 @@ static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info assert (prof_info->device_type == acc_device_type); assert (prof_info->device_number == acc_device_num); assert (prof_info->thread_id == -1); - if (acc_device_type == acc_device_host) - assert (prof_info->async == acc_async_sync); - else - assert (prof_info->async == acc_async); + assert (prof_info->async == /* TODO acc_async */ acc_async_sync); assert (prof_info->async_queue == prof_info->async); assert (prof_info->src_file == NULL); assert (prof_info->func_name == NULL); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c index 663f7f724d5..28a47ccc27d 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c @@ -316,9 +316,9 @@ static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_ { DEBUG_printf ("%s\n", __FUNCTION__); - assert (state == 5 + assert (state == 7 #if ASYNC_EXIT_DATA - || state == 105 + || state == 107 #endif ); STATE_OP (state, ++); @@ -372,9 +372,9 @@ static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_in { DEBUG_printf ("%s\n", __FUNCTION__); - assert (state == 6 + assert (state == 8 #if ASYNC_EXIT_DATA - || state == 106 + || state == 108 #endif ); STATE_OP (state, ++); @@ -458,10 +458,7 @@ static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info assert (prof_info->device_type == acc_device_type); assert (prof_info->device_number == acc_device_num); assert (prof_info->thread_id == -1); - if (acc_device_type == acc_device_host) - assert (prof_info->async == acc_async_sync); - else - assert (prof_info->async == acc_async); + assert (prof_info->async == /* TODO acc_async */ acc_async_sync); assert (prof_info->async_queue == prof_info->async); assert (prof_info->src_file == NULL); assert (prof_info->func_name == NULL); @@ -502,6 +499,9 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * { /* Compensate for the missing 'acc_ev_enter_data_end'. */ state += 1; + /* Compensate for the missing 'acc_ev_enqueue_launch_start' and + 'acc_ev_enqueue_launch_end'. */ + state += 2; /* Compensate for the missing 'acc_ev_exit_data_start' and 'acc_ev_exit_data_end'. */ state += 2; @@ -514,8 +514,8 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * state += 2; } #endif - assert (state == 7 - || state == 107); + assert (state == 9 + || state == 109); STATE_OP (state, ++); assert (tool_info != NULL); @@ -569,6 +569,17 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e assert (acc_device_type != acc_device_host); + assert (state == 5 + || state == 105); + STATE_OP (state, ++); + + assert (tool_info != NULL); + assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start); + assert (tool_info->nested == NULL); + tool_info->nested = (struct tool_info *) malloc(sizeof *tool_info); + assert (tool_info->nested != NULL); + tool_info->nested->nested = NULL; + assert (prof_info->event_type == acc_ev_enqueue_launch_start); assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES); assert (prof_info->version == _ACC_PROF_INFO_VERSION); @@ -612,6 +623,13 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e assert (api_info->device_handle == NULL); assert (api_info->context_handle == NULL); assert (api_info->async_handle == NULL); + + tool_info->nested->event_info.launch_event.event_type = event_info->launch_event.event_type; + tool_info->nested->event_info.launch_event.kernel_name = strdup (event_info->launch_event.kernel_name); + tool_info->nested->event_info.launch_event.num_gangs = event_info->launch_event.num_gangs; + tool_info->nested->event_info.launch_event.num_workers = event_info->launch_event.num_workers; + tool_info->nested->event_info.launch_event.vector_length = event_info->launch_event.vector_length; + event_info->other_event.tool_info = tool_info->nested; } static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info) @@ -620,6 +638,19 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve assert (acc_device_type != acc_device_host); + assert (state == 6 + || state == 106); + STATE_OP (state, ++); + + assert (tool_info != NULL); + assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start); + assert (tool_info->nested != NULL); + assert (tool_info->nested->event_info.launch_event.event_type == acc_ev_enqueue_launch_start); + assert (tool_info->nested->event_info.launch_event.kernel_name != NULL); + assert (tool_info->nested->event_info.launch_event.num_gangs >= 1); + assert (tool_info->nested->event_info.launch_event.num_workers >= 1); + assert (tool_info->nested->event_info.launch_event.vector_length >= 1); + assert (prof_info->event_type == acc_ev_enqueue_launch_end); assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES); assert (prof_info->version == _ACC_PROF_INFO_VERSION); @@ -639,7 +670,12 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve assert (event_info->launch_event.valid_bytes == _ACC_LAUNCH_EVENT_INFO_VALID_BYTES); assert (event_info->launch_event.parent_construct == acc_construct_parallel); assert (event_info->launch_event.implicit == 1); + assert (event_info->launch_event.tool_info == tool_info->nested); assert (event_info->launch_event.kernel_name != NULL); + assert (strcmp (event_info->launch_event.kernel_name, tool_info->nested->event_info.launch_event.kernel_name) == 0); + assert (event_info->launch_event.num_gangs == tool_info->nested->event_info.launch_event.num_gangs); + assert (event_info->launch_event.num_workers == tool_info->nested->event_info.launch_event.num_workers); + assert (event_info->launch_event.vector_length == tool_info->nested->event_info.launch_event.vector_length); if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); @@ -653,6 +689,10 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve assert (api_info->device_handle == NULL); assert (api_info->context_handle == NULL); assert (api_info->async_handle == NULL); + + free ((void *) tool_info->nested->event_info.launch_event.kernel_name); + free (tool_info->nested); + tool_info->nested = NULL; } @@ -685,7 +725,7 @@ int main() } assert (state_init == 4); } - assert (state == 8); + assert (state == 10); STATE_OP (state, = 100); @@ -702,7 +742,7 @@ int main() #pragma acc wait assert (state_init == 104); } - assert (state == 108); + assert (state == 110); return 0; }