From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 113498 invoked by alias); 6 Sep 2019 16:02:35 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 113443 invoked by uid 89); 6 Sep 2019 16:02:35 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-20.9 required=5.0 tests=AWL,BAYES_00,GIT_PATCH_0,GIT_PATCH_1,GIT_PATCH_2,GIT_PATCH_3,RCVD_IN_DNSWL_NONE,SPF_PASS autolearn=ham version=3.3.1 spammy=5818, agent X-HELO: esa4.mentor.iphmx.com Received: from esa4.mentor.iphmx.com (HELO esa4.mentor.iphmx.com) (68.232.137.252) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Fri, 06 Sep 2019 16:02:32 +0000 IronPort-SDR: w3EkFeZT3KJwFUos63GM29YPQ1px4++XaMxh9WBMUoCYgZTQAQ/Lr5ns3lmtyblR6tSzCiRxB3 0W8D9q0Uy8rtGSSf3E+oelFUzq0/ritV2rEBU25ylSjiynsRgz7sVSmWuSbJmsIpo1Ma9n0z0v sPhktM1dWKmMCTs66r15AoVAGrY95U5N3FfJ/7sfITnUHVhbZ/+vPwKKdG36k2Y4Z6T+XzjhYE 9lqgFN+LPf4LM58uGyZRLq5+l03a7bCgEOI/cyDWOcbOTJV7dvyxlC/eYqspU7tudDOARZhfq8 35o= Received: from orw-gwy-02-in.mentorg.com ([192.94.38.167]) by esa4.mentor.iphmx.com with ESMTP; 06 Sep 2019 08:02:31 -0800 IronPort-SDR: Hh7hNmUrl6dav0zG5Pl5lsVUkhaTc2+feNdbx/8uSGll0EHvpGLAHVSDvt48gTLjsECoaEbeaC 4oi1iyHRGxPL00wY7A69XflwzUwSnjfTkO7weiaqpUOIdVvV8P//A4E4yaNGmFWS8KRiSxiTeP 1O7IsEdCH0Y0HEyV1jB2ixFotaA5uAYWtN40DE8PuEI021vM1oVJCDMB0q7NKzZR4W5VZHg3Hd ZNkjQvuveAja2zZDdJCHDxxDGry+P016b7/906iFtC58R6MnEYsXeoh0k1DbAIKISQHgiJOXzp kK4= From: Julian Brown To: CC: Thomas Schwinge , Andrew Stubbs Subject: [PATCH] [og9] OpenACC profiling support for AMD GCN Date: Fri, 06 Sep 2019 16:02:00 -0000 Message-ID: <20190906160213.69722-3-julian@codesourcery.com> In-Reply-To: <20190906160213.69722-1-julian@codesourcery.com> References: <20190906160213.69722-1-julian@codesourcery.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain Return-Path: 
julian@codesourcery.com X-IsSubscribed: yes X-SW-Source: 2019-09/txt/msg00413.txt.bz2 This patch adds profiling support to the AMD GCN libgomp plugin, modeled after the equivalent support in the NVPTX plugin. This gives a positive test delta in AMD GCN offload testing. I will apply this to the openacc-gcc-9-branch shortly. Julian 2019-09-06 Julian Brown libgomp/ * plugin/plugin-gcn.c (GOMP_OFFLOAD_alloc_by_agent, GOMP_OFFLOAD_free, gcn_exec): Add profiling support. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Add GCN support. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. --- libgomp/ChangeLog.openacc | 9 ++ libgomp/plugin/plugin-gcn.c | 96 +++++++++++++++++++ .../acc_prof-init-1.c | 2 + .../acc_prof-kernels-1.c | 4 + .../acc_prof-parallel-1.c | 12 +++ 5 files changed, 123 insertions(+) diff --git a/libgomp/ChangeLog.openacc b/libgomp/ChangeLog.openacc index d7a4c7a5f8a..8ed0a10a589 100644 --- a/libgomp/ChangeLog.openacc +++ b/libgomp/ChangeLog.openacc @@ -1,3 +1,12 @@ +2019-09-06 Julian Brown + + * plugin/plugin-gcn.c (GOMP_OFFLOAD_alloc_by_agent, + GOMP_OFFLOAD_free, gcn_exec): Add profiling support. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Add GCN + support. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. 
+ 2019-09-06 Julian Brown * config/gcn/target.c (omp_pause_resource, omp_pause_resource_all): New diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index f0b22ebc3d7..2f273967bad 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -3024,6 +3024,35 @@ GOMP_OFFLOAD_alloc_by_agent (struct agent_info *agent, size_t size) return NULL; } + struct goacc_thread *thr = GOMP_PLUGIN_goacc_thread (); + bool profiling_dispatch_p + = __builtin_expect (thr != NULL && thr->prof_info != NULL, false); + if (profiling_dispatch_p) + { + acc_prof_info *prof_info = thr->prof_info; + acc_event_info data_event_info; + acc_api_info *api_info = thr->api_info; + + prof_info->event_type = acc_ev_alloc; + + data_event_info.data_event.event_type = prof_info->event_type; + data_event_info.data_event.valid_bytes + = _ACC_DATA_EVENT_INFO_VALID_BYTES; + data_event_info.data_event.parent_construct + = acc_construct_parallel; + data_event_info.data_event.implicit = 1; + data_event_info.data_event.tool_info = NULL; + data_event_info.data_event.var_name = NULL; + data_event_info.data_event.bytes = size; + data_event_info.data_event.host_ptr = NULL; + data_event_info.data_event.device_ptr = (void *) ptr; + + api_info->device_api = acc_device_api_other; + + GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info, + api_info); + } + return ptr; } @@ -3046,6 +3075,35 @@ GOMP_OFFLOAD_free (int device, void *ptr) return false; } + struct goacc_thread *thr = GOMP_PLUGIN_goacc_thread (); + bool profiling_dispatch_p + = __builtin_expect (thr != NULL && thr->prof_info != NULL, false); + if (profiling_dispatch_p) + { + acc_prof_info *prof_info = thr->prof_info; + acc_event_info data_event_info; + acc_api_info *api_info = thr->api_info; + + prof_info->event_type = acc_ev_free; + + data_event_info.data_event.event_type = prof_info->event_type; + data_event_info.data_event.valid_bytes + = _ACC_DATA_EVENT_INFO_VALID_BYTES; + 
data_event_info.data_event.parent_construct + = acc_construct_parallel; + data_event_info.data_event.implicit = 1; + data_event_info.data_event.tool_info = NULL; + data_event_info.data_event.var_name = NULL; + data_event_info.data_event.bytes = 0; + data_event_info.data_event.host_ptr = NULL; + data_event_info.data_event.device_ptr = (void *) ptr; + + api_info->device_api = acc_device_api_other; + + GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info, + api_info); + } + return true; } @@ -3276,6 +3334,35 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs, {1, 64, 16} }; + struct goacc_thread *thr = GOMP_PLUGIN_goacc_thread (); + acc_prof_info *prof_info = thr->prof_info; + acc_event_info enqueue_launch_event_info; + acc_api_info *api_info = thr->api_info; + bool profiling_dispatch_p = __builtin_expect (prof_info != NULL, false); + if (profiling_dispatch_p) + { + prof_info->event_type = acc_ev_enqueue_launch_start; + + enqueue_launch_event_info.launch_event.event_type + = prof_info->event_type; + enqueue_launch_event_info.launch_event.valid_bytes + = _ACC_LAUNCH_EVENT_INFO_VALID_BYTES; + enqueue_launch_event_info.launch_event.parent_construct + = acc_construct_parallel; + enqueue_launch_event_info.launch_event.implicit = 1; + enqueue_launch_event_info.launch_event.tool_info = NULL; + enqueue_launch_event_info.launch_event.kernel_name + = (char *) kernel->name; + enqueue_launch_event_info.launch_event.num_gangs = kla.gdims[0]; + enqueue_launch_event_info.launch_event.num_workers = kla.gdims[2]; + enqueue_launch_event_info.launch_event.vector_length = kla.gdims[1]; + + api_info->device_api = acc_device_api_other; + + GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, + &enqueue_launch_event_info, api_info); + } + if (!async) { run_kernel (kernel, ind_da, &kla, NULL, false); @@ -3289,6 +3376,15 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs, aq->agent->device_id, aq->id, ind_da); queue_push_callback (aq, 
gomp_offload_free, ind_da); } + + if (profiling_dispatch_p) + { + prof_info->event_type = acc_ev_enqueue_launch_end; + enqueue_launch_event_info.launch_event.event_type = prof_info->event_type; + GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, + &enqueue_launch_event_info, + api_info); + } } void diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c index 6a44e8ffb6a..cf980f1baec 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c @@ -224,6 +224,8 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); + else if (acc_device_type == acc_device_gcn) + assert (api_info->device_api == acc_device_api_other); else assert (api_info->device_api == acc_device_api_cuda); assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c index 269b4398478..9c1cfbe292c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c @@ -107,6 +107,8 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e assert (event_info->launch_event.vector_length >= 1); else if (acc_device_type == acc_device_nvidia) /* ... is special. */ assert (event_info->launch_event.vector_length == 32); + else if (acc_device_type == acc_device_gcn) /* ...and so is this. 
*/ + assert (event_info->launch_event.vector_length == 64); else { #ifdef __OPTIMIZE__ @@ -119,6 +121,8 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); + else if (acc_device_type == acc_device_gcn) + assert (api_info->device_api == acc_device_api_other); else assert (api_info->device_api == acc_device_api_cuda); assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c index 116b9b538a6..5d392511592 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c @@ -265,6 +265,8 @@ static void cb_enter_data_end (acc_prof_info *prof_info, acc_event_info *event_i if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); + else if (acc_device_type == acc_device_gcn) + assert (api_info->device_api == acc_device_api_other); else assert (api_info->device_api == acc_device_api_cuda); assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES); @@ -319,6 +321,8 @@ static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_ if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); + else if (acc_device_type == acc_device_gcn) + assert (api_info->device_api == acc_device_api_other); else assert (api_info->device_api == acc_device_api_cuda); assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES); @@ -371,6 +375,8 @@ static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_in if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); + else if (acc_device_type == acc_device_gcn) + assert (api_info->device_api == acc_device_api_other); else assert 
(api_info->device_api == acc_device_api_cuda); assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES); @@ -510,6 +516,8 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); + else if (acc_device_type == acc_device_gcn) + assert (api_info->device_api == acc_device_api_other); else assert (api_info->device_api == acc_device_api_cuda); assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES); @@ -573,6 +581,8 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); + else if (acc_device_type == acc_device_gcn) + assert (api_info->device_api == acc_device_api_other); else assert (api_info->device_api == acc_device_api_cuda); assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES); @@ -637,6 +647,8 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); + else if (acc_device_type == acc_device_gcn) + assert (api_info->device_api == acc_device_api_other); else assert (api_info->device_api == acc_device_api_cuda); assert (api_info->valid_bytes == _ACC_API_INFO_VALID_BYTES); -- 2.22.0