libgomp/gcn: Prepare for reverse-offload callback handling libgomp/ChangeLog: * config/gcn/libgomp-gcn.h: New file. * config/gcn/target.c: Include it. (GOMP_ADDITIONAL_ICVS): Declare as extern var. (GOMP_target_ext): Handle reverse offload. * plugin/plugin-gcn.c (struct kernargs): Add 'int64_t rev_ptr' as 6th argument and 'struct rev_offload rev_data'. (struct agent_info): Add has_reverse_offload; move prog_finalized up to reduce padding. (create_kernel_dispatch): Init kernargs' rev_ptr and rev_data. (reverse_offload): New. (run_kernel): Call it. (GOMP_OFFLOAD_init_device, GOMP_OFFLOAD_load_image): Set has_reverse_offload. libgomp/config/gcn/libgomp-gcn.h | 50 +++++++++++++++++++++++++++++++++++++ libgomp/config/gcn/target.c | 35 ++++++++++++++++++++------ libgomp/plugin/plugin-gcn.c | 54 +++++++++++++++++++++++++++++++++++++--- 3 files changed, 129 insertions(+), 10 deletions(-) diff --git a/libgomp/config/gcn/libgomp-gcn.h b/libgomp/config/gcn/libgomp-gcn.h new file mode 100644 index 00000000000..884f0094d05 --- /dev/null +++ b/libgomp/config/gcn/libgomp-gcn.h @@ -0,0 +1,50 @@ +/* Copyright (C) 2022 Free Software Foundation, Inc. + Contributed by Tobias Burnus . + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file contains defines and type definitions shared between the + gcn target's libgomp.a and the plugin-gcn.c, but that is only + needed for this target. */ + +#ifndef LIBGOMP_GCN_H +#define LIBGOMP_GCN_H 1 + + +struct rev_offload { + uint64_t fn; + uint64_t mapnum; + uint64_t addrs; + uint64_t sizes; + uint64_t kinds; + int32_t dev_num; + uint32_t lock; +}; + +#if (__SIZEOF_SHORT__ != 2 \ + || __SIZEOF_SIZE_T__ != 8 \ + || __SIZEOF_POINTER__ != 8) +#error "Data-type conversion required for rev_offload" +#endif + +#endif /* LIBGOMP_GCN_H */ diff --git a/libgomp/config/gcn/target.c b/libgomp/config/gcn/target.c index c8484fa18d9..ecbf3f337d0 100644 --- a/libgomp/config/gcn/target.c +++ b/libgomp/config/gcn/target.c @@ -24,8 +24,11 @@ <http://www.gnu.org/licenses/>. */ #include "libgomp.h" +#include "libgomp-gcn.h" #include +extern volatile struct gomp_offload_icvs GOMP_ADDITIONAL_ICVS; + bool GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper, unsigned int thread_limit, bool first) @@ -75,16 +78,34 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, unsigned int flags, void **depend, void **args) { - (void) device; - (void) fn; - (void) mapnum; - (void) hostaddrs; - (void) sizes; - (void) kinds; + struct rev_offload *rev; + (void) flags; (void) depend; (void) args; - __builtin_unreachable (); + + if (device != GOMP_DEVICE_HOST_FALLBACK || fn == NULL) + return; + + register void **kernargs asm ("s8"); + rev = (struct rev_offload *) kernargs[5]; + + while (__sync_lock_test_and_set (&rev->lock, (uint8_t) 1)) + /* spin */ ; + + rev->mapnum = mapnum; + rev->addrs = hostaddrs; + rev->sizes = sizes; + rev->kinds = kinds; + rev->dev_num = GOMP_ADDITIONAL_ICVS.device_num; + + /* 
'fn' must be last. */ + __atomic_store_n (&rev->fn, fn, __ATOMIC_RELEASE); + + /* Processed on the host - when done, fn is set to NULL. */ + while (__atomic_load_n (&rev->fn, __ATOMIC_ACQUIRE) != 0) + /* spin */ ; + __sync_lock_release (&rev->lock); } void diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 04b122f2a09..ebc9a55eb13 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -42,6 +42,7 @@ #include #include #include "libgomp-plugin.h" +#include "config/gcn/libgomp-gcn.h" #include "gomp-constants.h" #include #include "oacc-plugin.h" @@ -251,6 +252,9 @@ struct kernargs { Only needed for OpenMP. */ int64_t arena_ptr; + /* A pointer to reverse-offload. */ + int64_t rev_ptr; + /* Output data. */ struct output { int return_value; @@ -267,6 +271,8 @@ struct kernargs { } queue[1024]; unsigned int consumed; } output_data; + + struct rev_offload rev_data; }; /* A queue entry for a future asynchronous launch. */ @@ -422,6 +428,12 @@ struct agent_info if it has been. */ bool initialized; + /* Flag whether the HSA program that consists of all the modules has been + finalized. */ + bool prog_finalized; + /* Flag whether the HSA OpenMP's requires_reverse_offload has been used. */ + bool has_reverse_offload; + /* The instruction set architecture of the device. */ gcn_isa device_isa; /* Name of the agent. */ @@ -456,9 +468,6 @@ struct agent_info thread should have locked agent->module_rwlock for reading before acquiring it. */ pthread_mutex_t prog_mutex; - /* Flag whether the HSA program that consists of all the modules has been - finalized. */ - bool prog_finalized; /* HSA executable - the finalized program that is used to locate kernels. 
*/ hsa_executable_t executable; }; @@ -1915,6 +1924,9 @@ create_kernel_dispatch (struct kernel_info *kernel, int num_teams) i++) kernargs->output_data.queue[i].written = 0; kernargs->output_data.consumed = 0; + kernargs->rev_ptr = (int64_t) &kernargs->rev_data; + kernargs->rev_data.lock = 0; + kernargs->rev_data.fn = 0; /* Pass in the heap location. */ kernargs->heap_ptr = (int64_t)kernel->module->heap; @@ -1931,6 +1943,36 @@ create_kernel_dispatch (struct kernel_info *kernel, int num_teams) return shadow; } +#if 1 +/* This is part of the patch: + "libgomp/nvptx: Prepare for reverse-offload callback handling" */ +static void GOMP_PLUGIN_target_rev (uint64_t, uint64_t, uint64_t, uint64_t, + uint64_t, int, + void (*) (void *, const void *, size_t, + void *), + void (*) (void *, const void *, size_t, + void *), void *) +{ +} +#endif + +static void +reverse_offload (struct kernargs *kernargs) +{ + uint64_t fn_ptr = __atomic_load_n (&kernargs->rev_data.fn, __ATOMIC_ACQUIRE); + if (fn_ptr == 0) + return; + + uint64_t mapnum = kernargs->rev_data.mapnum; + uint64_t addr_ptr = kernargs->rev_data.addrs; + uint64_t sizes_ptr = kernargs->rev_data.sizes; + uint64_t kinds_ptr = kernargs->rev_data.kinds; + int dev_num = (int) kernargs->rev_data.dev_num; + GOMP_PLUGIN_target_rev (fn_ptr, mapnum, addr_ptr, sizes_ptr, kinds_ptr, + dev_num, NULL, NULL, NULL); + __atomic_store_n (&kernargs->rev_data.fn, 0, __ATOMIC_RELEASE); +} + /* Output any data written to console output from the kernel. It is expected that this function is polled during kernel execution. @@ -2263,11 +2305,15 @@ run_kernel (struct kernel_info *kernel, void *vars, GCN_DEBUG ("Kernel dispatched, waiting for completion\n"); + bool has_reverse_offload = kernel->agent->has_reverse_offload; + /* Root signal waits with 1ms timeout. 
*/ while (hsa_fns.hsa_signal_wait_acquire_fn (s, HSA_SIGNAL_CONDITION_LT, 1, 1000 * 1000, HSA_WAIT_STATE_BLOCKED) != 0) { + if (has_reverse_offload) + reverse_offload (shadow->kernarg_address); console_output (kernel, shadow->kernarg_address, false); } console_output (kernel, shadow->kernarg_address, true); @@ -3340,6 +3386,7 @@ GOMP_OFFLOAD_init_device (int n) GCN_DEBUG ("GCN agent %d initialized\n", n); + agent->has_reverse_offload = false; agent->initialized = true; return true; } @@ -3547,6 +3594,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, GOMP_OFFLOAD_dev2host (agent->device_id, *rev_fn_table, (void*) fn_table_addr, kernel_count * sizeof (uint64_t)); + agent->has_reverse_offload = true; } return kernel_count + var_count + other_count;