libgomp/gcn: Prepare for reverse-offload callback handling libgomp/ChangeLog: * config/gcn/libgomp-gcn.h: New file; contains struct output, declared previously in plugin-gcn.c. * config/gcn/target.c: Include it. (GOMP_ADDITIONAL_ICVS): Declare as extern var. (GOMP_target_ext): Handle reverse offload. * plugin/plugin-gcn.c: Include libgomp-gcn.h. (struct kernargs): Replace struct def by the one from libgomp-gcn.h for output_data. (process_reverse_offload): New. (console_output): Call it. libgomp/config/gcn/libgomp-gcn.h | 61 ++++++++++++++++++++++++++++++++++++++++ libgomp/config/gcn/target.c | 44 ++++++++++++++++++++++++----- libgomp/plugin/plugin-gcn.c | 34 ++++++++++++---------- 3 files changed, 117 insertions(+), 22 deletions(-) diff --git a/libgomp/config/gcn/libgomp-gcn.h b/libgomp/config/gcn/libgomp-gcn.h new file mode 100644 index 00000000000..91560be787f --- /dev/null +++ b/libgomp/config/gcn/libgomp-gcn.h @@ -0,0 +1,61 @@ +/* Copyright (C) 2022 Free Software Foundation, Inc. + Contributed by Tobias Burnus . + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + +/* This file contains defines and type definitions shared between the + gcn target's libgomp.a and plugin-gcn.c, but that are only + needed for this target. */ + +#ifndef LIBGOMP_GCN_H +#define LIBGOMP_GCN_H 1 + +/* This struct is also used in Newlib's libc/sys/amdgcn/write.c. */ +struct output +{ + int return_value; + unsigned int next_output; + struct printf_data { + int written; + union { + char msg[128]; + uint64_t msg_u64[2]; + }; + int type; + union { + int64_t ivalue; + double dvalue; + char text[128]; + uint64_t value_u64[2]; + }; + } queue[1024]; + unsigned int consumed; +}; + +#if (__SIZEOF_SHORT__ != 2 \ + || __SIZEOF_SIZE_T__ != 8 \ + || __SIZEOF_POINTER__ != 8) +#error "Data-type conversion required for rev_offload" +#endif + +#endif /* LIBGOMP_GCN_H */ diff --git a/libgomp/config/gcn/target.c b/libgomp/config/gcn/target.c index c8484fa18d9..f5a4bf64655 100644 --- a/libgomp/config/gcn/target.c +++ b/libgomp/config/gcn/target.c @@ -24,8 +24,11 @@ . */ #include "libgomp.h" +#include "libgomp-gcn.h" #include +extern volatile struct gomp_offload_icvs GOMP_ADDITIONAL_ICVS; + bool GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper, unsigned int thread_limit, bool first) @@ -75,16 +78,43 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, unsigned int flags, void **depend, void **args) { - (void) device; - (void) fn; - (void) mapnum; - (void) hostaddrs; - (void) sizes; - (void) kinds; (void) flags; (void) depend; (void) args; - __builtin_unreachable (); + + if (device != GOMP_DEVICE_HOST_FALLBACK || fn == NULL) + return; + + /* The output data is at ((void*) kernargs)[2]. */ + register void **kernargs asm("s8"); + struct output *data = (struct output *) kernargs[2]; + /* Reserve one slot. */ + unsigned int index = __atomic_fetch_add (&data->next_output, 1, + __ATOMIC_ACQUIRE); + + if ((unsigned int) (index + 1) < data->consumed) + abort (); /* Overflow. 
*/ + + /* Spinlock while the host catches up. */ + if (index >= 1024) + while (__atomic_load_n (&data->consumed, __ATOMIC_ACQUIRE) + <= (index - 1024)) + asm ("s_sleep 64"); + + unsigned int slot = index % 1024; + uint64_t addrs_sizes_kind[3] = {(uint64_t) hostaddrs, (uint64_t) sizes, + (uint64_t) kinds}; + data->queue[slot].msg_u64[0] = (uint64_t) fn; + data->queue[slot].msg_u64[1] = (uint64_t) mapnum; + data->queue[slot].value_u64[0] = (uint64_t) &addrs_sizes_kind[0]; + data->queue[slot].value_u64[1] = (uint64_t) GOMP_ADDITIONAL_ICVS.device_num; + + data->queue[slot].type = 4; /* Reverse offload. */ + __atomic_store_n (&data->queue[slot].written, 1, __ATOMIC_RELEASE); + + /* Wait until this entry's 'written' flag is zero again. */ + while (__atomic_load_n (&data->queue[slot].written, __ATOMIC_ACQUIRE) != 0) + asm ("s_sleep 64"); } void diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 04b122f2a09..ffe5cf5af2c 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -42,6 +42,7 @@ #include #include #include "libgomp-plugin.h" +#include "config/gcn/libgomp-gcn.h" /* For struct output. */ #include "gomp-constants.h" #include #include "oacc-plugin.h" @@ -252,21 +253,7 @@ struct kernargs { int64_t arena_ptr; /* Output data. */ - struct output { - int return_value; - unsigned int next_output; - struct printf_data { - int written; - char msg[128]; - int type; - union { - int64_t ivalue; - double dvalue; - char text[128]; - }; - } queue[1024]; - unsigned int consumed; - } output_data; + struct output output_data; }; /* A queue entry for a future asynchronous launch. 
*/ @@ -1931,6 +1918,19 @@ create_kernel_dispatch (struct kernel_info *kernel, int num_teams) return shadow; } +static void +process_reverse_offload (uint64_t fn, uint64_t mapnum, uint64_t rev_data, + uint64_t dev_num64) +{ + int dev_num = dev_num64; + uint64_t addrs_sizes_kinds[3]; + GOMP_OFFLOAD_host2dev (dev_num, &addrs_sizes_kinds, (void *) rev_data, + sizeof (addrs_sizes_kinds)); + GOMP_PLUGIN_target_rev (fn, mapnum, addrs_sizes_kinds[0], + addrs_sizes_kinds[1], addrs_sizes_kinds[2], + dev_num, NULL, NULL, NULL); +} + /* Output any data written to console output from the kernel. It is expected that this function is polled during kernel execution. @@ -1975,6 +1975,10 @@ console_output (struct kernel_info *kernel, struct kernargs *kernargs, case 1: printf ("%.128s%f\n", data->msg, data->dvalue); break; case 2: printf ("%.128s%.128s\n", data->msg, data->text); break; case 3: printf ("%.128s%.128s", data->msg, data->text); break; + case 4: + process_reverse_offload (data->msg_u64[0], data->msg_u64[1], + data->value_u64[0],data->value_u64[1]); + break; default: printf ("GCN print buffer error!\n"); break; } data->written = 0;