GCN: libgomp+mkoffload.cc: Prepare for reverse offload fn lookup Add support to GCN for reverse lookup of function name to prepare for 'omp target device(ancestor:1)'. gcc/ChangeLog: * config/gcn/mkoffload.cc (process_asm): Create .offload_func_table, similar to pre-existing .offload_var_table. libgomp/ChangeLog: * plugin/plugin-gcn.c (GOMP_OFFLOAD_load_image): Read .offload_func_table to populate rev_fn_table when requested. gcc/config/gcn/mkoffload.cc | 11 ++++++++++- libgomp/plugin/plugin-gcn.c | 26 +++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc index 4206448703a..24d327355e3 100644 --- a/gcc/config/gcn/mkoffload.cc +++ b/gcc/config/gcn/mkoffload.cc @@ -537,63 +537,72 @@ process_asm (FILE *in, FILE *out, FILE *cfile) case IN_VARS: { char *varname; unsigned varsize; if (sscanf (buf, " .8byte %ms\n", &varname)) { fputs (buf, out); fgets (buf, sizeof (buf), in); if (!sscanf (buf, " .8byte %u\n", &varsize)) abort (); var_count++; } break; } case IN_FUNCS: { char *funcname; if (sscanf (buf, "\t.8byte\t%ms\n", &funcname)) { + fputs (buf, out); obstack_ptr_grow (&fns_os, funcname); fn_count++; continue; } break; } } char dummy; if (sscanf (buf, " .section .gnu.offload_vars%c", &dummy) > 0) { state = IN_VARS; /* Add a global symbol to allow plugin-gcn.c to locate the table at runtime. It can't use the "offload_var_table.N" emitted by the compiler because a) they're not global, and b) there's one for each input file combined into the binary. */ fputs (buf, out); fputs ("\t.global .offload_var_table\n" "\t.type .offload_var_table, @object\n" ".offload_var_table:\n", out); } else if (sscanf (buf, " .section .gnu.offload_funcs%c", &dummy) > 0) - state = IN_FUNCS; + { + state = IN_FUNCS; + /* Likewise for .gnu.offload_vars; used for reverse offload. 
*/ + fputs (buf, out); + fputs ("\t.global .offload_func_table\n" + "\t.type .offload_func_table, @object\n" + ".offload_func_table:\n", + out); + } else if (sscanf (buf, " .amdgpu_metadata%c", &dummy) > 0) { state = IN_METADATA; regcount.kernel_name = NULL; regcount.sgpr_count = regcount.vgpr_count = -1; } else if (sscanf (buf, " .section %c", &dummy) > 0 || sscanf (buf, " .text%c", &dummy) > 0 || sscanf (buf, " .bss%c", &dummy) > 0 || sscanf (buf, " .data%c", &dummy) > 0 || sscanf (buf, " .ident %c", &dummy) > 0) state = IN_CODE; else if (sscanf (buf, " .end_amdgpu_metadata%c", &dummy) > 0) { state = IN_CODE; gcc_assert (regcount.kernel_name != NULL && regcount.sgpr_count >= 0 && regcount.vgpr_count >= 0); obstack_grow (&regcounts_os, &regcount, sizeof (regcount)); diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 363e2950649..00603981866 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -3353,7 +3353,7 @@ GOMP_OFFLOAD_init_device (int n) int GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, struct addr_pair **target_table, - uint64_t **rev_fn_table __attribute__((unused))) + uint64_t **rev_fn_table) { if (GOMP_VERSION_DEV (version) != GOMP_VERSION_GCN) { @@ -3520,6 +3520,30 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, if (module->fini_array_func) kernel_count--; + if (rev_fn_table != NULL && kernel_count == 0) + *rev_fn_table = NULL; + else if (rev_fn_table != NULL) + { + hsa_status_t status; + hsa_executable_symbol_t var_symbol; + status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL, + ".offload_func_table", + agent->id, 0, &var_symbol); + if (status != 
HSA_STATUS_SUCCESS) + hsa_fatal ("Could not extract a variable from its symbol", status); + *rev_fn_table = GOMP_PLUGIN_malloc (kernel_count * sizeof (uint64_t)); + GOMP_OFFLOAD_dev2host (agent->device_id, *rev_fn_table, + (void*) fn_table_addr, + kernel_count * sizeof (uint64_t)); + } + return kernel_count + var_count + other_count; }