Hi! We had established the use of a boolean flag have_offload in gcc::context to indicate whether during compilation, we've actually seen any code to be offloaded (see the relevant parts of the patch by Ilya et al., cited below). This means that currently, the whole offload machinery will not be run unless we actually have any offloaded data. This means that the configured mkoffload programs (-foffload=[...], defaulting to configure-time --enable-offload-targets=[...]) will not be invoked unless we actually have any offloaded data. This means that we will not actually generate constructor code to call libgomp's GOMP_offload_register unless we actually have any offloaded data. At runtime, in libgomp, we then cannot reliably tell which -foffload=[...] targets have been specified during compilation. But: at runtime, I'd like to know which -foffload=[...] targets have been specified during compilation, so that we can, for example, reliably resort to host fallback execution for -foffload=disable instead of getting an error message that an offloaded function is missing. On the other hand, for example, for -foffload=nvptx-none, even if user program code doesn't contain any offloaded data (and thus the offload machinery has not been run), the user program might still contain executable directives or OpenACC runtime library calls, so we'd still like to use the libgomp nvptx plugin. However, we currently cannot detect this situation. I see two ways to resolve this: a) embed the compile-time -foffload=[...] configuration in the executable (as a string, for example) for libgomp to look that up, or b) make it a requirement that (if configured via -foffload=[...]), the offload machinery is run even if there is not actually any data to be offloaded, so we then reliably get the respective constructor call to libgomp's GOMP_offload_register. I once began to implement a), but this got a bit ugly, so I then looked into b) instead. 
Compared to the status quo, always running the whole offloading machinery for the configured -foffload=[...] targets whenever -fopenacc/-fopenmp are active, certainly does introduce some overhead when there isn't actually any code to be offloaded, so I'm not sure whether that is acceptable? Anyway, please comment on the prototype patch for b) that I'm posting below, after citing the patch that added boolean flag have_offload in gcc::context: On Wed, 5 Nov 2014 15:46:55 +0300, Ilya Verbin wrote: > --- a/gcc/cgraph.c > +++ b/gcc/cgraph.c > @@ -474,6 +475,14 @@ cgraph_node::create (tree decl) > gcc_assert (TREE_CODE (decl) == FUNCTION_DECL); > > node->decl = decl; > + > + if (flag_openmp > + && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))) > + { > + node->offloadable = 1; > + g->have_offload = true; > + } > + > --- a/gcc/cgraphunit.c > +++ b/gcc/cgraphunit.c > @@ -2049,7 +2050,18 @@ ipa_passes (void) > targetm.asm_out.lto_start (); > > if (!in_lto_p) > - ipa_write_summaries (); > + { > + if (g->have_offload) > + { > + section_name_prefix = OFFLOAD_SECTION_NAME_PREFIX; > + ipa_write_summaries (true); > + } > + if (flag_lto) > + { > + section_name_prefix = LTO_SECTION_NAME_PREFIX; > + ipa_write_summaries (false); > + } > + } > > if (flag_generate_lto) > targetm.asm_out.lto_end (); > @@ -2129,8 +2141,12 @@ symbol_table::compile (void) > fprintf (stderr, "Performing interprocedural optimizations\n"); > state = IPA; > > + /* Offloading requires LTO infrastructure. */ > + if (!in_lto_p && g->have_offload) > + flag_generate_lto = 1; > + > /* If LTO is enabled, initialize the streamer hooks needed by GIMPLE. */ > - if (flag_lto) > + if (flag_generate_lto) > lto_streamer_hooks_init (); > > /* Don't run the IPA passes if there was any error or sorry messages. 
*/ > --- a/gcc/context.c > +++ b/gcc/context.c > @@ -30,6 +30,8 @@ gcc::context *g; > > gcc::context::context () > { > + have_offload = false; > + > /* The pass manager's constructor uses the dump manager (to set up > dumps for the various passes), so the dump manager must be set up > before the pass manager. */ > --- a/gcc/context.h > +++ b/gcc/context.h > @@ -33,6 +33,9 @@ class context > public: > context (); > > + /* The flag shows if there are symbols to be streamed for offloading. */ > + bool have_offload; > + > /* Pass-management. */ > > pass_manager *get_passes () { gcc_assert (m_passes); return m_passes; } > --- a/gcc/omp-low.c > +++ b/gcc/omp-low.c > @@ -1933,26 +1944,19 @@ create_omp_child_function (omp_context *ctx, bool task_copy) > + if (is_targetreg_ctx (octx)) > { > - target_p = true; > + cgraph_node::get_create (decl)->offloadable = 1; > + g->have_offload = true; > break; > } > } > --- a/gcc/varpool.c > +++ b/gcc/varpool.c > @@ -155,6 +156,14 @@ varpool_node::get_create (tree decl) > > node = varpool_node::create_empty (); > node->decl = decl; > + > + if (flag_openmp > + && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))) > + { > + node->offloadable = 1; > + g->have_offload = true; > + } > + > node->register_symbol (); > return node; > } Prototype patch for b): --- gcc/cgraph.c +++ gcc/cgraph.c @@ -513,12 +512,7 @@ cgraph_node::create (tree decl) if ((flag_openacc || flag_openmp) && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl))) - { - node->offloadable = 1; -#ifdef ENABLE_OFFLOADING - g->have_offload = true; -#endif - } + node->offloadable = 1; node->register_symbol (); --- gcc/cgraphunit.c +++ gcc/cgraphunit.c @@ -2226,13 +2226,15 @@ ipa_passes (void) if (!in_lto_p) { - if (g->have_offload) +#ifdef ENABLE_OFFLOADING + if (flag_openacc || flag_openmp) { section_name_prefix = OFFLOAD_SECTION_NAME_PREFIX; lto_stream_offload_p = true; ipa_write_summaries (); lto_stream_offload_p = false; } +#endif if (flag_lto) { 
section_name_prefix = LTO_SECTION_NAME_PREFIX; @@ -2323,9 +2325,11 @@ symbol_table::compile (void) fprintf (stderr, "Performing interprocedural optimizations\n"); state = IPA; +#ifdef ENABLE_OFFLOADING /* Offloading requires LTO infrastructure. */ - if (!in_lto_p && g->have_offload) + if (!in_lto_p && (flag_openacc || flag_openmp)) flag_generate_offload = 1; +#endif /* If LTO is enabled, initialize the streamer hooks needed by GIMPLE. */ if (flag_generate_lto || flag_generate_offload) --- gcc/context.c +++ gcc/context.c @@ -29,8 +29,6 @@ gcc::context *g; gcc::context::context () { - have_offload = false; - /* The pass manager's constructor uses the dump manager (to set up dumps for the various passes), so the dump manager must be set up before the pass manager. */ --- gcc/context.h +++ gcc/context.h @@ -34,9 +34,6 @@ public: context (); ~context (); - /* The flag shows if there are symbols to be streamed for offloading. */ - bool have_offload; - /* Pass-management. */ pass_manager *get_passes () { gcc_assert (m_passes); return m_passes; } --- gcc/lto-cgraph.c +++ gcc/lto-cgraph.c @@ -1122,8 +1122,10 @@ read_string (struct lto_input_block *ib) void output_offload_tables (void) { +#if 0 if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars)) return; +#endif struct lto_simple_output_block *ob = lto_create_simple_output_block (LTO_section_offload_table); --- gcc/omp-low.c +++ gcc/omp-low.c @@ -2288,9 +2287,6 @@ create_omp_child_function (omp_context *ctx, bool task_copy) if (is_gimple_omp_offloaded (octx->stmt)) { cgraph_node::get_create (decl)->offloadable = 1; -#ifdef ENABLE_OFFLOADING - g->have_offload = true; -#endif break; } } --- gcc/varpool.c +++ gcc/varpool.c @@ -149,7 +148,6 @@ make_offloadable_1 (varpool_node *node, tree decl ATTRIBUTE_UNUSED) { node->offloadable = 1; #ifdef ENABLE_OFFLOADING - g->have_offload = true; if (!in_lto_p) vec_safe_push (offload_vars, decl); node->force_output = 1; Grüße, Thomas