Hi!

We had established the use of a boolean flag have_offload in gcc::context
to indicate whether during compilation, we've actually seen any code to
be offloaded (see cited below the relevant parts of the patch by Ilya et
al.).  This means that currently, the whole offload machinery will not be
run unless we actually have any offloaded data.  In turn, the
configured mkoffload programs (-foffload=[...], defaulting to
configure-time --enable-offload-targets=[...]) will not be invoked unless
we actually have any offloaded data, and consequently we will not
actually generate constructor code to call libgomp's
GOMP_offload_register unless we actually have any offloaded data.  At
runtime, in libgomp, we then cannot reliably tell which -foffload=[...]
targets have been specified during compilation.

But: at runtime, I'd like to know which -foffload=[...] targets have been
specified during compilation, so that we can, for example, reliably
resort to host fallback execution for -foffload=disable instead of
getting an error message that an offloaded function is missing.  On the
other hand, for example, for -foffload=nvptx-none, even if the user program
code doesn't contain any offloaded data (and thus the offload machinery
has not been run), the user program might still contain executable
directives or OpenACC runtime library calls, so we'd still like to use
the libgomp nvptx plugin.  However, we currently cannot detect this
situation.

I see two ways to resolve this: a) embed the compile-time -foffload=[...]
configuration in the executable (as a string, for example) for libgomp to
look that up, or b) make it a requirement that (if configured via
-foffload=[...]), the offload machinery is run even if there is not
actually any data to be offloaded, so we then reliably get the respective
constructor call to libgomp's GOMP_offload_register.  I once began to
implement a), but this started to get a bit ugly, so I then looked into b) instead.
Compared to the status quo, always running the whole offloading machinery
for the configured -foffload=[...] targets whenever -fopenacc/-fopenmp
are active certainly does introduce some overhead when there isn't
actually any code to be offloaded, so I'm not sure whether that is
acceptable?

Anyway, please comment on the prototype patch for b) that I'm posting
below, after citing the patch that added the boolean flag have_offload in
gcc::context:

On Wed, 5 Nov 2014 15:46:55 +0300, Ilya Verbin <iverbin@gmail.com> wrote:
> --- a/gcc/cgraph.c
> +++ b/gcc/cgraph.c
> @@ -474,6 +475,14 @@ cgraph_node::create (tree decl)
>    gcc_assert (TREE_CODE (decl) == FUNCTION_DECL);
>  
>    node->decl = decl;
> +
> +  if (flag_openmp
> +      && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl)))
> +    {
> +      node->offloadable = 1;
> +      g->have_offload = true;
> +    }
> +

> --- a/gcc/cgraphunit.c
> +++ b/gcc/cgraphunit.c
> @@ -2049,7 +2050,18 @@ ipa_passes (void)
>      targetm.asm_out.lto_start ();
>  
>    if (!in_lto_p)
> -    ipa_write_summaries ();
> +    {
> +      if (g->have_offload)
> +	{
> +	  section_name_prefix = OFFLOAD_SECTION_NAME_PREFIX;
> +	  ipa_write_summaries (true);
> +	}
> +      if (flag_lto)
> +	{
> +	  section_name_prefix = LTO_SECTION_NAME_PREFIX;
> +	  ipa_write_summaries (false);
> +	}
> +    }
>  
>    if (flag_generate_lto)
>      targetm.asm_out.lto_end ();
> @@ -2129,8 +2141,12 @@ symbol_table::compile (void)
>      fprintf (stderr, "Performing interprocedural optimizations\n");
>    state = IPA;
>  
> +  /* Offloading requires LTO infrastructure.  */
> +  if (!in_lto_p && g->have_offload)
> +    flag_generate_lto = 1;
> +
>    /* If LTO is enabled, initialize the streamer hooks needed by GIMPLE.  */
> -  if (flag_lto)
> +  if (flag_generate_lto)
>      lto_streamer_hooks_init ();
>  
>    /* Don't run the IPA passes if there was any error or sorry messages.  */

> --- a/gcc/context.c
> +++ b/gcc/context.c
> @@ -30,6 +30,8 @@ gcc::context *g;
>  
>  gcc::context::context ()
>  {
> +  have_offload = false;
> +
>    /* The pass manager's constructor uses the dump manager (to set up
>       dumps for the various passes), so the dump manager must be set up
>       before the pass manager.  */

> --- a/gcc/context.h
> +++ b/gcc/context.h
> @@ -33,6 +33,9 @@ class context
>  public:
>    context ();
>  
> +  /* The flag shows if there are symbols to be streamed for offloading.  */
> +  bool have_offload;
> +
>    /* Pass-management.  */
>  
>    pass_manager *get_passes () { gcc_assert (m_passes); return m_passes; }

> --- a/gcc/omp-low.c
> +++ b/gcc/omp-low.c
> @@ -1933,26 +1944,19 @@ create_omp_child_function (omp_context *ctx, bool task_copy)

> +	if (is_targetreg_ctx (octx))
>  	  {
> -	    target_p = true;
> +	    cgraph_node::get_create (decl)->offloadable = 1;
> +	    g->have_offload = true;
>  	    break;
>  	  }
>      }

> --- a/gcc/varpool.c
> +++ b/gcc/varpool.c
> @@ -155,6 +156,14 @@ varpool_node::get_create (tree decl)
>  
>    node = varpool_node::create_empty ();
>    node->decl = decl;
> +
> +  if (flag_openmp
> +      && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl)))
> +    {
> +      node->offloadable = 1;
> +      g->have_offload = true;
> +    }
> +
>    node->register_symbol ();
>    return node;
>  }

Prototype patch for b):

--- gcc/cgraph.c
+++ gcc/cgraph.c
@@ -513,12 +512,7 @@ cgraph_node::create (tree decl)
 
   if ((flag_openacc || flag_openmp)
       && lookup_attribute ("omp declare target", DECL_ATTRIBUTES (decl)))
-    {
-      node->offloadable = 1;
-#ifdef ENABLE_OFFLOADING
-      g->have_offload = true;
-#endif
-    }
+    node->offloadable = 1;
 
   node->register_symbol ();
 
--- gcc/cgraphunit.c
+++ gcc/cgraphunit.c
@@ -2226,13 +2226,15 @@ ipa_passes (void)
 
   if (!in_lto_p)
     {
-      if (g->have_offload)
+#ifdef ENABLE_OFFLOADING
+      if (flag_openacc || flag_openmp)
 	{
 	  section_name_prefix = OFFLOAD_SECTION_NAME_PREFIX;
 	  lto_stream_offload_p = true;
 	  ipa_write_summaries ();
 	  lto_stream_offload_p = false;
 	}
+#endif
       if (flag_lto)
 	{
 	  section_name_prefix = LTO_SECTION_NAME_PREFIX;
@@ -2323,9 +2325,11 @@ symbol_table::compile (void)
     fprintf (stderr, "Performing interprocedural optimizations\n");
   state = IPA;
 
+#ifdef ENABLE_OFFLOADING
   /* Offloading requires LTO infrastructure.  */
-  if (!in_lto_p && g->have_offload)
+  if (!in_lto_p && (flag_openacc || flag_openmp))
     flag_generate_offload = 1;
+#endif
 
   /* If LTO is enabled, initialize the streamer hooks needed by GIMPLE.  */
   if (flag_generate_lto || flag_generate_offload)
--- gcc/context.c
+++ gcc/context.c
@@ -29,8 +29,6 @@ gcc::context *g;
 
 gcc::context::context ()
 {
-  have_offload = false;
-
   /* The pass manager's constructor uses the dump manager (to set up
      dumps for the various passes), so the dump manager must be set up
      before the pass manager.  */
--- gcc/context.h
+++ gcc/context.h
@@ -34,9 +34,6 @@ public:
   context ();
   ~context ();
 
-  /* The flag shows if there are symbols to be streamed for offloading.  */
-  bool have_offload;
-
   /* Pass-management.  */
 
   pass_manager *get_passes () { gcc_assert (m_passes); return m_passes; }
--- gcc/lto-cgraph.c
+++ gcc/lto-cgraph.c
@@ -1122,8 +1122,10 @@ read_string (struct lto_input_block *ib)
 void
 output_offload_tables (void)
 {
+#if 0
   if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars))
     return;
+#endif
 
   struct lto_simple_output_block *ob
     = lto_create_simple_output_block (LTO_section_offload_table);
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -2288,9 +2287,6 @@ create_omp_child_function (omp_context *ctx, bool task_copy)
 	if (is_gimple_omp_offloaded (octx->stmt))
 	  {
 	    cgraph_node::get_create (decl)->offloadable = 1;
-#ifdef ENABLE_OFFLOADING
-	    g->have_offload = true;
-#endif
 	    break;
 	  }
     }
--- gcc/varpool.c
+++ gcc/varpool.c
@@ -149,7 +148,6 @@ make_offloadable_1 (varpool_node *node, tree decl ATTRIBUTE_UNUSED)
 {
   node->offloadable = 1;
 #ifdef ENABLE_OFFLOADING
-  g->have_offload = true;
   if (!in_lto_p)
     vec_safe_push (offload_vars, decl);
   node->force_output = 1;


Grüße,
 Thomas