public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Jakub Jelinek <jakub@redhat.com>
To: Tobias Burnus <tobias@codesourcery.com>
Cc: gcc-patches <gcc-patches@gcc.gnu.org>
Subject: Re: [Patch][v5] OpenMP: Move omp requires checks to libgomp
Date: Fri, 1 Jul 2022 16:34:07 +0200	[thread overview]
Message-ID: <Yr8F36AD8gtcGyMl@tucnak> (raw)
In-Reply-To: <16ca2aa4-7e73-cf9d-9482-dd59f5b0cdae@codesourcery.com>

On Fri, Jul 01, 2022 at 03:06:05PM +0200, Tobias Burnus wrote:
> --- a/gcc/fortran/parse.cc
> +++ b/gcc/fortran/parse.cc
> @@ -1168,7 +1168,8 @@ decode_omp_directive (void)
>      }
>    switch (ret)
>      {
> -    case ST_OMP_DECLARE_TARGET:
> +    /* Set omp_target_seen; exclude ST_OMP_DECLARE_TARGET.
> +       FIXME: Get clarification, cf. OpenMP Spec Issue #3240.  */
>      case ST_OMP_TARGET:
>      case ST_OMP_TARGET_DATA:
>      case ST_OMP_TARGET_ENTER_DATA:
> @@ -6879,11 +6880,14 @@ done:
>  
>    /* Fixup for external procedures and resolve 'omp requires'.  */
>    int omp_requires;
> +  bool omp_target_seen;
>    omp_requires = 0;
> +  omp_target_seen = false;
>    for (gfc_current_ns = gfc_global_ns_list; gfc_current_ns;
>         gfc_current_ns = gfc_current_ns->sibling)
>      {
>        omp_requires |= gfc_current_ns->omp_requires;
> +      omp_target_seen |= gfc_current_ns->omp_target_seen;
>        gfc_check_externals (gfc_current_ns);
>      }
>    for (gfc_current_ns = gfc_global_ns_list; gfc_current_ns;
> @@ -6908,6 +6912,22 @@ done:
>        break;
>      }
>  
> +  if (omp_target_seen)
> +    omp_requires_mask = (enum omp_requires) (omp_requires_mask
> +					     | OMP_REQUIRES_TARGET_USED);
> +  if (omp_requires & OMP_REQ_REVERSE_OFFLOAD)
> +    omp_requires_mask = (enum omp_requires) (omp_requires_mask
> +					     | OMP_REQUIRES_REVERSE_OFFLOAD);
> +  if (omp_requires & OMP_REQ_UNIFIED_ADDRESS)
> +    omp_requires_mask = (enum omp_requires) (omp_requires_mask
> +					     | OMP_REQUIRES_UNIFIED_ADDRESS);
> +  if (omp_requires & OMP_REQ_UNIFIED_SHARED_MEMORY)
> +    omp_requires_mask
> +	  = (enum omp_requires) (omp_requires_mask
> +				 | OMP_REQUIRES_UNIFIED_SHARED_MEMORY);
> +  if (omp_requires & OMP_REQ_DYNAMIC_ALLOCATORS)
> +    omp_requires_mask = (enum omp_requires) (omp_requires_mask
> +					     | OMP_REQUIRES_DYNAMIC_ALLOCATORS);
>    /* Do the parse tree dump.  */
>    gfc_current_ns = flag_dump_fortran_original ? gfc_global_ns_list : NULL;

Will Fortran diagnose:
subroutine foo
!$omp requires unified_shared_memory
!$omp target
!$omp end target
end subroutine foo
subroutine bar
!$omp requires reverse_offload
!$omp target
!$omp end target
end subroutine bar

or just merge it from the different namespaces?
This is something that can be handled separately if it isn't resolved
and might need clarification from omp-lang.

> @@ -1764,6 +1781,20 @@ input_symtab (void)
>      }
>  }
>  
> +static void
> +omp_requires_to_name (char *buf, size_t size, unsigned int requires_mask)
> +{
> +  char *end = buf + size, *p = buf;
> +  if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
> +    p += snprintf (p, end - p, "unified_address");
> +  if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
> +    p += snprintf (p, end - p, "%sunified_shared_memory",
> +		   (p == buf ? "" : ", "));
> +  if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
> +    p += snprintf (p, end - p, "%sreverse_offload",
> +		   (p == buf ? "" : ", "));

So, what does this print if requires_mask is 0 (or just the target used bit
set but not unified_address, unified_shared_memory nor reverse_offload)?
Say in case of:
a.c
#pragma omp requires unified_address
void foo (void) {
#pragma omp target
;
}
b.c:
void bar (void) {
#pragma omp target
;
}
gcc -fopenmp -shared -o a.so a.c b.c
?

> @@ -1810,6 +1847,54 @@ input_offload_tables (bool do_force_output)
>  		 may be no refs to var_decl in offload LTO mode.  */
>  	      if (do_force_output)
>  		varpool_node::get (var_decl)->force_output = 1;
> +	      tmp_decl = var_decl;
> +	    }
> +	  else if (tag == LTO_symtab_edge)
> +	    {
> +	      static bool error_emitted = false;
> +	      HOST_WIDE_INT val = streamer_read_hwi (ib);
> +
> +	      if (omp_requires_mask == 0)
> +		{
> +		  omp_requires_mask = (omp_requires) val;
> +		  requires_decl = tmp_decl;
> +		  requires_fn = file_data->file_name;

And similarly here, if some device construct is seen but requires
directive isn't, not sure if in this version val would be 0 or something
with the TARGET_USED bit set.  In the latter case, only what is printed
for no requires or just atomic related requires is a problem, in the former
case due to the == 0 check mixing of 0 with non-zero would be ignored
but mixing of non-zero with 0 wouldn't be.

> +		}
> +	      else if (omp_requires_mask != val && !error_emitted)
> +		{
> +		  char buf[64], buf2[64];

Perhaps cleaner would be to size the buffers as
sizeof ("unified_address,unified_shared_memory,reverse_offload")
64 is more, but just a wild guess and if further clauses are added later,
it might be too small.

> +                (p == buf ? "" : ", "));
> +  if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
> +    p += snprintf (p, end - p, "%sreverse_offload
> +		  omp_requires_to_name (buf, sizeof (buf), omp_requires_mask);
> +		  omp_requires_to_name (buf2, sizeof (buf2), val);
> +		  error ("OpenMP %<requires%> directive with non-identical "
> +			 "clauses in multiple compilation units: %qs vs. %qs",
> +			 buf, buf2);

> @@ -1821,6 +1906,18 @@ input_offload_tables (bool do_force_output)
>        lto_destroy_simple_input_block (file_data, LTO_section_offload_table,
>  				      ib, data, len);
>      }
> +#ifdef ACCEL_COMPILER
> +  char *omp_requires_file = getenv ("GCC_OFFLOAD_OMP_REQUIRES_FILE");
> +  if (omp_requires_file == NULL || omp_requires_file[0] == '\0')
> +    fatal_error (input_location, "GCC_OFFLOAD_OMP_REQUIRES_FILE unset");
> +  FILE *f = fopen (omp_requires_file, "wb");
> +  if (!f)
> +    fatal_error (input_location, "Cannot open omp_requires file %qs",
> +		 omp_requires_file);
> +  uint32_t req_mask = omp_requires_mask & ~OMP_REQUIRES_TARGET_USED;

Perhaps it is better to also store the TARGET_USED bit and on the library
side completely ignore values of 0.

> --- a/gcc/omp-low.cc
> +++ b/gcc/omp-low.cc
> @@ -12701,6 +12701,11 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
>        gcc_unreachable ();
>      }
>  
> +  /* Ensure that requires map is written via output_offload_tables, even if only
> +     'target (enter/exit) data' is used in the translation unit.  */
> +  if (ENABLE_OFFLOADING && (omp_requires_mask & OMP_REQUIRES_TARGET_USED))
> +    g->have_offload = true;

Is
c.c:
#pragma omp requires unified_shared_memory
d.c:
void baz (void) {
  #pragma omp target
  ;
}
ok?  Pedantically reading current standard probably yes, but perhaps again
something to be discussed.  The question is what the requires directive
in that case would do, nothing at all as there are no device constructs
etc.?  In that case omp_requires_mask & OMP_REQUIRES_TARGET_USED is right.
But if it should influence the behavior anyway, the restriction should be
Either all compilation units of a program that contain ... device
constructs ... should include also requires directive with one of the
unified_shared_memory, unified_address or reverse_offload clauses.
In that case the test would be
omp_requires_mask & (OMP_REQUIRES_TARGET_USED | OMP_REQUIRES_UNIFIED* | OMP_REQUIRES_REV*)

> +static void
> +gomp_requires_to_name (char *buf, size_t size, int requires_mask)
> +{
> +  char *end = buf + size, *p = buf;
> +  if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
> +    p += snprintf (p, end - p, "unified_address");
> +  if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
> +    p += snprintf (p, end - p, "%sunified_shared_memory",
> +		   (p == buf ? "" : ", "));
> +  if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
> +    p += snprintf (p, end - p, "%sreverse_offload",
> +		   (p == buf ? "" : ", "));
> +}

Same question as earlier.

>  /* This function should be called from every offload image while loading.
>     It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
>     the target, and TARGET_DATA needed by target plugin.  */
> @@ -2323,11 +2341,29 @@ GOMP_offload_register_ver (unsigned version, const void *host_table,
>  			   int target_type, const void *target_data)
>  {
>    int i;
> +  int omp_req = omp_requires_mask;
>  
>    if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
>      gomp_fatal ("Library too old for offload (version %u < %u)",
>  		GOMP_VERSION, GOMP_VERSION_LIB (version));
> -  
> +
> +  if (GOMP_VERSION_LIB (version) > 1)
> +    {
> +      omp_req = (int) (size_t) ((void **) target_data)[0];
> +      target_data = &((void **) target_data)[1];
> +      if (num_devices && (omp_req & ~omp_requires_mask))
> +	{
> +	  char buf[64];
> +	  gomp_requires_to_name (buf, sizeof (buf),
> +				 omp_req & ~omp_requires_mask);
> +	  gomp_error ("devices already initialized when registering additional "
> +		      "offload images that use the additional OpenMP 'requires'"
> +		      " directive clauses %s. Therefore, the program might not "
> +		      "run correctly", buf);
> +	}
> +      omp_requires_mask |= omp_req;
> +    }

Both omp_requires_mask and num_devices are global vars that would be
modified concurrently in some other thread, so the above is racy.

What I'd do is int omp_req = 0; early, just the omp_req + target_data in
if (GOMP_VERSION_LIB (version) > 1) otherwise.  That computes
the local omp_req only.

> +
>    gomp_mutex_lock (&register_lock);

Then under the lock, you can do the merging.
But, IMHO the runtime library should repeat what is done in the offloading
lto1, diagnose if there are differences between the masks in between
different TUs, here at runtime on the program/shared library level.
And IMHO the error you emit above is unnecessary, because (at least
hopefully) the num_devices computation / device initialization should
only happen on behalf of some device construct or device related OpenMP API
routine, so at that point the shared library or program that does that
should have its own mask and if something is dlopened later, it should
either have compatible mask (nothing is diagnosed) or incompatible, but then
it should be diagnosed like any other incompatibilities.
If you want further diagnostics after devices are initialized, it could be
just a note only in case there would be some extra devices available that
don't match it.  If all available devices satisfy it, the extra message
wouldn't tell user anything interesting.

> @@ -4125,8 +4161,30 @@ gomp_target_init (void)
>  
>  	if (gomp_load_plugin_for_device (&current_device, plugin_name))
>  	  {
> -	    new_num_devs = current_device.get_num_devices_func ();
> -	    if (new_num_devs >= 1)
> +	    new_num_devs
> +	      = current_device.get_num_devices_func (omp_requires_mask);
> +	    if (new_num_devs < 0)
> +	      {
> +		bool found = false;
> +		int type = current_device.get_type_func ();
> +		for (int img = 0; img < num_offload_images; img++)
> +		  if (type == offload_images[img].type)
> +		    found = true;
> +		if (found)
> +		  {
> +		    char buf[64];
> +		    gomp_requires_to_name (buf, sizeof (buf),
> +					   omp_requires_mask);
> +		    char *name = (char *) malloc (cur_len + 1);
> +		    memcpy (name, cur, cur_len);
> +		    name[cur_len] = '\0';
> +		    GOMP_PLUGIN_error ("note: %s devices present but 'omp "
> +				       "requires %s' cannot be fulfilled",
> +				       name, buf);
> +		    free (name);
> +		  }

This isn't an error, so IMNSHO it should be at least guarded with
GOMP_DEBUG=true in the environment, not all programs want the library to be
talkative and break its standard error...
Why do you need the malloc?  Can't you just use %.*s ... cur_len, cur
?  If malloc would be necessary, it would need to be gomp_malloc, so that
the program doesn't silently crash if malloc fails, or should handle malloc
failure itself.

> --- a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
> +++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
> @@ -168,8 +168,12 @@ GOMP_OFFLOAD_get_type (void)
>  }
>  
>  extern "C" int
> -GOMP_OFFLOAD_get_num_devices (void)
> +GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask)
>  {
> +  /* Return -1 if no omp_requires_mask cannot be fulfilled but
> +     devices were present.  */
> +  if (num_devices > 0 && omp_requires_mask != 0)
> +    return -1;
>    TRACE ("(): return %d", num_devices);
>    return num_devices;
>  }

I thought I've mentioned earlier it would be nice to rename the
get_num_devices plugin hook because its API has changed, so that
if one mixes old plugin with new libgomp or vice versa it doesn't
break silently.

	Jakub


  reply	other threads:[~2022-07-01 14:34 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-01-13 15:07 [PATCH, OpenMP 5.0] More implementation of the requires directive Chung-Lin Tang
2021-01-13 15:27 ` Jakub Jelinek
2021-03-25 11:18 ` Thomas Schwinge
2022-03-29 13:42 ` Andrew Stubbs
2022-06-08  3:56 ` [Patch] OpenMP: Move omp requires checks to libgomp Tobias Burnus
2022-06-09 11:40   ` Jakub Jelinek
2022-06-09 12:46     ` Tobias Burnus
2022-06-09 14:19       ` Jakub Jelinek
2022-06-29 14:33         ` [Patch][v4] " Tobias Burnus
2022-06-29 17:02           ` Jakub Jelinek
2022-06-29 18:10             ` Tobias Burnus
2022-06-29 20:18               ` Jakub Jelinek
2022-07-01 13:06                 ` [Patch][v5] " Tobias Burnus
2022-07-01 14:34                   ` Jakub Jelinek [this message]
2022-07-01 16:31                     ` Tobias Burnus
2022-07-01 16:55                       ` Jakub Jelinek
2022-07-01 21:08                         ` Tobias Burnus
2022-07-04  8:31                           ` Jakub Jelinek
2022-07-07 13:26                           ` Fix one issue in OpenMP 'requires' directive diagnostics (was: [Patch][v5] OpenMP: Move omp requires checks to libgomp) Thomas Schwinge
2022-07-07 13:56                             ` Tobias Burnus
2022-07-08  6:59                               ` Thomas Schwinge
2022-07-06 10:42                   ` Restore 'GOMP_offload_unregister_ver' functionality " Thomas Schwinge
2022-07-06 13:59                     ` Tobias Burnus
2022-07-06 21:08                       ` Thomas Schwinge
2022-08-17 11:45                       ` Jakub Jelinek
2023-09-15  9:41                   ` [Patch][v5] OpenMP: Move omp requires checks to libgomp Thomas Schwinge
2022-07-07  8:37           ` Adjust 'libgomp.c-c++-common/requires-3.c' (was: [Patch][v4] OpenMP: Move omp requires checks to libgomp) Thomas Schwinge
2022-07-07  9:02             ` Tobias Burnus
2022-07-07  8:42           ` Enhance 'libgomp.c-c++-common/requires-4.c', 'libgomp.c-c++-common/requires-5.c' testing " Thomas Schwinge
2022-07-07  9:36             ` Tobias Burnus
2022-07-07 10:42               ` Thomas Schwinge
2022-07-06 10:30   ` Define 'OMP_REQUIRES_[...]', 'GOMP_REQUIRES_[...]' in a single place (was: [Patch] " Thomas Schwinge
2022-07-06 13:40     ` Tobias Burnus
2022-07-06 11:04   ` Fix Intel MIC 'mkoffload' for OpenMP 'requires' " Thomas Schwinge
2022-07-06 11:29     ` Tobias Burnus
2022-07-06 12:38       ` Thomas Schwinge
2022-07-06 13:30         ` Tobias Burnus
2022-07-07 10:46           ` Thomas Schwinge
2022-07-06 14:19     ` Tobias Burnus
2024-03-07 12:38   ` nvptx: 'cuDeviceGetCount' failure is fatal " Thomas Schwinge
2024-03-07 14:28     ` nvptx: 'cuDeviceGetCount' failure is fatal Tobias Burnus
2024-03-08 15:58       ` Thomas Schwinge

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Yr8F36AD8gtcGyMl@tucnak \
    --to=jakub@redhat.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=tobias@codesourcery.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).