public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Thomas Schwinge <thomas@codesourcery.com>
To: Jakub Jelinek <jakub@redhat.com>
Cc: Chung-Lin Tang <cltang@codesourcery.com>, <gcc-patches@gcc.gnu.org>
Subject: Re: [PATCH 4/6, OpenACC, libgomp] Async re-work, libgomp/target.c changes
Date: Thu, 06 Dec 2018 17:21:00 -0000	[thread overview]
Message-ID: <yxfpzhti8tbn.fsf@hertz.schwinge.homeip.net> (raw)
In-Reply-To: <f4d87634-f528-b2ed-104c-0acddf208890@mentor.com>

Hi Jakub!

On Tue, 25 Sep 2018 21:11:24 +0800, Chung-Lin Tang <chunglin_tang@mentor.com> wrote:
> Hi Jakub,
> This part has changes to 'struct goacc_asyncqueue*' arguments to various
> memory copying/mapping functions. To lessen the amount of code changes new 'gomp_map/unmap_vars_async'
> functions names are used (with the non-async original names defined with the asyncqueue==NULL).

Is that the way you'd like this to be done, or should instead that
"struct goacc_asyncqueue *aq" parameter be added/passed through all the
existing functions?  (The latter would be my preference, actually.)

That is, as Chung-Lin proposed:

> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -177,6 +177,22 @@ gomp_device_copy (struct gomp_device_descr *devicep,
>      }
>  }
>  
> +static inline void
> +goacc_device_copy_async (struct gomp_device_descr *devicep,
> +			 bool (*copy_func) (int, void *, const void *, size_t,
> +					    struct goacc_asyncqueue *),
> +			 const char *dst, void *dstaddr,
> +			 const char *src, const void *srcaddr,
> +			 size_t size, struct goacc_asyncqueue *aq)
> +{
> +  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
> +    {
> +      gomp_mutex_unlock (&devicep->lock);
> +      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
> +		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
> +    }
> +}

..., or should we instead add "struct goacc_asyncqueue *aq" to the
existing "gomp_device_copy", and then, recursively, also add it to the
existing plugin functions "host2dev" and "dev2host", instead of adding
new functions "openacc.async.host2dev" and "openacc.async.dev2host" (see
"GOMP_OFFLOAD_host2dev" vs. "GOMP_OFFLOAD_openacc_async_host2dev", and
"GOMP_OFFLOAD_dev2host" vs. "GOMP_OFFLOAD_openacc_async_dev2host" as
proposed in <https://gcc.gnu.org/ml/gcc-patches/2018-09/msg01430.html>
"[PATCH 6/6, OpenACC, libgomp] Async re-work, nvptx changes")?

Similarly for "gomp_map_vars"/"gomp_map_vars_async",
"gomp_unmap_vars"/"gomp_unmap_vars_async", see below.

I'd rather have one single interface (optionally called with a "NULL"
"struct goacc_asyncqueue *aq"), instead of adding more/similar async
interfaces.  Aside from avoiding adding to the cognitive load, the
rationaly also being that in the long term, for performance reasons,
we'll probably want to make more stuff asynchronous that currently is
synchronous, thus eventually obsoleting the synchronous interfaces.

For reference:

> @@ -263,8 +279,9 @@ gomp_to_device_kind_p (int kind)
>      }
>  }
>  
> -static void
> +attribute_hidden void
>  gomp_copy_host2dev (struct gomp_device_descr *devicep,
> +		    struct goacc_asyncqueue *aq,
>  		    void *d, const void *h, size_t sz,
>  		    struct gomp_coalesce_buf *cbuf)
>  {
> @@ -293,14 +310,23 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
>  	    }
>  	}
>      }
> -  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
> +  if (aq)
> +    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
> +			     "dev", d, "host", h, sz, aq);
> +  else
> +    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
>  }
>  
> -static void
> +attribute_hidden void
>  gomp_copy_dev2host (struct gomp_device_descr *devicep,
> +		    struct goacc_asyncqueue *aq,
>  		    void *h, const void *d, size_t sz)
>  {
> -  gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
> +  if (aq)
> +    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
> +			     "host", h, "dev", d, sz, aq);
> +  else
> +    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
>  }
>  
>  static void
> @@ -318,7 +344,8 @@ gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
>     Helper function of gomp_map_vars.  */
>  
>  static inline void
> -gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
> +gomp_map_vars_existing (struct gomp_device_descr *devicep,
> +			struct goacc_asyncqueue *aq, splay_tree_key oldn,
>  			splay_tree_key newn, struct target_var_desc *tgt_var,
>  			unsigned char kind, struct gomp_coalesce_buf *cbuf)
>  {
> @@ -340,7 +367,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
>      }
>  
>    if (GOMP_MAP_ALWAYS_TO_P (kind))
> -    gomp_copy_host2dev (devicep,
> +    gomp_copy_host2dev (devicep, aq,
>  			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
>  				  + newn->host_start - oldn->host_start),
>  			(void *) newn->host_start,
> @@ -358,8 +385,8 @@ get_kind (bool short_mapkind, void *kinds, int idx)
>  }
>  
>  static void
> -gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
> -		  uintptr_t target_offset, uintptr_t bias,
> +gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
> +		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
>  		  struct gomp_coalesce_buf *cbuf)
>  {
>    struct gomp_device_descr *devicep = tgt->device_descr;
> @@ -370,7 +397,7 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
>    if (cur_node.host_start == (uintptr_t) NULL)
>      {
>        cur_node.tgt_offset = (uintptr_t) NULL;
> -      gomp_copy_host2dev (devicep,
> +      gomp_copy_host2dev (devicep, aq,
>  			  (void *) (tgt->tgt_start + target_offset),
>  			  (void *) &cur_node.tgt_offset,
>  			  sizeof (void *), cbuf);
> @@ -392,12 +419,13 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
>       array section.  Now subtract bias to get what we want
>       to initialize the pointer with.  */
>    cur_node.tgt_offset -= bias;
> -  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
> +  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
>  		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
>  }
>  
>  static void
> -gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
> +gomp_map_fields_existing (struct target_mem_desc *tgt,
> +			  struct goacc_asyncqueue *aq, splay_tree_key n,
>  			  size_t first, size_t i, void **hostaddrs,
>  			  size_t *sizes, void *kinds,
>  			  struct gomp_coalesce_buf *cbuf)
> @@ -417,7 +445,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
>        && n2->tgt == n->tgt
>        && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
>      {
> -      gomp_map_vars_existing (devicep, n2, &cur_node,
> +      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
>  			      &tgt->list[i], kind & typemask, cbuf);
>        return;
>      }
> @@ -433,8 +461,8 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
>  	      && n2->host_start - n->host_start
>  		 == n2->tgt_offset - n->tgt_offset)
>  	    {
> -	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
> -				      kind & typemask, cbuf);
> +	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
> +				      &tgt->list[i], kind & typemask, cbuf);
>  	      return;
>  	    }
>  	}
> @@ -445,7 +473,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
>  	  && n2->tgt == n->tgt
>  	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
>  	{
> -	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
> +	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
>  				  kind & typemask, cbuf);
>  	  return;
>  	}
> @@ -482,6 +510,18 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
>  	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
>  {
> +  struct target_mem_desc *tgt;
> +  tgt = gomp_map_vars_async (devicep, NULL, mapnum, hostaddrs, devaddrs,
> +			     sizes, kinds, short_mapkind, pragma_kind);
> +  return tgt;
> +}
> +
> +attribute_hidden struct target_mem_desc *
> +gomp_map_vars_async (struct gomp_device_descr *devicep,
> +		     struct goacc_asyncqueue *aq, size_t mapnum,
> +		     void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
> +		     bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
> +{
>    size_t i, tgt_align, tgt_size, not_found_cnt = 0;
>    bool has_firstprivate = false;
>    const int rshift = short_mapkind ? 8 : 3;
> @@ -594,7 +634,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  	      continue;
>  	    }
>  	  for (i = first; i <= last; i++)
> -	    gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
> +	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
>  				      sizes, kinds, NULL);
>  	  i--;
>  	  continue;
> @@ -639,7 +679,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>        else
>  	n = splay_tree_lookup (mem_map, &cur_node);
>        if (n && n->refcount != REFCOUNT_LINK)
> -	gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
> +	gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
>  				kind & typemask, NULL);
>        else
>  	{
> @@ -750,7 +790,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  		tgt_size = (tgt_size + align - 1) & ~(align - 1);
>  		tgt->list[i].offset = tgt_size;
>  		len = sizes[i];
> -		gomp_copy_host2dev (devicep,
> +		gomp_copy_host2dev (devicep, aq,
>  				    (void *) (tgt->tgt_start + tgt_size),
>  				    (void *) hostaddrs[i], len, cbufp);
>  		tgt_size += len;
> @@ -784,7 +824,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  		    continue;
>  		  }
>  		for (i = first; i <= last; i++)
> -		  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
> +		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
>  					    sizes, kinds, cbufp);
>  		i--;
>  		continue;
> @@ -804,7 +844,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
>  		if (cur_node.tgt_offset)
>  		  cur_node.tgt_offset -= sizes[i];
> -		gomp_copy_host2dev (devicep,
> +		gomp_copy_host2dev (devicep, aq,
>  				    (void *) (n->tgt->tgt_start
>  					      + n->tgt_offset
>  					      + cur_node.host_start
> @@ -825,7 +865,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  	      k->host_end = k->host_start + sizeof (void *);
>  	    splay_tree_key n = splay_tree_lookup (mem_map, k);
>  	    if (n && n->refcount != REFCOUNT_LINK)
> -	      gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
> +	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
>  				      kind & typemask, cbufp);
>  	    else
>  	      {
> @@ -878,18 +918,19 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  		  case GOMP_MAP_FORCE_TOFROM:
>  		  case GOMP_MAP_ALWAYS_TO:
>  		  case GOMP_MAP_ALWAYS_TOFROM:
> -		    gomp_copy_host2dev (devicep,
> +		    gomp_copy_host2dev (devicep, aq,
>  					(void *) (tgt->tgt_start
>  						  + k->tgt_offset),
>  					(void *) k->host_start,
>  					k->host_end - k->host_start, cbufp);
>  		    break;
>  		  case GOMP_MAP_POINTER:
> -		    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
> +		    gomp_map_pointer (tgt, aq,
> +				      (uintptr_t) *(void **) k->host_start,
>  				      k->tgt_offset, sizes[i], cbufp);
>  		    break;
>  		  case GOMP_MAP_TO_PSET:
> -		    gomp_copy_host2dev (devicep,
> +		    gomp_copy_host2dev (devicep, aq,
>  					(void *) (tgt->tgt_start
>  						  + k->tgt_offset),
>  					(void *) k->host_start,
> @@ -911,7 +952,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  			  tgt->list[j].always_copy_from = false;
>  			  if (k->refcount != REFCOUNT_INFINITY)
>  			    k->refcount++;
> -			  gomp_map_pointer (tgt,
> +			  gomp_map_pointer (tgt, aq,
>  					    (uintptr_t) *(void **) hostaddrs[j],
>  					    k->tgt_offset
>  					    + ((uintptr_t) hostaddrs[j]
> @@ -940,7 +981,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  		    break;
>  		  case GOMP_MAP_FORCE_DEVICEPTR:
>  		    assert (k->host_end - k->host_start == sizeof (void *));
> -		    gomp_copy_host2dev (devicep,
> +		    gomp_copy_host2dev (devicep, aq,
>  					(void *) (tgt->tgt_start
>  						  + k->tgt_offset),
>  					(void *) k->host_start,
> @@ -957,9 +998,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  		    /* Set link pointer on target to the device address of the
>  		       mapped object.  */
>  		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
> -		    devicep->host2dev_func (devicep->target_id,
> -					    (void *) n->tgt_offset,
> -					    &tgt_addr, sizeof (void *));
> +		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
> +					&tgt_addr, sizeof (void *), cbufp);
>  		  }
>  		array++;
>  	      }
> @@ -971,7 +1011,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>        for (i = 0; i < mapnum; i++)
>  	{
>  	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
> -	  gomp_copy_host2dev (devicep,
> +	  gomp_copy_host2dev (devicep, aq,
>  			      (void *) (tgt->tgt_start + i * sizeof (void *)),
>  			      (void *) &cur_node.tgt_offset, sizeof (void *),
>  			      cbufp);
> @@ -982,7 +1022,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>      {
>        long c = 0;
>        for (c = 0; c < cbuf.chunk_cnt; ++c)
> -	gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
> +	gomp_copy_host2dev (devicep, aq,
> +			    (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
>  			    (char *) cbuf.buf + (cbuf.chunks[2 * c] - cbuf.chunks[0]),
>  			    cbuf.chunks[2 * c + 1] - cbuf.chunks[2 * c], NULL);
>        free (cbuf.buf);
> @@ -1001,7 +1042,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>    return tgt;
>  }
>  
> -static void
> +attribute_hidden void
>  gomp_unmap_tgt (struct target_mem_desc *tgt)
>  {
>    /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
> @@ -1036,6 +1077,13 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
>  attribute_hidden void
>  gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
>  {
> +  gomp_unmap_vars_async (tgt, do_copyfrom, NULL);
> +}
> +
> +attribute_hidden void
> +gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
> +		       struct goacc_asyncqueue *aq)
> +{
>    struct gomp_device_descr *devicep = tgt->device_descr;
>  
>    if (tgt->list_count == 0)
> @@ -1071,7 +1119,7 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
>  
>        if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
>  	  || tgt->list[i].always_copy_from)
> -	gomp_copy_dev2host (devicep,
> +	gomp_copy_dev2host (devicep, aq,
>  			    (void *) (k->host_start + tgt->list[i].offset),
>  			    (void *) (k->tgt->tgt_start + k->tgt_offset
>  				      + tgt->list[i].offset),
> @@ -1137,9 +1185,10 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
>  	    size_t size = cur_node.host_end - cur_node.host_start;
>  
>  	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
> -	      gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
> +	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
> +				  NULL);
>  	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
> -	      gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
> +	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
>  	  }
>        }
>    gomp_mutex_unlock (&devicep->lock);
> @@ -1924,7 +1985,7 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
>  
>  	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
>  	      || kind == GOMP_MAP_ALWAYS_FROM)
> -	    gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
> +	    gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
>  				(void *) (k->tgt->tgt_start + k->tgt_offset
>  					  + cur_node.host_start
>  					  - k->host_start),


Grüße
 Thomas

  reply	other threads:[~2018-12-06 17:21 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-25 13:11 Chung-Lin Tang
2018-12-06 17:21 ` Thomas Schwinge [this message]
2018-12-06 17:43   ` Jakub Jelinek
2018-12-11 13:47     ` [PATCH 4/6, OpenACC, libgomp] Async re-work, libgomp/target.c changes (revised, v2) Chung-Lin Tang
2018-12-13 10:19       ` Jakub Jelinek

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=yxfpzhti8tbn.fsf@hertz.schwinge.homeip.net \
    --to=thomas@codesourcery.com \
    --cc=cltang@codesourcery.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=jakub@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).