public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 4/6, OpenACC, libgomp] Async re-work, libgomp/target.c changes
@ 2018-09-25 13:11 Chung-Lin Tang
  2018-12-06 17:21 ` Thomas Schwinge
  0 siblings, 1 reply; 5+ messages in thread
From: Chung-Lin Tang @ 2018-09-25 13:11 UTC (permalink / raw)
  To: gcc-patches, Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 1378 bytes --]

Hi Jakub,
This part adds 'struct goacc_asyncqueue *' arguments to various
memory copying/mapping functions. To lessen the amount of code changes, new
'gomp_map_vars_async'/'gomp_unmap_vars_async' function names are used (with the
original non-async names defined as wrappers that pass asyncqueue == NULL).

Inside gomp_target_fini, a function named 'gomp_fini_device' has been (re?)introduced,
since asyncqueue destruction is now also needed before the ->fini_device_func() call.

Thanks,
Chung-Lin


	* target.c (goacc_device_copy_async): New function.
	(gomp_copy_host2dev): Remove 'static', add goacc_asyncqueue parameter,
	add goacc_device_copy_async case.
	(gomp_copy_dev2host): Likewise.
	(gomp_map_vars_existing): Add goacc_asyncqueue parameter, adjust code.
	(gomp_map_pointer): Likewise.
	(gomp_map_fields_existing): Likewise.
	(gomp_map_vars): Add function for compatibility.
	(gomp_map_vars_async): Adapt from gomp_map_vars, add goacc_asyncqueue
	parameter.
	(gomp_unmap_tgt): Remove 'static', add attribute_hidden.
	(gomp_unmap_vars): Add function for compatibility.
	(gomp_unmap_vars_async): Adapt from gomp_unmap_vars, add
	goacc_asyncqueue parameter.
	(gomp_fini_device): New function.
	(gomp_exit_data): Adjust gomp_copy_dev2host call.
	(gomp_load_plugin_for_device): Remove old interface, adjust to load
	new async interface.
	(gomp_target_fini): Adjust code to call gomp_fini_device.


[-- Attachment #2: async-04.libgomp-target.patch --]
[-- Type: text/plain, Size: 16504 bytes --]

diff --git a/libgomp/target.c b/libgomp/target.c
index dda041c..ff5b114 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -177,6 +177,22 @@ gomp_device_copy (struct gomp_device_descr *devicep,
     }
 }
 
+static inline void
+goacc_device_copy_async (struct gomp_device_descr *devicep,
+			 bool (*copy_func) (int, void *, const void *, size_t,
+					    struct goacc_asyncqueue *),
+			 const char *dst, void *dstaddr,
+			 const char *src, const void *srcaddr,
+			 size_t size, struct goacc_asyncqueue *aq)
+{
+  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
+		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
+    }
+}
+
 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
    host to device memory transfers.  */
 
@@ -263,8 +279,9 @@ gomp_to_device_kind_p (int kind)
     }
 }
 
-static void
+attribute_hidden void
 gomp_copy_host2dev (struct gomp_device_descr *devicep,
+		    struct goacc_asyncqueue *aq,
 		    void *d, const void *h, size_t sz,
 		    struct gomp_coalesce_buf *cbuf)
 {
@@ -293,14 +310,23 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
 	    }
 	}
     }
-  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
+  if (aq)
+    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
+			     "dev", d, "host", h, sz, aq);
+  else
+    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
 }
 
-static void
+attribute_hidden void
 gomp_copy_dev2host (struct gomp_device_descr *devicep,
+		    struct goacc_asyncqueue *aq,
 		    void *h, const void *d, size_t sz)
 {
-  gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
+  if (aq)
+    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
+			     "host", h, "dev", d, sz, aq);
+  else
+    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
 }
 
 static void
@@ -318,7 +344,8 @@ gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
    Helper function of gomp_map_vars.  */
 
 static inline void
-gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
+gomp_map_vars_existing (struct gomp_device_descr *devicep,
+			struct goacc_asyncqueue *aq, splay_tree_key oldn,
 			splay_tree_key newn, struct target_var_desc *tgt_var,
 			unsigned char kind, struct gomp_coalesce_buf *cbuf)
 {
@@ -340,7 +367,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
     }
 
   if (GOMP_MAP_ALWAYS_TO_P (kind))
-    gomp_copy_host2dev (devicep,
+    gomp_copy_host2dev (devicep, aq,
 			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
 				  + newn->host_start - oldn->host_start),
 			(void *) newn->host_start,
@@ -358,8 +385,8 @@ get_kind (bool short_mapkind, void *kinds, int idx)
 }
 
 static void
-gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
-		  uintptr_t target_offset, uintptr_t bias,
+gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
+		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
 		  struct gomp_coalesce_buf *cbuf)
 {
   struct gomp_device_descr *devicep = tgt->device_descr;
@@ -370,7 +397,7 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
   if (cur_node.host_start == (uintptr_t) NULL)
     {
       cur_node.tgt_offset = (uintptr_t) NULL;
-      gomp_copy_host2dev (devicep,
+      gomp_copy_host2dev (devicep, aq,
 			  (void *) (tgt->tgt_start + target_offset),
 			  (void *) &cur_node.tgt_offset,
 			  sizeof (void *), cbuf);
@@ -392,12 +419,13 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
      array section.  Now subtract bias to get what we want
      to initialize the pointer with.  */
   cur_node.tgt_offset -= bias;
-  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
+  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
 		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
 }
 
 static void
-gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
+gomp_map_fields_existing (struct target_mem_desc *tgt,
+			  struct goacc_asyncqueue *aq, splay_tree_key n,
 			  size_t first, size_t i, void **hostaddrs,
 			  size_t *sizes, void *kinds,
 			  struct gomp_coalesce_buf *cbuf)
@@ -417,7 +445,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
       && n2->tgt == n->tgt
       && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
     {
-      gomp_map_vars_existing (devicep, n2, &cur_node,
+      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
 			      &tgt->list[i], kind & typemask, cbuf);
       return;
     }
@@ -433,8 +461,8 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
 	      && n2->host_start - n->host_start
 		 == n2->tgt_offset - n->tgt_offset)
 	    {
-	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
-				      kind & typemask, cbuf);
+	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
+				      &tgt->list[i], kind & typemask, cbuf);
 	      return;
 	    }
 	}
@@ -445,7 +473,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
 	  && n2->tgt == n->tgt
 	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
 	{
-	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
+	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
 				  kind & typemask, cbuf);
 	  return;
 	}
@@ -482,6 +510,18 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
 	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
 {
+  struct target_mem_desc *tgt;
+  tgt = gomp_map_vars_async (devicep, NULL, mapnum, hostaddrs, devaddrs,
+			     sizes, kinds, short_mapkind, pragma_kind);
+  return tgt;
+}
+
+attribute_hidden struct target_mem_desc *
+gomp_map_vars_async (struct gomp_device_descr *devicep,
+		     struct goacc_asyncqueue *aq, size_t mapnum,
+		     void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
+		     bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
+{
   size_t i, tgt_align, tgt_size, not_found_cnt = 0;
   bool has_firstprivate = false;
   const int rshift = short_mapkind ? 8 : 3;
@@ -594,7 +634,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 	      continue;
 	    }
 	  for (i = first; i <= last; i++)
-	    gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
+	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
 				      sizes, kinds, NULL);
 	  i--;
 	  continue;
@@ -639,7 +679,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
       else
 	n = splay_tree_lookup (mem_map, &cur_node);
       if (n && n->refcount != REFCOUNT_LINK)
-	gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
+	gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
 				kind & typemask, NULL);
       else
 	{
@@ -750,7 +790,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 		tgt_size = (tgt_size + align - 1) & ~(align - 1);
 		tgt->list[i].offset = tgt_size;
 		len = sizes[i];
-		gomp_copy_host2dev (devicep,
+		gomp_copy_host2dev (devicep, aq,
 				    (void *) (tgt->tgt_start + tgt_size),
 				    (void *) hostaddrs[i], len, cbufp);
 		tgt_size += len;
@@ -784,7 +824,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 		    continue;
 		  }
 		for (i = first; i <= last; i++)
-		  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
+		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
 					    sizes, kinds, cbufp);
 		i--;
 		continue;
@@ -804,7 +844,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
 		if (cur_node.tgt_offset)
 		  cur_node.tgt_offset -= sizes[i];
-		gomp_copy_host2dev (devicep,
+		gomp_copy_host2dev (devicep, aq,
 				    (void *) (n->tgt->tgt_start
 					      + n->tgt_offset
 					      + cur_node.host_start
@@ -825,7 +865,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 	      k->host_end = k->host_start + sizeof (void *);
 	    splay_tree_key n = splay_tree_lookup (mem_map, k);
 	    if (n && n->refcount != REFCOUNT_LINK)
-	      gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
+	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
 				      kind & typemask, cbufp);
 	    else
 	      {
@@ -878,18 +918,19 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 		  case GOMP_MAP_FORCE_TOFROM:
 		  case GOMP_MAP_ALWAYS_TO:
 		  case GOMP_MAP_ALWAYS_TOFROM:
-		    gomp_copy_host2dev (devicep,
+		    gomp_copy_host2dev (devicep, aq,
 					(void *) (tgt->tgt_start
 						  + k->tgt_offset),
 					(void *) k->host_start,
 					k->host_end - k->host_start, cbufp);
 		    break;
 		  case GOMP_MAP_POINTER:
-		    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
+		    gomp_map_pointer (tgt, aq,
+				      (uintptr_t) *(void **) k->host_start,
 				      k->tgt_offset, sizes[i], cbufp);
 		    break;
 		  case GOMP_MAP_TO_PSET:
-		    gomp_copy_host2dev (devicep,
+		    gomp_copy_host2dev (devicep, aq,
 					(void *) (tgt->tgt_start
 						  + k->tgt_offset),
 					(void *) k->host_start,
@@ -911,7 +952,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 			  tgt->list[j].always_copy_from = false;
 			  if (k->refcount != REFCOUNT_INFINITY)
 			    k->refcount++;
-			  gomp_map_pointer (tgt,
+			  gomp_map_pointer (tgt, aq,
 					    (uintptr_t) *(void **) hostaddrs[j],
 					    k->tgt_offset
 					    + ((uintptr_t) hostaddrs[j]
@@ -940,7 +981,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 		    break;
 		  case GOMP_MAP_FORCE_DEVICEPTR:
 		    assert (k->host_end - k->host_start == sizeof (void *));
-		    gomp_copy_host2dev (devicep,
+		    gomp_copy_host2dev (devicep, aq,
 					(void *) (tgt->tgt_start
 						  + k->tgt_offset),
 					(void *) k->host_start,
@@ -957,9 +998,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 		    /* Set link pointer on target to the device address of the
 		       mapped object.  */
 		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
-		    devicep->host2dev_func (devicep->target_id,
-					    (void *) n->tgt_offset,
-					    &tgt_addr, sizeof (void *));
+		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
+					&tgt_addr, sizeof (void *), cbufp);
 		  }
 		array++;
 	      }
@@ -971,7 +1011,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
       for (i = 0; i < mapnum; i++)
 	{
 	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
-	  gomp_copy_host2dev (devicep,
+	  gomp_copy_host2dev (devicep, aq,
 			      (void *) (tgt->tgt_start + i * sizeof (void *)),
 			      (void *) &cur_node.tgt_offset, sizeof (void *),
 			      cbufp);
@@ -982,7 +1022,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
     {
       long c = 0;
       for (c = 0; c < cbuf.chunk_cnt; ++c)
-	gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
+	gomp_copy_host2dev (devicep, aq,
+			    (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
 			    (char *) cbuf.buf + (cbuf.chunks[2 * c] - cbuf.chunks[0]),
 			    cbuf.chunks[2 * c + 1] - cbuf.chunks[2 * c], NULL);
       free (cbuf.buf);
@@ -1001,7 +1042,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
   return tgt;
 }
 
-static void
+attribute_hidden void
 gomp_unmap_tgt (struct target_mem_desc *tgt)
 {
   /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
@@ -1036,6 +1077,13 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
 attribute_hidden void
 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
 {
+  gomp_unmap_vars_async (tgt, do_copyfrom, NULL);
+}
+
+attribute_hidden void
+gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
+		       struct goacc_asyncqueue *aq)
+{
   struct gomp_device_descr *devicep = tgt->device_descr;
 
   if (tgt->list_count == 0)
@@ -1071,7 +1119,7 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
 
       if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
 	  || tgt->list[i].always_copy_from)
-	gomp_copy_dev2host (devicep,
+	gomp_copy_dev2host (devicep, aq,
 			    (void *) (k->host_start + tgt->list[i].offset),
 			    (void *) (k->tgt->tgt_start + k->tgt_offset
 				      + tgt->list[i].offset),
@@ -1137,9 +1185,10 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
 	    size_t size = cur_node.host_end - cur_node.host_start;
 
 	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
-	      gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
+	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
+				  NULL);
 	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
-	      gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
+	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
 	  }
       }
   gomp_mutex_unlock (&devicep->lock);
@@ -1432,9 +1481,21 @@ gomp_init_device (struct gomp_device_descr *devicep)
 				   false);
     }
 
+  /* Initialize OpenACC asynchronous queues.  */
+  goacc_init_asyncqueues (devicep);
+
   devicep->state = GOMP_DEVICE_INITIALIZED;
 }
 
+attribute_hidden bool
+gomp_fini_device (struct gomp_device_descr *devicep)
+{
+  bool ret = goacc_fini_asyncqueues (devicep);
+  ret &= devicep->fini_device_func (devicep->target_id);
+  devicep->state = GOMP_DEVICE_FINALIZED;
+  return ret;
+}
+
 attribute_hidden void
 gomp_unload_device (struct gomp_device_descr *devicep)
 {
@@ -1924,7 +1985,7 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
 
 	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
 	      || kind == GOMP_MAP_ALWAYS_FROM)
-	    gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
+	    gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
 				(void *) (k->tgt->tgt_start + k->tgt_offset
 					  + cur_node.host_start
 					  - k->host_start),
@@ -2557,20 +2618,20 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
     {
       if (!DLSYM_OPT (openacc.exec, openacc_exec)
-	  || !DLSYM_OPT (openacc.register_async_cleanup,
-			 openacc_register_async_cleanup)
-	  || !DLSYM_OPT (openacc.async_test, openacc_async_test)
-	  || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
-	  || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
-	  || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
-	  || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
-	  || !DLSYM_OPT (openacc.async_wait_all_async,
-			 openacc_async_wait_all_async)
-	  || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
 	  || !DLSYM_OPT (openacc.create_thread_data,
 			 openacc_create_thread_data)
 	  || !DLSYM_OPT (openacc.destroy_thread_data,
-			 openacc_destroy_thread_data))
+			 openacc_destroy_thread_data)
+	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
+	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
+	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
+	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
+	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
+	  || !DLSYM_OPT (openacc.async.queue_callback,
+			 openacc_async_queue_callback)
+	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
+	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
+	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev))
 	{
 	  /* Require all the OpenACC handlers if we have
 	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
@@ -2621,10 +2682,7 @@ gomp_target_fini (void)
       struct gomp_device_descr *devicep = &devices[i];
       gomp_mutex_lock (&devicep->lock);
       if (devicep->state == GOMP_DEVICE_INITIALIZED)
-	{
-	  ret = devicep->fini_device_func (devicep->target_id);
-	  devicep->state = GOMP_DEVICE_FINALIZED;
-	}
+	ret = gomp_fini_device (devicep);
       gomp_mutex_unlock (&devicep->lock);
       if (!ret)
 	gomp_fatal ("device finalization failed");

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 4/6, OpenACC, libgomp] Async re-work, libgomp/target.c changes
  2018-09-25 13:11 [PATCH 4/6, OpenACC, libgomp] Async re-work, libgomp/target.c changes Chung-Lin Tang
@ 2018-12-06 17:21 ` Thomas Schwinge
  2018-12-06 17:43   ` Jakub Jelinek
  0 siblings, 1 reply; 5+ messages in thread
From: Thomas Schwinge @ 2018-12-06 17:21 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Chung-Lin Tang, gcc-patches

Hi Jakub!

On Tue, 25 Sep 2018 21:11:24 +0800, Chung-Lin Tang <chunglin_tang@mentor.com> wrote:
> Hi Jakub,
> This part has changes to 'struct goacc_asyncqueue*' arguments to various
> memory copying/mapping functions. To lessen the amount of code changes new 'gomp_map/unmap_vars_async'
> functions names are used (with the non-async original names defined with the asyncqueue==NULL).

Is that the way you'd like this to be done, or should that
"struct goacc_asyncqueue *aq" parameter instead be added to/passed through all the
existing functions?  (The latter would be my preference, actually.)

That is, as Chung-Lin proposed:

> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -177,6 +177,22 @@ gomp_device_copy (struct gomp_device_descr *devicep,
>      }
>  }
>  
> +static inline void
> +goacc_device_copy_async (struct gomp_device_descr *devicep,
> +			 bool (*copy_func) (int, void *, const void *, size_t,
> +					    struct goacc_asyncqueue *),
> +			 const char *dst, void *dstaddr,
> +			 const char *src, const void *srcaddr,
> +			 size_t size, struct goacc_asyncqueue *aq)
> +{
> +  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
> +    {
> +      gomp_mutex_unlock (&devicep->lock);
> +      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
> +		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
> +    }
> +}

..., or should we instead add "struct goacc_asyncqueue *aq" to the
existing "gomp_device_copy", and then, recursively, also add it to the
existing plugin functions "host2dev" and "dev2host", instead of adding
new functions "openacc.async.host2dev" and "openacc.async.dev2host" (see
"GOMP_OFFLOAD_host2dev" vs. "GOMP_OFFLOAD_openacc_async_host2dev", and
"GOMP_OFFLOAD_dev2host" vs. "GOMP_OFFLOAD_openacc_async_dev2host" as
proposed in <https://gcc.gnu.org/ml/gcc-patches/2018-09/msg01430.html>
"[PATCH 6/6, OpenACC, libgomp] Async re-work, nvptx changes")?

Similarly for "gomp_map_vars"/"gomp_map_vars_async",
"gomp_unmap_vars"/"gomp_unmap_vars_async", see below.

I'd rather have one single interface (optionally called with a "NULL"
"struct goacc_asyncqueue *aq"), instead of adding more/similar async
interfaces.  Aside from avoiding adding to the cognitive load, the
rationale is also that in the long term, for performance reasons,
we'll probably want to make more stuff asynchronous that currently is
synchronous, thus eventually obsoleting the synchronous interfaces.

For reference:

> @@ -263,8 +279,9 @@ gomp_to_device_kind_p (int kind)
>      }
>  }
>  
> -static void
> +attribute_hidden void
>  gomp_copy_host2dev (struct gomp_device_descr *devicep,
> +		    struct goacc_asyncqueue *aq,
>  		    void *d, const void *h, size_t sz,
>  		    struct gomp_coalesce_buf *cbuf)
>  {
> @@ -293,14 +310,23 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
>  	    }
>  	}
>      }
> -  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
> +  if (aq)
> +    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
> +			     "dev", d, "host", h, sz, aq);
> +  else
> +    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
>  }
>  
> -static void
> +attribute_hidden void
>  gomp_copy_dev2host (struct gomp_device_descr *devicep,
> +		    struct goacc_asyncqueue *aq,
>  		    void *h, const void *d, size_t sz)
>  {
> -  gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
> +  if (aq)
> +    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
> +			     "host", h, "dev", d, sz, aq);
> +  else
> +    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
>  }
>  
>  static void
> @@ -318,7 +344,8 @@ gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
>     Helper function of gomp_map_vars.  */
>  
>  static inline void
> -gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
> +gomp_map_vars_existing (struct gomp_device_descr *devicep,
> +			struct goacc_asyncqueue *aq, splay_tree_key oldn,
>  			splay_tree_key newn, struct target_var_desc *tgt_var,
>  			unsigned char kind, struct gomp_coalesce_buf *cbuf)
>  {
> @@ -340,7 +367,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
>      }
>  
>    if (GOMP_MAP_ALWAYS_TO_P (kind))
> -    gomp_copy_host2dev (devicep,
> +    gomp_copy_host2dev (devicep, aq,
>  			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
>  				  + newn->host_start - oldn->host_start),
>  			(void *) newn->host_start,
> @@ -358,8 +385,8 @@ get_kind (bool short_mapkind, void *kinds, int idx)
>  }
>  
>  static void
> -gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
> -		  uintptr_t target_offset, uintptr_t bias,
> +gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
> +		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
>  		  struct gomp_coalesce_buf *cbuf)
>  {
>    struct gomp_device_descr *devicep = tgt->device_descr;
> @@ -370,7 +397,7 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
>    if (cur_node.host_start == (uintptr_t) NULL)
>      {
>        cur_node.tgt_offset = (uintptr_t) NULL;
> -      gomp_copy_host2dev (devicep,
> +      gomp_copy_host2dev (devicep, aq,
>  			  (void *) (tgt->tgt_start + target_offset),
>  			  (void *) &cur_node.tgt_offset,
>  			  sizeof (void *), cbuf);
> @@ -392,12 +419,13 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
>       array section.  Now subtract bias to get what we want
>       to initialize the pointer with.  */
>    cur_node.tgt_offset -= bias;
> -  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
> +  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
>  		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
>  }
>  
>  static void
> -gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
> +gomp_map_fields_existing (struct target_mem_desc *tgt,
> +			  struct goacc_asyncqueue *aq, splay_tree_key n,
>  			  size_t first, size_t i, void **hostaddrs,
>  			  size_t *sizes, void *kinds,
>  			  struct gomp_coalesce_buf *cbuf)
> @@ -417,7 +445,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
>        && n2->tgt == n->tgt
>        && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
>      {
> -      gomp_map_vars_existing (devicep, n2, &cur_node,
> +      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
>  			      &tgt->list[i], kind & typemask, cbuf);
>        return;
>      }
> @@ -433,8 +461,8 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
>  	      && n2->host_start - n->host_start
>  		 == n2->tgt_offset - n->tgt_offset)
>  	    {
> -	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
> -				      kind & typemask, cbuf);
> +	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
> +				      &tgt->list[i], kind & typemask, cbuf);
>  	      return;
>  	    }
>  	}
> @@ -445,7 +473,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
>  	  && n2->tgt == n->tgt
>  	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
>  	{
> -	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
> +	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
>  				  kind & typemask, cbuf);
>  	  return;
>  	}
> @@ -482,6 +510,18 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
>  	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
>  {
> +  struct target_mem_desc *tgt;
> +  tgt = gomp_map_vars_async (devicep, NULL, mapnum, hostaddrs, devaddrs,
> +			     sizes, kinds, short_mapkind, pragma_kind);
> +  return tgt;
> +}
> +
> +attribute_hidden struct target_mem_desc *
> +gomp_map_vars_async (struct gomp_device_descr *devicep,
> +		     struct goacc_asyncqueue *aq, size_t mapnum,
> +		     void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
> +		     bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
> +{
>    size_t i, tgt_align, tgt_size, not_found_cnt = 0;
>    bool has_firstprivate = false;
>    const int rshift = short_mapkind ? 8 : 3;
> @@ -594,7 +634,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  	      continue;
>  	    }
>  	  for (i = first; i <= last; i++)
> -	    gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
> +	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
>  				      sizes, kinds, NULL);
>  	  i--;
>  	  continue;
> @@ -639,7 +679,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>        else
>  	n = splay_tree_lookup (mem_map, &cur_node);
>        if (n && n->refcount != REFCOUNT_LINK)
> -	gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
> +	gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
>  				kind & typemask, NULL);
>        else
>  	{
> @@ -750,7 +790,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  		tgt_size = (tgt_size + align - 1) & ~(align - 1);
>  		tgt->list[i].offset = tgt_size;
>  		len = sizes[i];
> -		gomp_copy_host2dev (devicep,
> +		gomp_copy_host2dev (devicep, aq,
>  				    (void *) (tgt->tgt_start + tgt_size),
>  				    (void *) hostaddrs[i], len, cbufp);
>  		tgt_size += len;
> @@ -784,7 +824,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  		    continue;
>  		  }
>  		for (i = first; i <= last; i++)
> -		  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
> +		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
>  					    sizes, kinds, cbufp);
>  		i--;
>  		continue;
> @@ -804,7 +844,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
>  		if (cur_node.tgt_offset)
>  		  cur_node.tgt_offset -= sizes[i];
> -		gomp_copy_host2dev (devicep,
> +		gomp_copy_host2dev (devicep, aq,
>  				    (void *) (n->tgt->tgt_start
>  					      + n->tgt_offset
>  					      + cur_node.host_start
> @@ -825,7 +865,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  	      k->host_end = k->host_start + sizeof (void *);
>  	    splay_tree_key n = splay_tree_lookup (mem_map, k);
>  	    if (n && n->refcount != REFCOUNT_LINK)
> -	      gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
> +	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
>  				      kind & typemask, cbufp);
>  	    else
>  	      {
> @@ -878,18 +918,19 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  		  case GOMP_MAP_FORCE_TOFROM:
>  		  case GOMP_MAP_ALWAYS_TO:
>  		  case GOMP_MAP_ALWAYS_TOFROM:
> -		    gomp_copy_host2dev (devicep,
> +		    gomp_copy_host2dev (devicep, aq,
>  					(void *) (tgt->tgt_start
>  						  + k->tgt_offset),
>  					(void *) k->host_start,
>  					k->host_end - k->host_start, cbufp);
>  		    break;
>  		  case GOMP_MAP_POINTER:
> -		    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
> +		    gomp_map_pointer (tgt, aq,
> +				      (uintptr_t) *(void **) k->host_start,
>  				      k->tgt_offset, sizes[i], cbufp);
>  		    break;
>  		  case GOMP_MAP_TO_PSET:
> -		    gomp_copy_host2dev (devicep,
> +		    gomp_copy_host2dev (devicep, aq,
>  					(void *) (tgt->tgt_start
>  						  + k->tgt_offset),
>  					(void *) k->host_start,
> @@ -911,7 +952,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  			  tgt->list[j].always_copy_from = false;
>  			  if (k->refcount != REFCOUNT_INFINITY)
>  			    k->refcount++;
> -			  gomp_map_pointer (tgt,
> +			  gomp_map_pointer (tgt, aq,
>  					    (uintptr_t) *(void **) hostaddrs[j],
>  					    k->tgt_offset
>  					    + ((uintptr_t) hostaddrs[j]
> @@ -940,7 +981,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  		    break;
>  		  case GOMP_MAP_FORCE_DEVICEPTR:
>  		    assert (k->host_end - k->host_start == sizeof (void *));
> -		    gomp_copy_host2dev (devicep,
> +		    gomp_copy_host2dev (devicep, aq,
>  					(void *) (tgt->tgt_start
>  						  + k->tgt_offset),
>  					(void *) k->host_start,
> @@ -957,9 +998,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>  		    /* Set link pointer on target to the device address of the
>  		       mapped object.  */
>  		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
> -		    devicep->host2dev_func (devicep->target_id,
> -					    (void *) n->tgt_offset,
> -					    &tgt_addr, sizeof (void *));
> +		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
> +					&tgt_addr, sizeof (void *), cbufp);
>  		  }
>  		array++;
>  	      }
> @@ -971,7 +1011,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>        for (i = 0; i < mapnum; i++)
>  	{
>  	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
> -	  gomp_copy_host2dev (devicep,
> +	  gomp_copy_host2dev (devicep, aq,
>  			      (void *) (tgt->tgt_start + i * sizeof (void *)),
>  			      (void *) &cur_node.tgt_offset, sizeof (void *),
>  			      cbufp);
> @@ -982,7 +1022,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>      {
>        long c = 0;
>        for (c = 0; c < cbuf.chunk_cnt; ++c)
> -	gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
> +	gomp_copy_host2dev (devicep, aq,
> +			    (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
>  			    (char *) cbuf.buf + (cbuf.chunks[2 * c] - cbuf.chunks[0]),
>  			    cbuf.chunks[2 * c + 1] - cbuf.chunks[2 * c], NULL);
>        free (cbuf.buf);
> @@ -1001,7 +1042,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
>    return tgt;
>  }
>  
> -static void
> +attribute_hidden void
>  gomp_unmap_tgt (struct target_mem_desc *tgt)
>  {
>    /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
> @@ -1036,6 +1077,13 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
>  attribute_hidden void
>  gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
>  {
> +  gomp_unmap_vars_async (tgt, do_copyfrom, NULL);
> +}
> +
> +attribute_hidden void
> +gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
> +		       struct goacc_asyncqueue *aq)
> +{
>    struct gomp_device_descr *devicep = tgt->device_descr;
>  
>    if (tgt->list_count == 0)
> @@ -1071,7 +1119,7 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
>  
>        if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
>  	  || tgt->list[i].always_copy_from)
> -	gomp_copy_dev2host (devicep,
> +	gomp_copy_dev2host (devicep, aq,
>  			    (void *) (k->host_start + tgt->list[i].offset),
>  			    (void *) (k->tgt->tgt_start + k->tgt_offset
>  				      + tgt->list[i].offset),
> @@ -1137,9 +1185,10 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
>  	    size_t size = cur_node.host_end - cur_node.host_start;
>  
>  	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
> -	      gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
> +	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
> +				  NULL);
>  	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
> -	      gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
> +	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
>  	  }
>        }
>    gomp_mutex_unlock (&devicep->lock);
> @@ -1924,7 +1985,7 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
>  
>  	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
>  	      || kind == GOMP_MAP_ALWAYS_FROM)
> -	    gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
> +	    gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
>  				(void *) (k->tgt->tgt_start + k->tgt_offset
>  					  + cur_node.host_start
>  					  - k->host_start),


Grüße
 Thomas

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 4/6, OpenACC, libgomp] Async re-work, libgomp/target.c changes
  2018-12-06 17:21 ` Thomas Schwinge
@ 2018-12-06 17:43   ` Jakub Jelinek
  2018-12-11 13:47     ` [PATCH 4/6, OpenACC, libgomp] Async re-work, libgomp/target.c changes (revised, v2) Chung-Lin Tang
  0 siblings, 1 reply; 5+ messages in thread
From: Jakub Jelinek @ 2018-12-06 17:43 UTC (permalink / raw)
  To: Thomas Schwinge; +Cc: Chung-Lin Tang, gcc-patches

On Thu, Dec 06, 2018 at 06:21:16PM +0100, Thomas Schwinge wrote:
> On Tue, 25 Sep 2018 21:11:24 +0800, Chung-Lin Tang <chunglin_tang@mentor.com> wrote:
> > Hi Jakub,
> > This part has changes to 'struct goacc_asyncqueue*' arguments to various
> > memory copying/mapping functions. To lessen the amount of code changes new 'gomp_map/unmap_vars_async'
> > functions names are used (with the non-async original names defined with the asyncqueue==NULL).
> 
> Is that the way you'd like this to be done, or should instead that
> "struct goacc_asyncqueue *aq" parameter be added/passed through all the
> existing functions?  (The latter would be my preference, actually.)

I'd prefer not to increase the number of arguments where possible, because
many of the functions already have more arguments than can be passed in
registers.  Could it be e.g. added into gomp_coalesce_buf which is already
passed around?

Another option would be to use an always_inline function as a C "template" if
the OpenMP and OpenACC needs diverge too much, and then simply have small
wrappers that just call the always_inline function, in one case with the
argument NULL or another constant, and in the other with whatever it has been
called with.

	Jakub

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 4/6, OpenACC, libgomp] Async re-work, libgomp/target.c changes (revised, v2)
  2018-12-06 17:43   ` Jakub Jelinek
@ 2018-12-11 13:47     ` Chung-Lin Tang
  2018-12-13 10:19       ` Jakub Jelinek
  0 siblings, 1 reply; 5+ messages in thread
From: Chung-Lin Tang @ 2018-12-11 13:47 UTC (permalink / raw)
  To: Jakub Jelinek, Thomas Schwinge; +Cc: Chung-Lin Tang, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1502 bytes --]

On 2018/12/7 1:43 AM, Jakub Jelinek wrote:
> On Thu, Dec 06, 2018 at 06:21:16PM +0100, Thomas Schwinge wrote:
>> On Tue, 25 Sep 2018 21:11:24 +0800, Chung-Lin Tang <chunglin_tang@mentor.com> wrote:
>>> Hi Jakub,
>>> This part has changes to 'struct goacc_asyncqueue*' arguments to various
>>> memory copying/mapping functions. To lessen the amount of code changes new 'gomp_map/unmap_vars_async'
>>> functions names are used (with the non-async original names defined with the asyncqueue==NULL).
>>
>> Is that the way you'd like this to be done, or should instead that
>> "struct goacc_asyncqueue *aq" parameter be added/passed through all the
>> existing functions?  (The latter would be my preference, actually.)
> 
> I'd prefer not to increase the amount of arguments where possible, because
> many of the functions already have more arguments than can be passed in
> registers.  Could it be e.g. added into gomp_coalesce_buf which is already
> passed around?
> 
> Another option would be to use always_inline as C template if the OpenMP and
> OpenACC needs diverge too much, then have simply small wrappers that just
> call the always_inline function, in one case with the argument NULL or other
> constant, in another one with whatever it has been called with.
> 
> 	Jakub
> 
I have revised the patch so that both gomp_[un]map_vars and gomp_[un]map_vars_async
are thin wrappers around gomp_[un]map_vars_internal, which is static always_inline.
This should address that part of the concerns.

Thanks,
Chung-Lin


[-- Attachment #2: async-04.libgomp-target.v2.patch --]
[-- Type: text/plain, Size: 17343 bytes --]

Index: libgomp/target.c
===================================================================
--- libgomp/target.c	(revision 266973)
+++ libgomp/target.c	(working copy)
@@ -177,6 +177,22 @@ gomp_device_copy (struct gomp_device_descr *device
     }
 }
 
+static inline void
+goacc_device_copy_async (struct gomp_device_descr *devicep,
+			 bool (*copy_func) (int, void *, const void *, size_t,
+					    struct goacc_asyncqueue *),
+			 const char *dst, void *dstaddr,
+			 const char *src, const void *srcaddr,
+			 size_t size, struct goacc_asyncqueue *aq)
+{
+  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
+		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
+    }
+}
+
 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
    host to device memory transfers.  */
 
@@ -263,8 +279,9 @@ gomp_to_device_kind_p (int kind)
     }
 }
 
-static void
+attribute_hidden void
 gomp_copy_host2dev (struct gomp_device_descr *devicep,
+		    struct goacc_asyncqueue *aq,
 		    void *d, const void *h, size_t sz,
 		    struct gomp_coalesce_buf *cbuf)
 {
@@ -293,14 +310,23 @@ gomp_copy_host2dev (struct gomp_device_descr *devi
 	    }
 	}
     }
-  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
+  if (aq)
+    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
+			     "dev", d, "host", h, sz, aq);
+  else
+    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
 }
 
-static void
+attribute_hidden void
 gomp_copy_dev2host (struct gomp_device_descr *devicep,
+		    struct goacc_asyncqueue *aq,
 		    void *h, const void *d, size_t sz)
 {
-  gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
+  if (aq)
+    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
+			     "host", h, "dev", d, sz, aq);
+  else
+    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
 }
 
 static void
@@ -318,7 +344,8 @@ gomp_free_device_memory (struct gomp_device_descr
    Helper function of gomp_map_vars.  */
 
 static inline void
-gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
+gomp_map_vars_existing (struct gomp_device_descr *devicep,
+			struct goacc_asyncqueue *aq, splay_tree_key oldn,
 			splay_tree_key newn, struct target_var_desc *tgt_var,
 			unsigned char kind, struct gomp_coalesce_buf *cbuf)
 {
@@ -340,7 +367,7 @@ static inline void
     }
 
   if (GOMP_MAP_ALWAYS_TO_P (kind))
-    gomp_copy_host2dev (devicep,
+    gomp_copy_host2dev (devicep, aq,
 			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
 				  + newn->host_start - oldn->host_start),
 			(void *) newn->host_start,
@@ -358,8 +385,8 @@ get_kind (bool short_mapkind, void *kinds, int idx
 }
 
 static void
-gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
-		  uintptr_t target_offset, uintptr_t bias,
+gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
+		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
 		  struct gomp_coalesce_buf *cbuf)
 {
   struct gomp_device_descr *devicep = tgt->device_descr;
@@ -370,7 +397,7 @@ static void
   if (cur_node.host_start == (uintptr_t) NULL)
     {
       cur_node.tgt_offset = (uintptr_t) NULL;
-      gomp_copy_host2dev (devicep,
+      gomp_copy_host2dev (devicep, aq,
 			  (void *) (tgt->tgt_start + target_offset),
 			  (void *) &cur_node.tgt_offset,
 			  sizeof (void *), cbuf);
@@ -392,12 +419,13 @@ static void
      array section.  Now subtract bias to get what we want
      to initialize the pointer with.  */
   cur_node.tgt_offset -= bias;
-  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
+  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
 		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
 }
 
 static void
-gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
+gomp_map_fields_existing (struct target_mem_desc *tgt,
+			  struct goacc_asyncqueue *aq, splay_tree_key n,
 			  size_t first, size_t i, void **hostaddrs,
 			  size_t *sizes, void *kinds,
 			  struct gomp_coalesce_buf *cbuf)
@@ -417,7 +445,7 @@ static void
       && n2->tgt == n->tgt
       && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
     {
-      gomp_map_vars_existing (devicep, n2, &cur_node,
+      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
 			      &tgt->list[i], kind & typemask, cbuf);
       return;
     }
@@ -433,8 +461,8 @@ static void
 	      && n2->host_start - n->host_start
 		 == n2->tgt_offset - n->tgt_offset)
 	    {
-	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
-				      kind & typemask, cbuf);
+	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
+				      &tgt->list[i], kind & typemask, cbuf);
 	      return;
 	    }
 	}
@@ -445,7 +473,7 @@ static void
 	  && n2->tgt == n->tgt
 	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
 	{
-	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
+	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
 				  kind & typemask, cbuf);
 	  return;
 	}
@@ -477,10 +505,12 @@ gomp_map_val (struct target_mem_desc *tgt, void **
   return tgt->tgt_start + tgt->list[i].offset;
 }
 
-attribute_hidden struct target_mem_desc *
-gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
-	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
-	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
+static inline __attribute__((always_inline)) struct target_mem_desc *
+gomp_map_vars_internal (struct gomp_device_descr *devicep,
+			struct goacc_asyncqueue *aq, size_t mapnum,
+			void **hostaddrs, void **devaddrs, size_t *sizes,
+			void *kinds, bool short_mapkind,
+			enum gomp_map_vars_kind pragma_kind)
 {
   size_t i, tgt_align, tgt_size, not_found_cnt = 0;
   bool has_firstprivate = false;
@@ -594,7 +624,7 @@ gomp_map_val (struct target_mem_desc *tgt, void **
 	      continue;
 	    }
 	  for (i = first; i <= last; i++)
-	    gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
+	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
 				      sizes, kinds, NULL);
 	  i--;
 	  continue;
@@ -639,7 +669,7 @@ gomp_map_val (struct target_mem_desc *tgt, void **
       else
 	n = splay_tree_lookup (mem_map, &cur_node);
       if (n && n->refcount != REFCOUNT_LINK)
-	gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
+	gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
 				kind & typemask, NULL);
       else
 	{
@@ -750,7 +780,7 @@ gomp_map_val (struct target_mem_desc *tgt, void **
 		tgt_size = (tgt_size + align - 1) & ~(align - 1);
 		tgt->list[i].offset = tgt_size;
 		len = sizes[i];
-		gomp_copy_host2dev (devicep,
+		gomp_copy_host2dev (devicep, aq,
 				    (void *) (tgt->tgt_start + tgt_size),
 				    (void *) hostaddrs[i], len, cbufp);
 		tgt_size += len;
@@ -784,7 +814,7 @@ gomp_map_val (struct target_mem_desc *tgt, void **
 		    continue;
 		  }
 		for (i = first; i <= last; i++)
-		  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
+		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
 					    sizes, kinds, cbufp);
 		i--;
 		continue;
@@ -804,7 +834,7 @@ gomp_map_val (struct target_mem_desc *tgt, void **
 		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
 		if (cur_node.tgt_offset)
 		  cur_node.tgt_offset -= sizes[i];
-		gomp_copy_host2dev (devicep,
+		gomp_copy_host2dev (devicep, aq,
 				    (void *) (n->tgt->tgt_start
 					      + n->tgt_offset
 					      + cur_node.host_start
@@ -825,7 +855,7 @@ gomp_map_val (struct target_mem_desc *tgt, void **
 	      k->host_end = k->host_start + sizeof (void *);
 	    splay_tree_key n = splay_tree_lookup (mem_map, k);
 	    if (n && n->refcount != REFCOUNT_LINK)
-	      gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
+	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
 				      kind & typemask, cbufp);
 	    else
 	      {
@@ -878,7 +908,7 @@ gomp_map_val (struct target_mem_desc *tgt, void **
 		  case GOMP_MAP_FORCE_TOFROM:
 		  case GOMP_MAP_ALWAYS_TO:
 		  case GOMP_MAP_ALWAYS_TOFROM:
-		    gomp_copy_host2dev (devicep,
+		    gomp_copy_host2dev (devicep, aq,
 					(void *) (tgt->tgt_start
 						  + k->tgt_offset),
 					(void *) k->host_start,
@@ -885,11 +915,12 @@ gomp_map_val (struct target_mem_desc *tgt, void **
 					k->host_end - k->host_start, cbufp);
 		    break;
 		  case GOMP_MAP_POINTER:
-		    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
+		    gomp_map_pointer (tgt, aq,
+				      (uintptr_t) *(void **) k->host_start,
 				      k->tgt_offset, sizes[i], cbufp);
 		    break;
 		  case GOMP_MAP_TO_PSET:
-		    gomp_copy_host2dev (devicep,
+		    gomp_copy_host2dev (devicep, aq,
 					(void *) (tgt->tgt_start
 						  + k->tgt_offset),
 					(void *) k->host_start,
@@ -911,7 +942,7 @@ gomp_map_val (struct target_mem_desc *tgt, void **
 			  tgt->list[j].always_copy_from = false;
 			  if (k->refcount != REFCOUNT_INFINITY)
 			    k->refcount++;
-			  gomp_map_pointer (tgt,
+			  gomp_map_pointer (tgt, aq,
 					    (uintptr_t) *(void **) hostaddrs[j],
 					    k->tgt_offset
 					    + ((uintptr_t) hostaddrs[j]
@@ -940,7 +971,7 @@ gomp_map_val (struct target_mem_desc *tgt, void **
 		    break;
 		  case GOMP_MAP_FORCE_DEVICEPTR:
 		    assert (k->host_end - k->host_start == sizeof (void *));
-		    gomp_copy_host2dev (devicep,
+		    gomp_copy_host2dev (devicep, aq,
 					(void *) (tgt->tgt_start
 						  + k->tgt_offset),
 					(void *) k->host_start,
@@ -959,7 +990,7 @@ gomp_map_val (struct target_mem_desc *tgt, void **
 		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
 		    /* We intentionally do not use coalescing here, as it's not
 		       data allocated by the current call to this function.  */
-		    gomp_copy_host2dev (devicep, (void *) n->tgt_offset,
+		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
 					&tgt_addr, sizeof (void *), NULL);
 		  }
 		array++;
@@ -972,7 +1003,7 @@ gomp_map_val (struct target_mem_desc *tgt, void **
       for (i = 0; i < mapnum; i++)
 	{
 	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
-	  gomp_copy_host2dev (devicep,
+	  gomp_copy_host2dev (devicep, aq,
 			      (void *) (tgt->tgt_start + i * sizeof (void *)),
 			      (void *) &cur_node.tgt_offset, sizeof (void *),
 			      cbufp);
@@ -983,7 +1014,8 @@ gomp_map_val (struct target_mem_desc *tgt, void **
     {
       long c = 0;
       for (c = 0; c < cbuf.chunk_cnt; ++c)
-	gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
+	gomp_copy_host2dev (devicep, aq,
+			    (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
 			    (char *) cbuf.buf + (cbuf.chunks[2 * c] - cbuf.chunks[0]),
 			    cbuf.chunks[2 * c + 1] - cbuf.chunks[2 * c], NULL);
       free (cbuf.buf);
@@ -1002,7 +1034,27 @@ gomp_map_val (struct target_mem_desc *tgt, void **
   return tgt;
 }
 
-static void
+attribute_hidden struct target_mem_desc *
+gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
+	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
+	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
+{
+  return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
+				 sizes, kinds, short_mapkind, pragma_kind);
+}
+
+attribute_hidden struct target_mem_desc *
+gomp_map_vars_async (struct gomp_device_descr *devicep,
+		     struct goacc_asyncqueue *aq, size_t mapnum,
+		     void **hostaddrs, void **devaddrs, size_t *sizes,
+		     void *kinds, bool short_mapkind,
+		     enum gomp_map_vars_kind pragma_kind)
+{
+  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
+				 sizes, kinds, short_mapkind, pragma_kind);
+}
+
+attribute_hidden void
 gomp_unmap_tgt (struct target_mem_desc *tgt)
 {
   /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
@@ -1034,8 +1086,9 @@ gomp_remove_var (struct gomp_device_descr *devicep
    variables back from device to host: if it is false, it is assumed that this
    has been done already.  */
 
-attribute_hidden void
-gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
+static inline __attribute__((always_inline)) void
+gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
+			  struct goacc_asyncqueue *aq)
 {
   struct gomp_device_descr *devicep = tgt->device_descr;
 
@@ -1072,7 +1125,7 @@ gomp_remove_var (struct gomp_device_descr *devicep
 
       if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
 	  || tgt->list[i].always_copy_from)
-	gomp_copy_dev2host (devicep,
+	gomp_copy_dev2host (devicep, aq,
 			    (void *) (k->host_start + tgt->list[i].offset),
 			    (void *) (k->tgt->tgt_start + k->tgt_offset
 				      + tgt->list[i].offset),
@@ -1089,6 +1142,19 @@ gomp_remove_var (struct gomp_device_descr *devicep
   gomp_mutex_unlock (&devicep->lock);
 }
 
+attribute_hidden void
+gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
+{
+  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
+}
+
+attribute_hidden void
+gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
+		       struct goacc_asyncqueue *aq)
+{
+  gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
+}
+
 static void
 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
 	     size_t *sizes, void *kinds, bool short_mapkind)
@@ -1138,9 +1204,10 @@ gomp_update (struct gomp_device_descr *devicep, si
 	    size_t size = cur_node.host_end - cur_node.host_start;
 
 	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
-	      gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
+	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
+				  NULL);
 	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
-	      gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
+	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
 	  }
       }
   gomp_mutex_unlock (&devicep->lock);
@@ -1433,9 +1500,21 @@ gomp_init_device (struct gomp_device_descr *device
 				   false);
     }
 
+  /* Initialize OpenACC asynchronous queues.  */
+  goacc_init_asyncqueues (devicep);
+
   devicep->state = GOMP_DEVICE_INITIALIZED;
 }
 
+attribute_hidden bool
+gomp_fini_device (struct gomp_device_descr *devicep)
+{
+  bool ret = goacc_fini_asyncqueues (devicep);
+  ret &= devicep->fini_device_func (devicep->target_id);
+  devicep->state = GOMP_DEVICE_FINALIZED;
+  return ret;
+}
+
 attribute_hidden void
 gomp_unload_device (struct gomp_device_descr *devicep)
 {
@@ -1944,7 +2023,7 @@ gomp_exit_data (struct gomp_device_descr *devicep,
 
 	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
 	      || kind == GOMP_MAP_ALWAYS_FROM)
-	    gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
+	    gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
 				(void *) (k->tgt->tgt_start + k->tgt_offset
 					  + cur_node.host_start
 					  - k->host_start),
@@ -2626,20 +2705,20 @@ gomp_load_plugin_for_device (struct gomp_device_de
   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
     {
       if (!DLSYM_OPT (openacc.exec, openacc_exec)
-	  || !DLSYM_OPT (openacc.register_async_cleanup,
-			 openacc_register_async_cleanup)
-	  || !DLSYM_OPT (openacc.async_test, openacc_async_test)
-	  || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
-	  || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
-	  || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
-	  || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
-	  || !DLSYM_OPT (openacc.async_wait_all_async,
-			 openacc_async_wait_all_async)
-	  || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
 	  || !DLSYM_OPT (openacc.create_thread_data,
 			 openacc_create_thread_data)
 	  || !DLSYM_OPT (openacc.destroy_thread_data,
-			 openacc_destroy_thread_data))
+			 openacc_destroy_thread_data)
+	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
+	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
+	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
+	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
+	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
+	  || !DLSYM_OPT (openacc.async.queue_callback,
+			 openacc_async_queue_callback)
+	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
+	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
+	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev))
 	{
 	  /* Require all the OpenACC handlers if we have
 	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
@@ -2690,10 +2769,7 @@ gomp_target_fini (void)
       struct gomp_device_descr *devicep = &devices[i];
       gomp_mutex_lock (&devicep->lock);
       if (devicep->state == GOMP_DEVICE_INITIALIZED)
-	{
-	  ret = devicep->fini_device_func (devicep->target_id);
-	  devicep->state = GOMP_DEVICE_FINALIZED;
-	}
+	ret = gomp_fini_device (devicep);
       gomp_mutex_unlock (&devicep->lock);
       if (!ret)
 	gomp_fatal ("device finalization failed");

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 4/6, OpenACC, libgomp] Async re-work, libgomp/target.c changes (revised, v2)
  2018-12-11 13:47     ` [PATCH 4/6, OpenACC, libgomp] Async re-work, libgomp/target.c changes (revised, v2) Chung-Lin Tang
@ 2018-12-13 10:19       ` Jakub Jelinek
  0 siblings, 0 replies; 5+ messages in thread
From: Jakub Jelinek @ 2018-12-13 10:19 UTC (permalink / raw)
  To: cltang; +Cc: Thomas Schwinge, gcc-patches

On Tue, Dec 11, 2018 at 09:47:10PM +0800, Chung-Lin Tang wrote:
> I have revised the patch to make both gomp_[un]map_vars and gomp_[un]map_vars_async
> point to gomp_[un]map_vars_internal, which is static always_inline. This should
> alleviate that part of the concerns.

> @@ -263,8 +279,9 @@ gomp_to_device_kind_p (int kind)
>      }
>  }
>  
> -static void
> +attribute_hidden void
>  gomp_copy_host2dev (struct gomp_device_descr *devicep,
> +		    struct goacc_asyncqueue *aq,
>  		    void *d, const void *h, size_t sz,
>  		    struct gomp_coalesce_buf *cbuf)

Have you tried sticking the struct goacc_asyncqueue * into struct
gomp_coalesce_buf?  If that doesn't work for some reason (please explain
why), then I'd prefer that argument to come last, not second, because various
targets have small limits on how many arguments they can pass in registers.

> @@ -293,14 +310,23 @@ gomp_copy_host2dev (struct gomp_device_descr *devi
>  	    }
>  	}
>      }
> -  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
> +  if (aq)

Can you please use __builtin_expect (aq != NULL, 0) here?  A ptr != NULL
test is by default predicted as more likely than ptr == NULL, but the
gomp_device_copy call is the path taken for all of OpenMP and for OpenACC
except for async, so it is the more likely one.

> +    goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
> +			     "dev", d, "host", h, sz, aq);
> +  else
> +    gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
>  }
>  
> -static void
> +attribute_hidden void
>  gomp_copy_dev2host (struct gomp_device_descr *devicep,
> +		    struct goacc_asyncqueue *aq,
>  		    void *h, const void *d, size_t sz)
>  {
> -  gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
> +  if (aq)

Likewise.

	Jakub

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2018-12-13 10:19 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-25 13:11 [PATCH 4/6, OpenACC, libgomp] Async re-work, libgomp/target.c changes Chung-Lin Tang
2018-12-06 17:21 ` Thomas Schwinge
2018-12-06 17:43   ` Jakub Jelinek
2018-12-11 13:47     ` [PATCH 4/6, OpenACC, libgomp] Async re-work, libgomp/target.c changes (revised, v2) Chung-Lin Tang
2018-12-13 10:19       ` Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).