public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [gomp4.1] Support #pragma omp target {enter,exit} data
@ 2015-06-30 12:25 Ilya Verbin
  2015-06-30 12:57 ` Jakub Jelinek
  0 siblings, 1 reply; 18+ messages in thread
From: Ilya Verbin @ 2015-06-30 12:25 UTC (permalink / raw)
  To: Jakub Jelinek, gcc-patches; +Cc: Kirill Yukhin, Thomas Schwinge

Hi!

This patch implements GOMP_target_enter_exit_data in libgomp, also it fixes a
bug in gomp_map_vars_existing.
make check-target-libgomp passed.
However, I am afraid that there may be some hard-to-find issues (like memory
leaks) in cases of mixed (structured+unstructured) data mappings...
OK for gomp-4_1-branch?


libgomp/
	* target.c (gomp_map_vars_existing): Fix target address for 'always to'
	array sections.
	(gomp_unmap_vars): Decrement k->refcount when it's 1 and
	k->async_refcount is 0.
	(GOMP_target_enter_exit_data): Add mapping/unmapping.
	* testsuite/libgomp.c/target-11.c: Extend for testing 'always to' array
	sections.
	* testsuite/libgomp.c/target-12.c: New test.


diff --git a/libgomp/target.c b/libgomp/target.c
index a394e95..83ca827 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -171,7 +171,8 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
 
   if (GOMP_MAP_ALWAYS_TO_P (kind))
     devicep->host2dev_func (devicep->target_id,
-			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset),
+			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
+				      + newn->host_start - oldn->host_start),
 			    (void *) newn->host_start,
 			    newn->host_end - newn->host_start);
   oldn->refcount++;
@@ -580,10 +581,16 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
       bool do_unmap = false;
       if (k->refcount > 1)
 	k->refcount--;
-      else if (k->async_refcount > 0)
-	k->async_refcount--;
-      else
-	do_unmap = true;
+      else if (k->refcount == 1)
+	{
+	  if (k->async_refcount > 0)
+	    k->async_refcount--;
+	  else
+	    {
+	      k->refcount--;
+	      do_unmap = true;
+	    }
+	}
 
       if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
 	  || tgt->list[i].always_copy_from)
@@ -1160,13 +1167,61 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
     }
 
   if (is_enter_data)
-    {
-      /* TODO  */
-    }
+    gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, false);
   else
-    {
-      /* TODO  */
-    }
+    for (i = 0; i < mapnum; i++)
+      {
+	struct splay_tree_key_s cur_node;
+	unsigned char kind = kinds[i] & typemask;
+	switch (kind)
+	  {
+	  case GOMP_MAP_FROM:
+	  case GOMP_MAP_ALWAYS_FROM:
+	  case GOMP_MAP_DELETE:
+	  case GOMP_MAP_RELEASE:
+	    cur_node.host_start = (uintptr_t) hostaddrs[i];
+	    cur_node.host_end = cur_node.host_start + sizes[i];
+	    gomp_mutex_lock (&devicep->lock);
+	    splay_tree_key k = splay_tree_lookup (&devicep->mem_map, &cur_node);
+	    if (!k)
+	      {
+		gomp_mutex_unlock (&devicep->lock);
+		continue;
+	      }
+
+	    if (k->refcount > 0)
+	      k->refcount--;
+	    if (kind == GOMP_MAP_DELETE)
+	      k->refcount = 0;
+
+	    if ((kind == GOMP_MAP_FROM && k->refcount == 0)
+		|| kind == GOMP_MAP_ALWAYS_FROM)
+	      devicep->dev2host_func (devicep->target_id,
+				      (void *) cur_node.host_start,
+				      (void *) (k->tgt->tgt_start
+						+ k->tgt_offset
+						+ cur_node.host_start
+						- k->host_start),
+				      cur_node.host_end - cur_node.host_start);
+	    if (k->refcount == 0)
+	      {
+		splay_tree_remove (&devicep->mem_map, k);
+		if (k->tgt->refcount > 1)
+		  k->tgt->refcount--;
+		else
+		  gomp_unmap_tgt (k->tgt);
+	      }
+
+	    gomp_mutex_unlock (&devicep->lock);
+	    break;
+	  case GOMP_MAP_POINTER:
+	  case GOMP_MAP_TO_PSET:
+	    break;
+	  default:
+	    gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
+			kind);
+	  }
+      }
 }
 
 void
diff --git a/libgomp/testsuite/libgomp.c/target-11.c b/libgomp/testsuite/libgomp.c/target-11.c
index b86097a..98882f0 100644
--- a/libgomp/testsuite/libgomp.c/target-11.c
+++ b/libgomp/testsuite/libgomp.c/target-11.c
@@ -9,6 +9,17 @@ void test_array_section (int *p)
 {
   #pragma omp target data map(alloc: p[0:N])
     {
+      int ok = 1;
+      for (int i = 10; i < 10 + 4; i++)
+	p[i] = 997 * i;
+
+      #pragma omp target map(always to:p[10:4]) map(tofrom: ok)
+	for (int i = 10; i < 10 + 4; i++)
+	  if (p[i] != 997 * i)
+	    ok = 0;
+
+      assert (ok);
+
       #pragma omp target map(always from:p[7:9])
 	for (int i = 0; i < N; i++)
 	  p[i] = i;
diff --git a/libgomp/testsuite/libgomp.c/target-12.c b/libgomp/testsuite/libgomp.c/target-12.c
new file mode 100644
index 0000000..e22f765
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-12.c
@@ -0,0 +1,98 @@
+/* { dg-require-effective-target offload_device } */
+
+#include <stdlib.h>
+#include <assert.h>
+
+#define N 32
+
+int sum;
+int var1 = 1;
+int var2 = 2;
+
+void enter_data (int *X)
+{
+  #pragma omp target enter data map(to: var1, var2, X[:N]) map(alloc: sum)
+}
+
+void exit_data_1 ()
+{
+  #pragma omp target exit data map(from: var1)
+}
+
+void exit_data_2 ()
+{
+  #pragma omp target exit data map(from: var2)
+}
+
+void test_nested ()
+{
+  int X = 0, Y = 0, Z = 0;
+
+  #pragma omp target data map(from: X, Y, Z)
+    {
+      #pragma omp target data map(from: X, Y, Z)
+	{
+	  #pragma omp target map(from: X, Y, Z)
+	    X = Y = Z = 1337;
+	  assert (X == 0);
+	  assert (Y == 0);
+	  assert (Z == 0);
+
+	  #pragma omp target exit data map(from: X) map(release: Y)
+	  assert (X == 0);
+	  assert (Y == 0);
+
+	  #pragma omp target exit data map(release: Y) map(delete: Z)
+	  assert (Y == 0);
+	  assert (Z == 0);
+	}
+      assert (X == 1337);
+      assert (Y == 0);
+      assert (Z == 0);
+
+      #pragma omp target map(from: X)
+	X = 2448;
+      assert (X == 2448);
+      assert (Y == 0);
+      assert (Z == 0);
+
+      X = 4896;
+    }
+  assert (X == 4896);
+  assert (Y == 0);
+  assert (Z == 0);
+}
+
+int main ()
+{
+  int *X = malloc (N * sizeof (int));
+  int *Y = malloc (N * sizeof (int));
+  X[10] = 10;
+  Y[20] = 20;
+  enter_data (X);
+
+  #pragma omp target map(alloc: X[:N]) map(to: Y[:N]) map(always from: sum)
+    {
+      var1 += X[10];
+      var2 += Y[20];
+      sum = var1 + var2;
+    }
+
+  free (X);
+  free (Y);
+
+  assert (var1 == 1);
+  assert (var2 == 2);
+  assert (sum == 33);
+
+  exit_data_1 ();
+  assert (var1 == 11);
+  assert (var2 == 2);
+
+  exit_data_2 ();
+  assert (var2 == 22);
+
+  test_nested ();
+
+  return 0;
+}


    -- Ilya

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [gomp4.1] Support #pragma omp target {enter,exit} data
  2015-06-30 12:25 [gomp4.1] Support #pragma omp target {enter,exit} data Ilya Verbin
@ 2015-06-30 12:57 ` Jakub Jelinek
  2015-06-30 15:47   ` Ilya Verbin
  0 siblings, 1 reply; 18+ messages in thread
From: Jakub Jelinek @ 2015-06-30 12:57 UTC (permalink / raw)
  To: Ilya Verbin; +Cc: gcc-patches, Kirill Yukhin, Thomas Schwinge

On Tue, Jun 30, 2015 at 03:19:30PM +0300, Ilya Verbin wrote:
> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -580,10 +581,16 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
>        bool do_unmap = false;
>        if (k->refcount > 1)
>  	k->refcount--;
> -      else if (k->async_refcount > 0)
> -	k->async_refcount--;
> -      else
> -	do_unmap = true;
> +      else if (k->refcount == 1)
> +	{
> +	  if (k->async_refcount > 0)
> +	    k->async_refcount--;
> +	  else
> +	    {
> +	      k->refcount--;
> +	      do_unmap = true;
> +	    }
> +	}

What is the rationale of this hunk change?
BTW, we'll likely need to treat also refcount == INT_MAX as special (never
decrease it), because I believe declare target vars are supposed to have
refcount of infinity rather than just 2GB-1.

> @@ -1160,13 +1167,61 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
>      }
>  
>    if (is_enter_data)
> -    {
> -      /* TODO  */
> -    }
> +    gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, false);

This will leak the return value.  Either we need to arrange not to allocate
it for enter data, or we need to assign it to some variable and free
immediately (we don't want to perform the release operations for it).

>    else
> -    {
> -      /* TODO  */
> -    }
> +    for (i = 0; i < mapnum; i++)
> +      {
> +	struct splay_tree_key_s cur_node;
> +	unsigned char kind = kinds[i] & typemask;
> +	switch (kind)
> +	  {
> +	  case GOMP_MAP_FROM:
> +	  case GOMP_MAP_ALWAYS_FROM:
> +	  case GOMP_MAP_DELETE:
> +	  case GOMP_MAP_RELEASE:
> +	    cur_node.host_start = (uintptr_t) hostaddrs[i];
> +	    cur_node.host_end = cur_node.host_start + sizes[i];
> +	    gomp_mutex_lock (&devicep->lock);

I don't really like locking the mutex for each map clause in exit data
separately.  Perhaps just add a gomp_exit_data function similar to
gomp_map_vars that will run this loop and be surrounded by the locking,
or do it inline, but with the lock/unlock around the whole loop.
exit data construct must have at least one map clause, so it doesn't make
sense not to lock immediately.

> +	    splay_tree_key k = splay_tree_lookup (&devicep->mem_map, &cur_node);
> +	    if (!k)
> +	      {
> +		gomp_mutex_unlock (&devicep->lock);
> +		continue;
> +	      }
> +
> +	    if (k->refcount > 0)
> +	      k->refcount--;
> +	    if (kind == GOMP_MAP_DELETE)
> +	      k->refcount = 0;

See above, I believe delete should not delete refcount == INT_MAX
mappings.

	Jakub

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [gomp4.1] Support #pragma omp target {enter,exit} data
  2015-06-30 12:57 ` Jakub Jelinek
@ 2015-06-30 15:47   ` Ilya Verbin
  2015-06-30 16:24     ` Jakub Jelinek
  0 siblings, 1 reply; 18+ messages in thread
From: Ilya Verbin @ 2015-06-30 15:47 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Kirill Yukhin, Thomas Schwinge

On Tue, Jun 30, 2015 at 14:57:02 +0200, Jakub Jelinek wrote:
> On Tue, Jun 30, 2015 at 03:19:30PM +0300, Ilya Verbin wrote:
> > --- a/libgomp/target.c
> > +++ b/libgomp/target.c
> > @@ -580,10 +581,16 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
> >        bool do_unmap = false;
> >        if (k->refcount > 1)
> >  	k->refcount--;
> > -      else if (k->async_refcount > 0)
> > -	k->async_refcount--;
> > -      else
> > -	do_unmap = true;
> > +      else if (k->refcount == 1)
> > +	{
> > +	  if (k->async_refcount > 0)
> > +	    k->async_refcount--;
> > +	  else
> > +	    {
> > +	      k->refcount--;
> > +	      do_unmap = true;
> > +	    }
> > +	}
> 
> What is the rationale of this hunk change?

Without whis change, when k->refcount == 1, do_unmap is true, but refcount is
not decremented.  So, if gomp_unmap_vars is called multiple times (now it's
possible for 4.1), refcount will remain 1, and it will try to unmap k at each
next call, that is wrong.  That's why I decrement refcount to zero, and do
nothing when hit gomp_unmap_vars next time with k->refcount == 0.

> BTW, we'll likely need to treat also refcount == INT_MAX as special (never
> decrease it), because I believe declare target vars are supposed to have
> refcount of infinity rather than just 2GB-1.

I'll add special refcount for declare target vars.

> > @@ -1160,13 +1167,61 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
> >      }
> >  
> >    if (is_enter_data)
> > -    {
> > -      /* TODO  */
> > -    }
> > +    gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, false);
> 
> This will leak the return value.  Either we need to arrange not to allocate
> it for enter data, or we need to assign it to some variable and free
> immediately (we don't want to perform the release operations for it).

But we can't not allocate or free immediately it, since it's used later through
splay_tree_key_s::tgt, e.g. here:

  if (is_target)
    {
      for (i = 0; i < mapnum; i++)
	{
	  if (tgt->list[i].key == NULL)
	    cur_node.tgt_offset = (uintptr_t) NULL;
	  else
	    cur_node.tgt_offset = tgt->list[i].key->tgt->tgt_start
				  + tgt->list[i].key->tgt_offset;

My plan was to free tgt here:

+	    if (k->refcount == 0)
+	      {
+		splay_tree_remove (&devicep->mem_map, k);
+		if (k->tgt->refcount > 1)
+		  k->tgt->refcount--;
+		else
+		  gomp_unmap_tgt (k->tgt);
+	      }

But now I understood that this will work only for simple cases like:

  #pragma omp target enter data ...
  ...
  #pragma omp target exit data ...

And will leak e.g. in:

  #pragma omp target data ...
    {
      #pragma omp target enter data ...
    }

> >    else
> > -    {
> > -      /* TODO  */
> > -    }
> > +    for (i = 0; i < mapnum; i++)
> > +      {
> > +	struct splay_tree_key_s cur_node;
> > +	unsigned char kind = kinds[i] & typemask;
> > +	switch (kind)
> > +	  {
> > +	  case GOMP_MAP_FROM:
> > +	  case GOMP_MAP_ALWAYS_FROM:
> > +	  case GOMP_MAP_DELETE:
> > +	  case GOMP_MAP_RELEASE:
> > +	    cur_node.host_start = (uintptr_t) hostaddrs[i];
> > +	    cur_node.host_end = cur_node.host_start + sizes[i];
> > +	    gomp_mutex_lock (&devicep->lock);
> 
> I don't really like locking the mutex for each map clause in exit data
> separately.  Perhaps just add a gomp_exit_data function similar to
> gomp_map_vars that will run this loop and be surrounded by the locking,
> or do it inline, but with the lock/unlock around the whole loop.
> exit data construct must have at least one map clause, so it doesn't make
> sense not to lock immediately.

I'll move locks outside of the loop.

> > +	    splay_tree_key k = splay_tree_lookup (&devicep->mem_map, &cur_node);
> > +	    if (!k)
> > +	      {
> > +		gomp_mutex_unlock (&devicep->lock);
> > +		continue;
> > +	      }
> > +
> > +	    if (k->refcount > 0)
> > +	      k->refcount--;
> > +	    if (kind == GOMP_MAP_DELETE)
> > +	      k->refcount = 0;
> 
> See above, I believe delete should not delete refcount == INT_MAX
> mappings.

Will do that.

  -- Ilya

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [gomp4.1] Support #pragma omp target {enter,exit} data
  2015-06-30 15:47   ` Ilya Verbin
@ 2015-06-30 16:24     ` Jakub Jelinek
  2015-07-01 21:07       ` Ilya Verbin
  0 siblings, 1 reply; 18+ messages in thread
From: Jakub Jelinek @ 2015-06-30 16:24 UTC (permalink / raw)
  To: Ilya Verbin; +Cc: gcc-patches, Kirill Yukhin, Thomas Schwinge

On Tue, Jun 30, 2015 at 06:42:01PM +0300, Ilya Verbin wrote:
> On Tue, Jun 30, 2015 at 14:57:02 +0200, Jakub Jelinek wrote:
> > On Tue, Jun 30, 2015 at 03:19:30PM +0300, Ilya Verbin wrote:
> > > --- a/libgomp/target.c
> > > +++ b/libgomp/target.c
> > > @@ -580,10 +581,16 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
> > >        bool do_unmap = false;
> > >        if (k->refcount > 1)
> > >  	k->refcount--;
> > > -      else if (k->async_refcount > 0)
> > > -	k->async_refcount--;
> > > -      else
> > > -	do_unmap = true;
> > > +      else if (k->refcount == 1)
> > > +	{
> > > +	  if (k->async_refcount > 0)
> > > +	    k->async_refcount--;
> > > +	  else
> > > +	    {
> > > +	      k->refcount--;
> > > +	      do_unmap = true;
> > > +	    }
> > > +	}
> > 
> > What is the rationale of this hunk change?
> 
> Without whis change, when k->refcount == 1, do_unmap is true, but refcount is
> not decremented.  So, if gomp_unmap_vars is called multiple times (now it's
> possible for 4.1), refcount will remain 1, and it will try to unmap k at each
> next call, that is wrong.  That's why I decrement refcount to zero, and do
> nothing when hit gomp_unmap_vars next time with k->refcount == 0.

Ok.

> > >    if (is_enter_data)
> > > -    {
> > > -      /* TODO  */
> > > -    }
> > > +    gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, false);
> > 
> > This will leak the return value.  Either we need to arrange not to allocate
> > it for enter data, or we need to assign it to some variable and free
> > immediately (we don't want to perform the release operations for it).
> 
> But we can't not allocate or free immediately it, since it's used later through
> splay_tree_key_s::tgt, e.g. here:
> 
>   if (is_target)
>     {
>       for (i = 0; i < mapnum; i++)
> 	{
> 	  if (tgt->list[i].key == NULL)
> 	    cur_node.tgt_offset = (uintptr_t) NULL;
> 	  else
> 	    cur_node.tgt_offset = tgt->list[i].key->tgt->tgt_start
> 				  + tgt->list[i].key->tgt_offset;

The thing is whether it is actually a good idea to allocate the enter data
allocated objects together.
In OpenMP 4.0, generally objects would be allocated and deallocated at the
same times, except for multiple host threads trying to map the same variables
into the target.  In OpenMP 4.1, due to enter data/exit data, they can be
allocated and freed quite independently, and it is true that is the case
even for target data, one can either target data, then target enter data
to prevent something from being deallocated, then target data end freeing
only parts, etc.  So the question is if we think in real-world the
allocation or deallocation will be usually together or not.

	Jakub

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [gomp4.1] Support #pragma omp target {enter,exit} data
  2015-06-30 16:24     ` Jakub Jelinek
@ 2015-07-01 21:07       ` Ilya Verbin
  2015-07-06 15:34         ` Ilya Verbin
  0 siblings, 1 reply; 18+ messages in thread
From: Ilya Verbin @ 2015-07-01 21:07 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Kirill Yukhin, Thomas Schwinge

On Tue, Jun 30, 2015 at 18:10:44 +0200, Jakub Jelinek wrote:
> The thing is whether it is actually a good idea to allocate the enter data
> allocated objects together.
> In OpenMP 4.0, generally objects would be allocated and deallocated at the
> same times, except for multiple host threads trying to map the same variables
> into the target.  In OpenMP 4.1, due to enter data/exit data, they can be
> allocated and freed quite independently, and it is true that is the case
> even for target data, one can either target data, then target enter data
> to prevent something from being deallocated, then target data end freeing
> only parts, etc.  So the question is if we think in real-world the
> allocation or deallocation will be usually together or not.

IMHO, it's OK to allocate "target data" objects together and "target enter data"
objects one by one.  I've implemented this approach in the patch bellow.

However, if someone writes a program like this:

  #pragma omp target data map(tofrom: small, arr[:big])
    {
      #pragma omp target enter data map(to: small)
    }
  do_a_lot_of_something ();
  #pragma omp target exit data map(from: small)

Big array will be deallocated on target only with 'small' at the end.
Is this acceptable?


The patch is not ready though, I don't know how to unmap GOMP_MAP_POINTER vars.
In gomp_unmap_vars they're unmapped through tgt->list[], but in gomp_exit_data
it's impossible to find such var in the splay tree, because hostaddr differs
from the address, used at mapping.


libgomp/
	* target.c (gomp_map_vars_existing): Fix target address for 'always to'
	array sections.  Handle special refcount UINTPTR_MAX.
	(gomp_map_vars): Handle special refcount UINTPTR_MAX.                                
	(gomp_unmap_vars): Decrement k->refcount when it's 1 and
	k->async_refcount is 0.
	(gomp_offload_image_to_device): Set refcounts to UINTPTR_MAX.
	(gomp_exit_data): New static function.
	(GOMP_target_enter_exit_data): Add mapping/unmapping.
	* testsuite/libgomp.c/target-11.c: Extend for testing 'always to' array
	sections.
	* testsuite/libgomp.c/target-12.c: New test.


diff --git a/libgomp/target.c b/libgomp/target.c
index a394e95..20e32f8 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -171,10 +171,13 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
 
   if (GOMP_MAP_ALWAYS_TO_P (kind))
     devicep->host2dev_func (devicep->target_id,
-			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset),
+			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
+				      + newn->host_start - oldn->host_start),
 			    (void *) newn->host_start,
 			    newn->host_end - newn->host_start);
-  oldn->refcount++;
+
+  if (oldn->refcount != UINTPTR_MAX)
+    oldn->refcount++;
 }
 
 static int
@@ -439,7 +442,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 			  tgt->list[j].key = k;
 			  tgt->list[j].copy_from = false;
 			  tgt->list[j].always_copy_from = false;
-			  k->refcount++;
+			  if (k->refcount != UINTPTR_MAX)
+			    k->refcount++;
 			  gomp_map_pointer (tgt,
 					    (uintptr_t) *(void **) hostaddrs[j],
 					    k->tgt_offset
@@ -578,12 +582,18 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
 	continue;
 
       bool do_unmap = false;
-      if (k->refcount > 1)
+      if (k->refcount > 1 && k->refcount != UINTPTR_MAX)
 	k->refcount--;
-      else if (k->async_refcount > 0)
-	k->async_refcount--;
-      else
-	do_unmap = true;
+      else if (k->refcount == 1)
+	{
+	  if (k->async_refcount > 0)
+	    k->async_refcount--;
+	  else
+	    {
+	      k->refcount--;
+	      do_unmap = true;
+	    }
+	}
 
       if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
 	  || tgt->list[i].always_copy_from)
@@ -709,7 +719,7 @@ gomp_offload_image_to_device (struct gomp_device_descr *devicep,
   /* Insert host-target address mapping into splay tree.  */
   struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
   tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
-  tgt->refcount = 1;
+  tgt->refcount = UINTPTR_MAX;
   tgt->tgt_start = 0;
   tgt->tgt_end = 0;
   tgt->to_free = NULL;
@@ -725,7 +735,7 @@ gomp_offload_image_to_device (struct gomp_device_descr *devicep,
       k->host_end = k->host_start + 1;
       k->tgt = tgt;
       k->tgt_offset = target_table[i].start;
-      k->refcount = 1;
+      k->refcount = UINTPTR_MAX;
       k->async_refcount = 0;
       array->left = NULL;
       array->right = NULL;
@@ -750,7 +760,7 @@ gomp_offload_image_to_device (struct gomp_device_descr *devicep,
       k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
       k->tgt = tgt;
       k->tgt_offset = target_var->start;
-      k->refcount = 1;
+      k->refcount = UINTPTR_MAX;
       k->async_refcount = 0;
       array->left = NULL;
       array->right = NULL;
@@ -1121,6 +1131,63 @@ GOMP_target_update (int device, const void *unused, size_t mapnum,
   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
 }
 
+static void
+gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
+		void **hostaddrs, size_t *sizes, unsigned short *kinds)
+{
+  const int typemask = 0xff;
+  size_t i;
+  gomp_mutex_lock (&devicep->lock);
+  for (i = 0; i < mapnum; i++)
+    {
+      struct splay_tree_key_s cur_node;
+      unsigned char kind = kinds[i] & typemask;
+      switch (kind)
+	{
+	case GOMP_MAP_FROM:
+	case GOMP_MAP_ALWAYS_FROM:
+	case GOMP_MAP_DELETE:
+	case GOMP_MAP_RELEASE:
+	case GOMP_MAP_POINTER:
+	  cur_node.host_start = (uintptr_t) hostaddrs[i];
+	  cur_node.host_end = cur_node.host_start + sizes[i];
+	  splay_tree_key k = splay_tree_lookup (&devicep->mem_map, &cur_node);
+	  if (!k)
+	    continue;
+
+	  if (k->refcount > 0 && k->refcount != UINTPTR_MAX)
+	    k->refcount--;
+	  if (kind == GOMP_MAP_DELETE && k->refcount != UINTPTR_MAX)
+	    k->refcount = 0;
+
+	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
+	      || kind == GOMP_MAP_ALWAYS_FROM)
+	    devicep->dev2host_func (devicep->target_id,
+				    (void *) cur_node.host_start,
+				    (void *) (k->tgt->tgt_start + k->tgt_offset
+					      + cur_node.host_start
+					      - k->host_start),
+				    cur_node.host_end - cur_node.host_start);
+	  if (k->refcount == 0)
+	    {
+	      splay_tree_remove (&devicep->mem_map, k);
+	      if (k->tgt->refcount > 1)
+		k->tgt->refcount--;
+	      else
+		gomp_unmap_tgt (k->tgt);
+	    }
+
+	  break;
+	default:
+	  gomp_mutex_unlock (&devicep->lock);
+	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
+		      kind);
+	}
+    }
+
+  gomp_mutex_unlock (&devicep->lock);
+}
+
 void
 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
 			     size_t *sizes, unsigned short *kinds)
@@ -1160,13 +1227,20 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
     }
 
   if (is_enter_data)
-    {
-      /* TODO  */
-    }
+    for (i = 0; i < mapnum; i++)
+      {
+	struct target_mem_desc *tgt_var
+	  = gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i],
+			   &kinds[i], true, false);
+	tgt_var->refcount--;
+
+	/* If the variable was already mapped, tgt_var is not needed.  Otherwise
+	   tgt_var will be freed by gomp_unmap_vars or gomp_exit_data.  */
+	if (tgt_var->refcount == 0)
+	  free (tgt_var);
+      }
   else
-    {
-      /* TODO  */
-    }
+    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
 }
 
 void
diff --git a/libgomp/testsuite/libgomp.c/target-11.c b/libgomp/testsuite/libgomp.c/target-11.c
index b86097a..98882f0 100644
--- a/libgomp/testsuite/libgomp.c/target-11.c
+++ b/libgomp/testsuite/libgomp.c/target-11.c
@@ -9,6 +9,17 @@ void test_array_section (int *p)
 {
   #pragma omp target data map(alloc: p[0:N])
     {
+      int ok = 1;
+      for (int i = 10; i < 10 + 4; i++)
+	p[i] = 997 * i;
+
+      #pragma omp target map(always to:p[10:4]) map(tofrom: ok)
+	for (int i = 10; i < 10 + 4; i++)
+	  if (p[i] != 997 * i)
+	    ok = 0;
+
+      assert (ok);
+
       #pragma omp target map(always from:p[7:9])
 	for (int i = 0; i < N; i++)
 	  p[i] = i;
diff --git a/libgomp/testsuite/libgomp.c/target-12.c b/libgomp/testsuite/libgomp.c/target-12.c
new file mode 100644
index 0000000..abc6c0a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-12.c
@@ -0,0 +1,110 @@
+/* { dg-require-effective-target offload_device } */
+
+#include <stdlib.h>
+#include <assert.h>
+
+#define N 40
+
+int sum;
+int var1 = 1;
+int var2 = 2;
+
+#pragma omp declare target
+int D[N];
+#pragma omp end declare target
+
+void enter_data (int *X)
+{
+  #pragma omp target enter data map(to: var1, var2, X[:N]) map(alloc: sum)
+}
+
+void exit_data_0 (int *D)
+{
+  #pragma omp target exit data map(delete: D[:N])
+}
+
+void exit_data_1 ()
+{
+  #pragma omp target exit data map(from: var1)
+}
+
+void exit_data_2 (int *X)
+{
+  #pragma omp target exit data map(from: var2) map(release: X[:N], sum)
+}
+
+void test_nested ()
+{
+  int X = 0, Y = 0, Z = 0;
+
+  #pragma omp target data map(from: X, Y, Z)
+    {
+      #pragma omp target data map(from: X, Y, Z)
+	{
+	  #pragma omp target map(from: X, Y, Z)
+	    X = Y = Z = 1337;
+	  assert (X == 0);
+	  assert (Y == 0);
+	  assert (Z == 0);
+
+	  #pragma omp target exit data map(from: X) map(release: Y)
+	  assert (X == 0);
+	  assert (Y == 0);
+
+	  #pragma omp target exit data map(release: Y) map(delete: Z)
+	  assert (Y == 0);
+	  assert (Z == 0);
+	}
+      assert (X == 1337);
+      assert (Y == 0);
+      assert (Z == 0);
+
+      #pragma omp target map(from: X)
+	X = 2448;
+      assert (X == 2448);
+      assert (Y == 0);
+      assert (Z == 0);
+
+      X = 4896;
+    }
+  assert (X == 4896);
+  assert (Y == 0);
+  assert (Z == 0);
+}
+
+int main ()
+{
+  int *X = malloc (N * sizeof (int));
+  int *Y = malloc (N * sizeof (int));
+  X[10] = 10;
+  Y[20] = 20;
+  enter_data (X);
+
+  exit_data_0 (D); /* This should have no effect on D.  */
+
+  #pragma omp target map(alloc: X[:N]) map(to: Y[:N]) map(always from: sum)
+    {
+      var1 += X[10];
+      var2 += Y[20];
+      sum = var1 + var2;
+      D[sum]++;
+    }
+
+  assert (var1 == 1);
+  assert (var2 == 2);
+  assert (sum == 33);
+
+  exit_data_1 ();
+  assert (var1 == 11);
+  assert (var2 == 2);
+
+  exit_data_2 (X);
+  assert (var2 == 22);
+
+  free (X);
+  free (Y);
+
+  test_nested ();
+
+  return 0;
+}


  -- Ilya


P.S. Also I found an ICE:

  #pragma omp declare target
  int arr[10];
  #pragma omp end declare target

  void foo (int x)
  {
    #pragma omp target map(always from: arr[0:10], x)
      arr[0];
  }

$ gcc -fopenmp -c test.c 

test.c: In function ‘foo’:
test.c:7:11: internal compiler error: Segmentation fault
   #pragma omp target map(always from: arr[0:10], x)
           ^
0xdc6562 crash_signal
	gcc/toplev.c:366
0xc574f4 lookup_sfield
	gcc/omp-low.c:1080
0xc5830d build_sender_ref
	gcc/omp-low.c:1364
0xc88be4 lower_omp_target
	gcc/omp-low.c:12898

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [gomp4.1] Support #pragma omp target {enter,exit} data
  2015-07-01 21:07       ` Ilya Verbin
@ 2015-07-06 15:34         ` Ilya Verbin
  2015-07-06 17:25           ` Jakub Jelinek
  0 siblings, 1 reply; 18+ messages in thread
From: Ilya Verbin @ 2015-07-06 15:34 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Kirill Yukhin

On Thu, Jul 02, 2015 at 00:06:58 +0300, Ilya Verbin wrote:
> On Tue, Jun 30, 2015 at 18:10:44 +0200, Jakub Jelinek wrote:
> > The thing is whether it is actually a good idea to allocate the enter data
> > allocated objects together.
> > In OpenMP 4.0, generally objects would be allocated and deallocated at the
> > same times, except for multiple host threads trying to map the same variables
> > into the target.  In OpenMP 4.1, due to enter data/exit data, they can be
> > allocated and freed quite independently, and it is true that is the case
> > even for target data, one can either target data, then target enter data
> > to prevent something from being deallocated, then target data end freeing
> > only parts, etc.  So the question is if we think in real-world the
> > allocation or deallocation will be usually together or not.
> 
> IMHO, it's OK to allocate "target data" objects together and "target enter data"
> objects one by one.  I've implemented this approach in the patch bellow.
> 
> However, if someone writes a program like this:
> 
>   #pragma omp target data map(tofrom: small, arr[:big])
>     {
>       #pragma omp target enter data map(to: small)
>     }
>   do_a_lot_of_something ();
>   #pragma omp target exit data map(from: small)
> 
> Big array will be deallocated on target only with 'small' at the end.
> Is this acceptable?

Ping?

> The patch is not ready though, I don't know how to unmap GOMP_MAP_POINTER vars.
> In gomp_unmap_vars they're unmapped through tgt->list[], but in gomp_exit_data
> it's impossible to find such var in the splay tree, because hostaddr differs
> from the address, used at mapping.

I can keep a splay_tree_key of the GOMP_MAP_POINTER in the new field in
target_mem_desc of the previous var (i.e. corresponding memory block).
Or could you suggest a better approach?

Thanks,
  -- Ilya

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [gomp4.1] Support #pragma omp target {enter,exit} data
  2015-07-06 15:34         ` Ilya Verbin
@ 2015-07-06 17:25           ` Jakub Jelinek
  2015-07-06 18:45             ` Ilya Verbin
  0 siblings, 1 reply; 18+ messages in thread
From: Jakub Jelinek @ 2015-07-06 17:25 UTC (permalink / raw)
  To: Ilya Verbin; +Cc: gcc-patches, Kirill Yukhin

On Mon, Jul 06, 2015 at 06:34:25PM +0300, Ilya Verbin wrote:
> On Thu, Jul 02, 2015 at 00:06:58 +0300, Ilya Verbin wrote:
> > On Tue, Jun 30, 2015 at 18:10:44 +0200, Jakub Jelinek wrote:
> > > The thing is whether it is actually a good idea to allocate the enter data
> > > allocated objects together.
> > > In OpenMP 4.0, generally objects would be allocated and deallocated at the
> > > same times, except for multiple host threads trying to map the same variables
> > > into the target.  In OpenMP 4.1, due to enter data/exit data, they can be
> > > allocated and freed quite independently, and it is true that is the case
> > > even for target data, one can either target data, then target enter data
> > > to prevent something from being deallocated, then target data end freeing
> > > only parts, etc.  So the question is if we think in real-world the
> > > allocation or deallocation will be usually together or not.
> > 
> > IMHO, it's OK to allocate "target data" objects together and "target enter data"
> > objects one by one.  I've implemented this approach in the patch bellow.
> > 
> > However, if someone writes a program like this:
> > 
> >   #pragma omp target data map(tofrom: small, arr[:big])
> >     {
> >       #pragma omp target enter data map(to: small)
> >     }
> >   do_a_lot_of_something ();
> >   #pragma omp target exit data map(from: small)
> > 
> > Big array will be deallocated on target only with 'small' at the end.
> > Is this acceptable?
> 
> Ping?

I think it is.

> > The patch is not ready though, I don't know how to unmap GOMP_MAP_POINTER vars.
> > In gomp_unmap_vars they're unmapped through tgt->list[], but in gomp_exit_data
> > it's impossible to find such var in the splay tree, because hostaddr differs
> > from the address, used at mapping.
> 
> I can keep a splay_tree_key of the GOMP_MAP_POINTER in the new field in
> target_mem_desc of the previous var (i.e. corresponding memory block).
> Or could you suggest a better approach?

What exactly do you have in mind here?

void foo (int *p)
{
#pragma omp enter data (to:p[10])
...
#pragma omp exit data (from:p[10])
}

where the latter will only deallocate &p[0] ... &p[9], but not &p?
I've asked for clarification in that case, but if it should deallocate (or
decrease the counter) for &p too, then I think this is something for the
frontends to handle during handling of array sections in map clause, or
during gimplification or omp lowering.

	Jakub

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [gomp4.1] Support #pragma omp target {enter,exit} data
  2015-07-06 17:25           ` Jakub Jelinek
@ 2015-07-06 18:45             ` Ilya Verbin
  2015-07-06 20:42               ` Jakub Jelinek
  0 siblings, 1 reply; 18+ messages in thread
From: Ilya Verbin @ 2015-07-06 18:45 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Kirill Yukhin

On Mon, Jul 06, 2015 at 19:25:09 +0200, Jakub Jelinek wrote:
> On Mon, Jul 06, 2015 at 06:34:25PM +0300, Ilya Verbin wrote:
> > On Thu, Jul 02, 2015 at 00:06:58 +0300, Ilya Verbin wrote:
> > > The patch is not ready though, I don't know how to unmap GOMP_MAP_POINTER vars.
> > > In gomp_unmap_vars they're unmapped through tgt->list[], but in gomp_exit_data
> > > it's impossible to find such var in the splay tree, because hostaddr differs
> > > from the address, used at mapping.
> > 
> > I can keep a splay_tree_key of the GOMP_MAP_POINTER in the new field in
> > target_mem_desc of the previous var (i.e. corresponding memory block).
> > Or could you suggest a better approach?
> 
> What exactly do you have in mind here?
> 
> void foo (int *p)
> {
> #pragma omp enter data (to:p[10])
> ...
> #pragma omp exit data (from:p[10])
> }
> 
> where the latter will only deallocate &p[0] ... &p[9], but not &p?
> I've asked for clarification in that case, but if it should deallocate (or
> decrease the counter) for &p too, then I think this is something for the
> frontends to handle during handling of array sections in map clause, or
> during gimplification or omp lowering.

I mean, in enter data map(to:p[10]):
1. Map GOMP_MAP_TO var as usual, and save returned target_mem_desc *tgt_var into
   last_tgt_var.
2. Map GOMP_MAP_POINTER var, and save returned tgt_var->list[0].key into
   last_tgt_var->new_special_field_for_pointer.

And in exit data map(from:p[10]):
1. Unmap GOMP_MAP_FROM var as usual, *and* deallocate (or decrease refcount) of
   k->tgt->new_special_field_for_pointer.
2. Do nothing for GOMP_MAP_POINTER var.

But I don't like this plan, there may be corner cases.

  -- Ilya

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [gomp4.1] Support #pragma omp target {enter,exit} data
  2015-07-06 18:45             ` Ilya Verbin
@ 2015-07-06 20:42               ` Jakub Jelinek
  2015-07-29 19:24                 ` Ilya Verbin
  0 siblings, 1 reply; 18+ messages in thread
From: Jakub Jelinek @ 2015-07-06 20:42 UTC (permalink / raw)
  To: Ilya Verbin; +Cc: gcc-patches, Kirill Yukhin

On Mon, Jul 06, 2015 at 09:45:30PM +0300, Ilya Verbin wrote:
> > What exactly do you have in mind here?
> > 
> > void foo (int *p)
> > {
> > #pragma omp enter data (to:p[10])
> > ...
> > #pragma omp exit data (from:p[10])
> > }
> > 
> > where the latter will only deallocate &p[0] ... &p[9], but not &p?
> > I've asked for clarification in that case, but if it should deallocate (or
> > decrease the counter) for &p too, then I think this is something for the
> > frontends to handle during handling of array sections in map clause, or
> > during gimplification or omp lowering.
> 
> I mean, in enter data map(to:p[10]):
> 1. Map GOMP_MAP_TO var as usual, and save returned target_mem_desc *tgt_var into
>    last_tgt_var.
> 2. Map GOMP_MAP_POINTER var, and save returned tgt_var->list[0].key into
>    last_tgt_var->new_special_field_for_pointer.
> 
> And in exit data map(from:p[10]):
> 1. Unmap GOMP_MAP_FROM var as usual, *and* deallocate (or decrease refcount) of
>    k->tgt->new_special_field_for_pointer.
> 2. Do nothing for GOMP_MAP_POINTER var.
> 
> But I don't like this plan, there may be corner cases.

As has been clarified on omp-lang, we actually shouldn't be mapping or
unmapping the pointer and/or reference, only the array slice itself, except
in target construct (and even for that it is changing from mapping to
private + pointer assignment).

	Jakub

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [gomp4.1] Support #pragma omp target {enter,exit} data
  2015-07-06 20:42               ` Jakub Jelinek
@ 2015-07-29 19:24                 ` Ilya Verbin
  2015-07-30  8:27                   ` Jakub Jelinek
  0 siblings, 1 reply; 18+ messages in thread
From: Ilya Verbin @ 2015-07-29 19:24 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Kirill Yukhin

On Mon, Jul 06, 2015 at 22:42:10 +0200, Jakub Jelinek wrote:
> As has been clarified on omp-lang, we actually shouldn't be mapping or
> unmapping the pointer and/or reference, only the array slice itself, except
> in target construct (and even for that it is changing from mapping to
> private + pointer assignment).

I've updated this patch.  make check-target-libgomp passed.


libgomp/
	* target.c (gomp_map_vars_existing): Fix target address for 'always to'
	array sections.
	(gomp_unmap_vars): Decrement k->refcount when it is 1 and
	k->async_refcount is 0.
	(gomp_offload_image_to_device): Set tgt's refcount to infinity.
	(gomp_exit_data): New static function.
	(GOMP_target_enter_exit_data): Support mapping/unmapping.
	* testsuite/libgomp.c/target-11.c: Extend for testing 'always to' array
	sections.
	* testsuite/libgomp.c/target-20.c: New test.


diff --git a/libgomp/target.c b/libgomp/target.c
index ef74d43..ad375c9 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -191,7 +191,8 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
 
   if (GOMP_MAP_ALWAYS_TO_P (kind))
     devicep->host2dev_func (devicep->target_id,
-			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset),
+			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
+				      + newn->host_start - oldn->host_start),
 			    (void *) newn->host_start,
 			    newn->host_end - newn->host_start);
   if (oldn->refcount != REFCOUNT_INFINITY)
@@ -664,15 +665,18 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
 	continue;
 
       bool do_unmap = false;
-      if (k->refcount > 1)
+      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
+	k->refcount--;
+      else if (k->refcount == 1)
 	{
-	  if (k->refcount != REFCOUNT_INFINITY)
-	    k->refcount--;
+	  if (k->async_refcount > 0)
+	    k->async_refcount--;
+	  else
+	    {
+	      k->refcount--;
+	      do_unmap = true;
+	    }
 	}
-      else if (k->async_refcount > 0)
-	k->async_refcount--;
-      else
-	do_unmap = true;
 
       if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
 	  || tgt->list[i].always_copy_from)
@@ -798,7 +802,7 @@ gomp_offload_image_to_device (struct gomp_device_descr *devicep,
   /* Insert host-target address mapping into splay tree.  */
   struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
   tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
-  tgt->refcount = 1;
+  tgt->refcount = REFCOUNT_INFINITY;
   tgt->tgt_start = 0;
   tgt->tgt_end = 0;
   tgt->to_free = NULL;
@@ -1241,6 +1245,62 @@ GOMP_target_update (int device, const void *unused, size_t mapnum,
   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
 }
 
+static void
+gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
+		void **hostaddrs, size_t *sizes, unsigned short *kinds)
+{
+  const int typemask = 0xff;
+  size_t i;
+  gomp_mutex_lock (&devicep->lock);
+  for (i = 0; i < mapnum; i++)
+    {
+      struct splay_tree_key_s cur_node;
+      unsigned char kind = kinds[i] & typemask;
+      switch (kind)
+	{
+	case GOMP_MAP_FROM:
+	case GOMP_MAP_ALWAYS_FROM:
+	case GOMP_MAP_DELETE:
+	case GOMP_MAP_RELEASE:
+	  cur_node.host_start = (uintptr_t) hostaddrs[i];
+	  cur_node.host_end = cur_node.host_start + sizes[i];
+	  splay_tree_key k = splay_tree_lookup (&devicep->mem_map, &cur_node);
+	  if (!k)
+	    continue;
+
+	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
+	    k->refcount--;
+	  if (kind == GOMP_MAP_DELETE && k->refcount != REFCOUNT_INFINITY)
+	    k->refcount = 0;
+
+	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
+	      || kind == GOMP_MAP_ALWAYS_FROM)
+	    devicep->dev2host_func (devicep->target_id,
+				    (void *) cur_node.host_start,
+				    (void *) (k->tgt->tgt_start + k->tgt_offset
+					      + cur_node.host_start
+					      - k->host_start),
+				    cur_node.host_end - cur_node.host_start);
+	  if (k->refcount == 0)
+	    {
+	      splay_tree_remove (&devicep->mem_map, k);
+	      if (k->tgt->refcount > 1)
+		k->tgt->refcount--;
+	      else
+		gomp_unmap_tgt (k->tgt);
+	    }
+
+	  break;
+	default:
+	  gomp_mutex_unlock (&devicep->lock);
+	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
+		      kind);
+	}
+    }
+
+  gomp_mutex_unlock (&devicep->lock);
+}
+
 void
 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
 			     size_t *sizes, unsigned short *kinds)
@@ -1259,9 +1319,6 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
     {
       unsigned char kind = kinds[i] & typemask;
 
-      if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
-	continue;
-
       if (kind == GOMP_MAP_ALLOC
 	  || kind == GOMP_MAP_TO
 	  || kind == GOMP_MAP_ALWAYS_TO)
@@ -1280,13 +1337,20 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
     }
 
   if (is_enter_data)
-    {
-      /* TODO  */
-    }
+    for (i = 0; i < mapnum; i++)
+      {
+	struct target_mem_desc *tgt_var
+	  = gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i],
+			   &kinds[i], true, false);
+	tgt_var->refcount--;
+
+	/* If the variable was already mapped, tgt_var is not needed.  Otherwise
+	   tgt_var will be freed by gomp_unmap_vars or gomp_exit_data.  */
+	if (tgt_var->refcount == 0)
+	  free (tgt_var);
+      }
   else
-    {
-      /* TODO  */
-    }
+    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
 }
 
 void
diff --git a/libgomp/testsuite/libgomp.c/target-11.c b/libgomp/testsuite/libgomp.c/target-11.c
index b86097a..98882f0 100644
--- a/libgomp/testsuite/libgomp.c/target-11.c
+++ b/libgomp/testsuite/libgomp.c/target-11.c
@@ -9,6 +9,17 @@ void test_array_section (int *p)
 {
   #pragma omp target data map(alloc: p[0:N])
     {
+      int ok = 1;
+      for (int i = 10; i < 10 + 4; i++)
+	p[i] = 997 * i;
+
+      #pragma omp target map(always to:p[10:4]) map(tofrom: ok)
+	for (int i = 10; i < 10 + 4; i++)
+	  if (p[i] != 997 * i)
+	    ok = 0;
+
+      assert (ok);
+
       #pragma omp target map(always from:p[7:9])
 	for (int i = 0; i < N; i++)
 	  p[i] = i;
diff --git a/libgomp/testsuite/libgomp.c/target-20.c b/libgomp/testsuite/libgomp.c/target-20.c
new file mode 100644
index 0000000..ec7e245
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-20.c
@@ -0,0 +1,111 @@
+/* { dg-require-effective-target offload_device } */
+
+#include <stdlib.h>
+#include <assert.h>
+
+#define N 40
+
+int sum;
+int var1 = 1;
+int var2 = 2;
+
+#pragma omp declare target
+int D[N];
+#pragma omp end declare target
+
+void enter_data (int *X)
+{
+  #pragma omp target enter data map(to: var1, var2, X[:N]) map(alloc: sum)
+}
+
+void exit_data_0 (int *D)
+{
+  #pragma omp target exit data map(delete: D[:N])
+}
+
+void exit_data_1 ()
+{
+  #pragma omp target exit data map(from: var1)
+}
+
+void exit_data_2 (int *X)
+{
+  #pragma omp target exit data map(from: var2) map(release: X[:N], sum)
+}
+
+void test_nested ()
+{
+  int X = 0, Y = 0, Z = 0;
+
+  #pragma omp target data map(from: X, Y, Z)
+    {
+      #pragma omp target data map(from: X, Y, Z)
+	{
+	  #pragma omp target map(from: X, Y, Z)
+	    X = Y = Z = 1337;
+	  assert (X == 0);
+	  assert (Y == 0);
+	  assert (Z == 0);
+
+	  #pragma omp target exit data map(from: X) map(release: Y)
+	  assert (X == 0);
+	  assert (Y == 0);
+
+	  #pragma omp target exit data map(release: Y) map(delete: Z)
+	  assert (Y == 0);
+	  assert (Z == 0);
+	}
+      assert (X == 1337);
+      assert (Y == 0);
+      assert (Z == 0);
+
+      #pragma omp target map(from: X)
+	X = 2448;
+      assert (X == 2448);
+      assert (Y == 0);
+      assert (Z == 0);
+
+      X = 4896;
+    }
+  assert (X == 4896);
+  assert (Y == 0);
+  assert (Z == 0);
+}
+
+int main ()
+{
+  int *X = malloc (N * sizeof (int));
+  int *Y = malloc (N * sizeof (int));
+  X[10] = 10;
+  Y[20] = 20;
+  enter_data (X);
+
+  exit_data_0 (D); /* This should have no effect on D.  */
+
+  #pragma omp target map(alloc: var1, var2, X[:N]) map(to: Y[:N]) \
+    map(always from: sum)
+    {
+      var1 += X[10];
+      var2 += Y[20];
+      sum = var1 + var2;
+      D[sum]++;
+    }
+
+  assert (var1 == 1);
+  assert (var2 == 2);
+  assert (sum == 33);
+
+  exit_data_1 ();
+  assert (var1 == 11);
+  assert (var2 == 2);
+
+  exit_data_2 (X);
+  assert (var2 == 22);
+
+  free (X);
+  free (Y);
+
+  test_nested ();
+
+  return 0;
+}


  -- Ilya

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [gomp4.1] Support #pragma omp target {enter,exit} data
  2015-07-29 19:24                 ` Ilya Verbin
@ 2015-07-30  8:27                   ` Jakub Jelinek
  2015-07-30 14:46                     ` Ilya Verbin
  2015-07-30 20:30                     ` Ilya Verbin
  0 siblings, 2 replies; 18+ messages in thread
From: Jakub Jelinek @ 2015-07-30  8:27 UTC (permalink / raw)
  To: Ilya Verbin; +Cc: gcc-patches, Kirill Yukhin

On Wed, Jul 29, 2015 at 10:06:52PM +0300, Ilya Verbin wrote:
> @@ -1241,6 +1245,62 @@ GOMP_target_update (int device, const void *unused, size_t mapnum,
>    gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
>  }
>  
> +static void
> +gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
> +		void **hostaddrs, size_t *sizes, unsigned short *kinds)
> +{
> +  const int typemask = 0xff;
> +  size_t i;
> +  gomp_mutex_lock (&devicep->lock);
> +  for (i = 0; i < mapnum; i++)
> +    {
> +      struct splay_tree_key_s cur_node;
> +      unsigned char kind = kinds[i] & typemask;
> +      switch (kind)
> +	{
> +	case GOMP_MAP_FROM:
> +	case GOMP_MAP_ALWAYS_FROM:
> +	case GOMP_MAP_DELETE:
> +	case GOMP_MAP_RELEASE:

Please handle here GOMP_MAP_ZERO_LEN_ARRAY_SECTION too.
It should use gomp_map_lookup (while all others splay_tree_lookup),
otherwise it is the same as GOMP_MAP_RELEASE.

> @@ -1280,13 +1337,20 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
>      }
>  
>    if (is_enter_data)
> -    {
> -      /* TODO  */
> -    }
> +    for (i = 0; i < mapnum; i++)
> +      {
> +	struct target_mem_desc *tgt_var
> +	  = gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i],
> +			   &kinds[i], true, false);
> +	tgt_var->refcount--;
> +
> +	/* If the variable was already mapped, tgt_var is not needed.  Otherwise
> +	   tgt_var will be freed by gomp_unmap_vars or gomp_exit_data.  */
> +	if (tgt_var->refcount == 0)
> +	  free (tgt_var);

This is racy, you don't hold the device lock here anymore, so you shouldn't
decrease refcounts or test it etc.
I think better would be to change the bool is_target argument to
gomp_map_vars into an enum, and use 3 values there for now
- GOMP_VARS_MAP_TARGET, GOMP_VARS_MAP_DATA, GOMP_VARS_MAP_ENTER_DATA or so,
and for GOMP_VARS_MAP_ENTER_DATA perform the decrement of refcount and
freeing if it is zero (but then also better return NULL).

> diff --git a/libgomp/testsuite/libgomp.c/target-20.c b/libgomp/testsuite/libgomp.c/target-20.c
> new file mode 100644
> index 0000000..ec7e245
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.c/target-20.c
> @@ -0,0 +1,111 @@
> +/* { dg-require-effective-target offload_device } */

This test will fail on HSA, you don't assume just that it doesn't
fallback to host, but also non-shared address space.
I think it would be better to start with some check for non-shared address
space, like:
/* This test relies on non-shared address space.  Punt otherwise.  */
void ensure_nonshared_as (void)
{
  int a = 8;
  #pragma omp target map(to:a)
  {
    a++;
  }
  if (a == 8)
    exit (0);
}

And generally, it is better to have most of the tests not relying on
offloading only or even non-shared address space, so that we also test
shared address space and host fallback.  But a few tests won't hurt...

	Jakub

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [gomp4.1] Support #pragma omp target {enter,exit} data
  2015-07-30  8:27                   ` Jakub Jelinek
@ 2015-07-30 14:46                     ` Ilya Verbin
  2015-07-30 15:09                       ` Jakub Jelinek
  2015-07-30 20:30                     ` Ilya Verbin
  1 sibling, 1 reply; 18+ messages in thread
From: Ilya Verbin @ 2015-07-30 14:46 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Kirill Yukhin

On Thu, Jul 30, 2015 at 10:12:59 +0200, Jakub Jelinek wrote:
> This test will fail on HSA, you don't assume just that it doesn't
> fallback to host, but also non-shared address space.
> I think it would be better to start with some check for non-shared address
> space, like:
> /* This test relies on non-shared address space.  Punt otherwise.  */
> void ensure_nonshared_as (void)
> {
>   int a = 8;
>   #pragma omp target map(to:a)
>   {
>     a++;
>   }
>   if (a == 8)
>     exit (0);
> }
> 
> And generally, it is better to have most of the tests not relying on
> offloading only or even non-shared address space, so that we also test
> shared address space and host fallback.  But a few tests won't hurt...

Sure, but it's not possible to fully test data mapping without non-shared
address space.  I've created new check_effective_target, ok for gomp-4_1-branch?


	* testsuite/lib/libgomp.exp
	(check_effective_target_offload_device_nonshared_as): New.
	* testsuite/libgomp.c++/examples-4/e.53.2.C: Replace offload_device with
	offload_device_nonshared_as.
	* testsuite/libgomp.c/target-11.c: Ditto.


diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp
index 438777f..3a29b78 100644
--- a/libgomp/testsuite/lib/libgomp.exp
+++ b/libgomp/testsuite/lib/libgomp.exp
@@ -320,6 +320,19 @@ proc check_effective_target_offload_device { } {
     } ]
 }
 
+# Return 1 if offload device is available and it has non-shared address space.
+proc check_effective_target_offload_device_nonshared_as { } {
+    return [check_runtime_nocache offload_device_nonshared_as {
+      int main ()
+	{
+	  int a = 8;
+	  #pragma omp target map(to: a)
+	    a++;
+	  return a != 8;
+	}
+    } ]
+}
+
 # Return 1 if at least one nvidia board is present.
 
 proc check_effective_target_openacc_nvidia_accel_present { } {
diff --git a/libgomp/testsuite/libgomp.c++/examples-4/e.53.2.C b/libgomp/testsuite/libgomp.c++/examples-4/e.53.2.C
index 75276e7..6d5b5e4 100644
--- a/libgomp/testsuite/libgomp.c++/examples-4/e.53.2.C
+++ b/libgomp/testsuite/libgomp.c++/examples-4/e.53.2.C
@@ -1,5 +1,5 @@
 // { dg-do run }
-// { dg-require-effective-target offload_device }
+// { dg-require-effective-target offload_device_nonshared_as }
 
 #include <stdlib.h>
 
diff --git a/libgomp/testsuite/libgomp.c/target-11.c b/libgomp/testsuite/libgomp.c/target-11.c
index b86097a..ed6a17a 100644
--- a/libgomp/testsuite/libgomp.c/target-11.c
+++ b/libgomp/testsuite/libgomp.c/target-11.c
@@ -1,4 +1,4 @@
-/* { dg-require-effective-target offload_device } */
+/* { dg-require-effective-target offload_device_nonshared_as } */
 
 #include <stdlib.h>
 #include <assert.h>


  -- Ilya

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [gomp4.1] Support #pragma omp target {enter,exit} data
  2015-07-30 14:46                     ` Ilya Verbin
@ 2015-07-30 15:09                       ` Jakub Jelinek
  0 siblings, 0 replies; 18+ messages in thread
From: Jakub Jelinek @ 2015-07-30 15:09 UTC (permalink / raw)
  To: Ilya Verbin; +Cc: gcc-patches, Kirill Yukhin

On Thu, Jul 30, 2015 at 05:40:15PM +0300, Ilya Verbin wrote:
> Sure, but it's not possible to fully test data mapping without non-shared
> address space.  I've created new check_effective_target, ok for gomp-4_1-branch?
> 
> 
> 	* testsuite/lib/libgomp.exp
> 	(check_effective_target_offload_device_nonshared_as): New.
> 	* testsuite/libgomp.c++/examples-4/e.53.2.C: Replace offload_device with
> 	offload_device_nonshared_as.
> 	* testsuite/libgomp.c/target-11.c: Ditto.

Ok.

	Jakub

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [gomp4.1] Support #pragma omp target {enter,exit} data
  2015-07-30  8:27                   ` Jakub Jelinek
  2015-07-30 14:46                     ` Ilya Verbin
@ 2015-07-30 20:30                     ` Ilya Verbin
  2015-07-30 21:37                       ` Jakub Jelinek
  2019-11-14 11:09                       ` Thomas Schwinge
  1 sibling, 2 replies; 18+ messages in thread
From: Ilya Verbin @ 2015-07-30 20:30 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Kirill Yukhin

On Thu, Jul 30, 2015 at 10:12:59 +0200, Jakub Jelinek wrote:
> On Wed, Jul 29, 2015 at 10:06:52PM +0300, Ilya Verbin wrote:
> > @@ -1241,6 +1245,62 @@ GOMP_target_update (int device, const void *unused, size_t mapnum,
> >    gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
> >  }
> >  
> > +static void
> > +gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
> > +		void **hostaddrs, size_t *sizes, unsigned short *kinds)
> > +{
> > +  const int typemask = 0xff;
> > +  size_t i;
> > +  gomp_mutex_lock (&devicep->lock);
> > +  for (i = 0; i < mapnum; i++)
> > +    {
> > +      struct splay_tree_key_s cur_node;
> > +      unsigned char kind = kinds[i] & typemask;
> > +      switch (kind)
> > +	{
> > +	case GOMP_MAP_FROM:
> > +	case GOMP_MAP_ALWAYS_FROM:
> > +	case GOMP_MAP_DELETE:
> > +	case GOMP_MAP_RELEASE:
> 
> Please handle here GOMP_MAP_ZERO_LEN_ARRAY_SECTION too.
> It should use gomp_map_lookup (while all others splay_tree_lookup),
> otherwise it is the same as GOMP_MAP_RELEASE.

Done.

> > @@ -1280,13 +1337,20 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
> >      }
> >  
> >    if (is_enter_data)
> > -    {
> > -      /* TODO  */
> > -    }
> > +    for (i = 0; i < mapnum; i++)
> > +      {
> > +	struct target_mem_desc *tgt_var
> > +	  = gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i],
> > +			   &kinds[i], true, false);
> > +	tgt_var->refcount--;
> > +
> > +	/* If the variable was already mapped, tgt_var is not needed.  Otherwise
> > +	   tgt_var will be freed by gomp_unmap_vars or gomp_exit_data.  */
> > +	if (tgt_var->refcount == 0)
> > +	  free (tgt_var);
> 
> This is racy, you don't hold the device lock here anymore, so you shouldn't
> decrease refcounts or test it etc.
> I think better would be to change the bool is_target argument to
> gomp_map_vars into an enum, and use 3 values there for now
> - GOMP_VARS_MAP_TARGET, GOMP_VARS_MAP_DATA, GOMP_VARS_MAP_ENTER_DATA or so,
> and for GOMP_VARS_MAP_ENTER_DATA perform the decrement of refcount and
> freeing if it is zero (but then also better return NULL).

Fixed.

> > diff --git a/libgomp/testsuite/libgomp.c/target-20.c b/libgomp/testsuite/libgomp.c/target-20.c
> > new file mode 100644
> > index 0000000..ec7e245
> > --- /dev/null
> > +++ b/libgomp/testsuite/libgomp.c/target-20.c
> > @@ -0,0 +1,111 @@
> > +/* { dg-require-effective-target offload_device } */
> 
> This test will fail on HSA, you don't assume just that it doesn't
> fallback to host, but also non-shared address space.

Fixed.

make check-target-libgomp passed.  ok?


libgomp/
	* libgomp.h (enum gomp_map_vars_kind): New.
	(gomp_map_vars): Change type of the argument from bool to enum
	gomp_map_vars_kind.
	* oacc-mem.c (acc_map_data, present_create_copy,
	gomp_acc_insert_pointer): Pass GOMP_MAP_VARS_OPENACC instead of false to
	gomp_map_vars.
	* oacc-parallel.c (GOACC_parallel, GOACC_data_start): Likewise.
	* target.c (gomp_map_vars_existing): Fix target address for 'always to'
	array sections.
	(gomp_map_vars): Change type of the argument from bool to enum
	gomp_map_vars_kind, fixup its usage.  Set tgt->refcount to 0 if called
	from GOMP_target_enter_exit_data.  Free tgt if called from
	GOMP_target_enter_exit_data and nothing has been mapped.
	(gomp_unmap_vars): Decrement k->refcount when it is 1 and
	k->async_refcount is 0.
	(gomp_offload_image_to_device): Set tgt's refcount to infinity.
	(GOMP_target, GOMP_target_41): Pass GOMP_MAP_VARS_TARGET instead of true
	to gomp_map_vars.
	(gomp_target_data_fallback, GOMP_target_data, GOMP_target_data_41): Pass
	GOMP_MAP_VARS_DATA instead of false to gomp_map_vars.
	(gomp_exit_data): New static function.
	(GOMP_target_enter_exit_data): Support mapping/unmapping.
	* testsuite/libgomp.c/target-11.c: Extend for testing 'always to' array
	sections.
	* testsuite/libgomp.c/target-20.c: New test.


diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 707acaf..9031649 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -787,12 +787,22 @@ struct gomp_device_descr
   acc_dispatch_t openacc;
 };
 
+/* Kind of the pragma, for which gomp_map_vars () is called.  */
+enum gomp_map_vars_kind
+{
+  GOMP_MAP_VARS_OPENACC,
+  GOMP_MAP_VARS_TARGET,
+  GOMP_MAP_VARS_DATA,
+  GOMP_MAP_VARS_ENTER_DATA
+};
+
 extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *);
 extern void gomp_acc_remove_pointer (void *, bool, int, int);
 
 extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
 					      size_t, void **, void **,
-					      size_t *, void *, bool, bool);
+					      size_t *, void *, bool,
+					      enum gomp_map_vars_kind);
 extern void gomp_copy_from_async (struct target_mem_desc *);
 extern void gomp_unmap_vars (struct target_mem_desc *, bool);
 extern void gomp_init_device (struct gomp_device_descr *);
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index c0fcb07..af067d6 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -289,7 +289,8 @@ acc_map_data (void *h, void *d, size_t s)
       if (d != h)
         gomp_fatal ("cannot map data on shared-memory system");
 
-      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
+      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
+			   GOMP_MAP_VARS_OPENACC);
     }
   else
     {
@@ -318,7 +319,7 @@ acc_map_data (void *h, void *d, size_t s)
       gomp_mutex_unlock (&acc_dev->lock);
 
       tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
-			   &kinds, true, false);
+			   &kinds, true, GOMP_MAP_VARS_OPENACC);
     }
 
   gomp_mutex_lock (&acc_dev->lock);
@@ -447,7 +448,7 @@ present_create_copy (unsigned f, void *h, size_t s)
       gomp_mutex_unlock (&acc_dev->lock);
 
       tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
-			   false);
+			   GOMP_MAP_VARS_OPENACC);
 
       gomp_mutex_lock (&acc_dev->lock);
 
@@ -594,7 +595,7 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
 
   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
-		       NULL, sizes, kinds, true, false);
+		       NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
 
   gomp_mutex_lock (&acc_dev->lock);
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 8ea3dd1..38c4770 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -131,7 +131,7 @@ GOACC_parallel (int device, void (*fn) (void *),
     tgt_fn = (void (*)) fn;
 
   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
-		       false);
+		       GOMP_MAP_VARS_OPENACC);
 
   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
   for (i = 0; i < mapnum; i++)
@@ -178,7 +178,8 @@ GOACC_data_start (int device, size_t mapnum,
   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
       || host_fallback)
     {
-      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
+      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
+			   GOMP_MAP_VARS_OPENACC);
       tgt->prev = thr->mapped_data;
       thr->mapped_data = tgt;
 
@@ -187,7 +188,7 @@ GOACC_data_start (int device, size_t mapnum,
 
   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
-		       false);
+		       GOMP_MAP_VARS_OPENACC);
   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
   tgt->prev = thr->mapped_data;
   thr->mapped_data = tgt;
diff --git a/libgomp/target.c b/libgomp/target.c
index d7f4693..565982b 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -188,7 +188,8 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
 
   if (GOMP_MAP_ALWAYS_TO_P (kind))
     devicep->host2dev_func (devicep->target_id,
-			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset),
+			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
+				      + newn->host_start - oldn->host_start),
 			    (void *) newn->host_start,
 			    newn->host_end - newn->host_start);
   if (oldn->refcount != REFCOUNT_INFINITY)
@@ -247,7 +248,7 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
 attribute_hidden struct target_mem_desc *
 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
-	       bool short_mapkind, bool is_target)
+	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
 {
   size_t i, tgt_align, tgt_size, not_found_cnt = 0;
   bool has_firstprivate = false;
@@ -258,7 +259,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
   struct target_mem_desc *tgt
     = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
   tgt->list_count = mapnum;
-  tgt->refcount = 1;
+  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
   tgt->device_descr = devicep;
 
   if (mapnum == 0)
@@ -266,7 +267,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 
   tgt_align = sizeof (void *);
   tgt_size = 0;
-  if (is_target)
+  if (pragma_kind == GOMP_MAP_VARS_TARGET)
     {
       size_t align = 4 * sizeof (void *);
       tgt_align = align;
@@ -377,7 +378,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
       tgt->tgt_start = (uintptr_t) tgt->to_free;
       tgt->tgt_end = tgt->tgt_start + sizes[0];
     }
-  else if (not_found_cnt || is_target)
+  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
     {
       /* Allocate tgt_align aligned tgt_size block of memory.  */
       /* FIXME: Perhaps change interface to allocate properly aligned
@@ -396,7 +397,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
     }
 
   tgt_size = 0;
-  if (is_target)
+  if (pragma_kind == GOMP_MAP_VARS_TARGET)
     tgt_size = mapnum * sizeof (void *);
 
   tgt->array = NULL;
@@ -560,7 +561,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 	  }
     }
 
-  if (is_target)
+  if (pragma_kind == GOMP_MAP_VARS_TARGET)
     {
       for (i = 0; i < mapnum; i++)
 	{
@@ -587,6 +588,15 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 	}
     }
 
+  /* If the variable from "omp target enter data" map-list was already mapped,
+     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
+     gomp_exit_data.  */
+  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
+    {
+      free (tgt);
+      tgt = NULL;
+    }
+
   gomp_mutex_unlock (&devicep->lock);
   return tgt;
 }
@@ -661,15 +671,18 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
 	continue;
 
       bool do_unmap = false;
-      if (k->refcount > 1)
+      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
+	k->refcount--;
+      else if (k->refcount == 1)
 	{
-	  if (k->refcount != REFCOUNT_INFINITY)
-	    k->refcount--;
+	  if (k->async_refcount > 0)
+	    k->async_refcount--;
+	  else
+	    {
+	      k->refcount--;
+	      do_unmap = true;
+	    }
 	}
-      else if (k->async_refcount > 0)
-	k->async_refcount--;
-      else
-	do_unmap = true;
 
       if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
 	  || tgt->list[i].always_copy_from)
@@ -794,7 +807,7 @@ gomp_offload_image_to_device (struct gomp_device_descr *devicep,
   /* Insert host-target address mapping into splay tree.  */
   struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
   tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
-  tgt->refcount = 1;
+  tgt->refcount = REFCOUNT_INFINITY;
   tgt->tgt_start = 0;
   tgt->tgt_end = 0;
   tgt->to_free = NULL;
@@ -1080,7 +1093,7 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
 
   struct target_mem_desc *tgt_vars
     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
-		     true);
+		     GOMP_MAP_VARS_TARGET);
   struct gomp_thread old_thr, *thr = gomp_thread ();
   old_thr = *thr;
   memset (thr, '\0', sizeof (*thr));
@@ -1140,7 +1153,7 @@ GOMP_target_41 (int device, void (*fn) (void *), size_t mapnum,
 
   struct target_mem_desc *tgt_vars
     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
-		     true);
+		     GOMP_MAP_VARS_TARGET);
   struct gomp_thread old_thr, *thr = gomp_thread ();
   old_thr = *thr;
   memset (thr, '\0', sizeof (*thr));
@@ -1168,7 +1181,8 @@ gomp_target_data_fallback (void)
          new #pragma omp target data, otherwise GOMP_target_end_data
          would get out of sync.  */
       struct target_mem_desc *tgt
-	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
+	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
+			 GOMP_MAP_VARS_DATA);
       tgt->prev = icv->target_data;
       icv->target_data = tgt;
     }
@@ -1186,7 +1200,7 @@ GOMP_target_data (int device, const void *unused, size_t mapnum,
 
   struct target_mem_desc *tgt
     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
-		     false);
+		     GOMP_MAP_VARS_DATA);
   struct gomp_task_icv *icv = gomp_icv (true);
   tgt->prev = icv->target_data;
   icv->target_data = tgt;
@@ -1204,7 +1218,7 @@ GOMP_target_data_41 (int device, size_t mapnum, void **hostaddrs, size_t *sizes,
 
   struct target_mem_desc *tgt
     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
-		     false);
+		     GOMP_MAP_VARS_DATA);
   struct gomp_task_icv *icv = gomp_icv (true);
   tgt->prev = icv->target_data;
   icv->target_data = tgt;
@@ -1235,6 +1249,65 @@ GOMP_target_update (int device, const void *unused, size_t mapnum,
   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
 }
 
+static void
+gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
+		void **hostaddrs, size_t *sizes, unsigned short *kinds)
+{
+  const int typemask = 0xff;
+  size_t i;
+  gomp_mutex_lock (&devicep->lock);
+  for (i = 0; i < mapnum; i++)
+    {
+      struct splay_tree_key_s cur_node;
+      unsigned char kind = kinds[i] & typemask;
+      switch (kind)
+	{
+	case GOMP_MAP_FROM:
+	case GOMP_MAP_ALWAYS_FROM:
+	case GOMP_MAP_DELETE:
+	case GOMP_MAP_RELEASE:
+	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
+	  cur_node.host_start = (uintptr_t) hostaddrs[i];
+	  cur_node.host_end = cur_node.host_start + sizes[i];
+	  splay_tree_key k = kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION
+	    ? gomp_map_lookup (&devicep->mem_map, &cur_node)
+	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
+	  if (!k)
+	    continue;
+
+	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
+	    k->refcount--;
+	  if (kind == GOMP_MAP_DELETE && k->refcount != REFCOUNT_INFINITY)
+	    k->refcount = 0;
+
+	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
+	      || kind == GOMP_MAP_ALWAYS_FROM)
+	    devicep->dev2host_func (devicep->target_id,
+				    (void *) cur_node.host_start,
+				    (void *) (k->tgt->tgt_start + k->tgt_offset
+					      + cur_node.host_start
+					      - k->host_start),
+				    cur_node.host_end - cur_node.host_start);
+	  if (k->refcount == 0)
+	    {
+	      splay_tree_remove (&devicep->mem_map, k);
+	      if (k->tgt->refcount > 1)
+		k->tgt->refcount--;
+	      else
+		gomp_unmap_tgt (k->tgt);
+	    }
+
+	  break;
+	default:
+	  gomp_mutex_unlock (&devicep->lock);
+	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
+		      kind);
+	}
+    }
+
+  gomp_mutex_unlock (&devicep->lock);
+}
+
 void
 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
 			     size_t *sizes, unsigned short *kinds)
@@ -1253,9 +1326,6 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
     {
       unsigned char kind = kinds[i] & typemask;
 
-      if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
-	continue;
-
       if (kind == GOMP_MAP_ALLOC
 	  || kind == GOMP_MAP_TO
 	  || kind == GOMP_MAP_ALWAYS_TO)
@@ -1267,20 +1337,19 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
       if (kind == GOMP_MAP_FROM
 	  || kind == GOMP_MAP_ALWAYS_FROM
 	  || kind == GOMP_MAP_DELETE
-	  || kind == GOMP_MAP_RELEASE)
+	  || kind == GOMP_MAP_RELEASE
+	  || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
 	break;
 
       gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x", kind);
     }
 
   if (is_enter_data)
-    {
-      /* TODO  */
-    }
+    for (i = 0; i < mapnum; i++)
+      gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
+		     true, GOMP_MAP_VARS_ENTER_DATA);
   else
-    {
-      /* TODO  */
-    }
+    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
 }
 
 void
diff --git a/libgomp/testsuite/libgomp.c/target-11.c b/libgomp/testsuite/libgomp.c/target-11.c
index ed6a17a..625c286 100644
--- a/libgomp/testsuite/libgomp.c/target-11.c
+++ b/libgomp/testsuite/libgomp.c/target-11.c
@@ -9,6 +9,17 @@ void test_array_section (int *p)
 {
   #pragma omp target data map(alloc: p[0:N])
     {
+      int ok = 1;
+      for (int i = 10; i < 10 + 4; i++)
+	p[i] = 997 * i;
+
+      #pragma omp target map(always to:p[10:4]) map(tofrom: ok)
+	for (int i = 10; i < 10 + 4; i++)
+	  if (p[i] != 997 * i)
+	    ok = 0;
+
+      assert (ok);
+
       #pragma omp target map(always from:p[7:9])
 	for (int i = 0; i < N; i++)
 	  p[i] = i;
diff --git a/libgomp/testsuite/libgomp.c/target-20.c b/libgomp/testsuite/libgomp.c/target-20.c
new file mode 100644
index 0000000..3f4e798
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-20.c
@@ -0,0 +1,120 @@
+/* { dg-require-effective-target offload_device_nonshared_as } */
+
+#include <stdlib.h>
+#include <assert.h>
+
+#define N 40
+
+int sum;
+int var1 = 1;
+int var2 = 2;
+
+#pragma omp declare target
+int D[N];
+#pragma omp end declare target
+
+void enter_data (int *X)
+{
+  #pragma omp target enter data map(to: var1, var2, X[:N]) map(alloc: sum)
+}
+
+void exit_data_0 (int *D)
+{
+  #pragma omp target exit data map(delete: D[:N])
+}
+
+void exit_data_1 ()
+{
+  #pragma omp target exit data map(from: var1)
+}
+
+void exit_data_2 (int *X)
+{
+  #pragma omp target exit data map(from: var2) map(release: X[:N], sum)
+}
+
+void exit_data_3 (int *p)
+{
+  #pragma omp target exit data map(from: p[:0])
+}
+
+void test_nested ()
+{
+  int X = 0, Y = 0, Z = 0;
+
+  #pragma omp target data map(from: X, Y, Z)
+    {
+      #pragma omp target data map(from: X, Y, Z)
+	{
+	  #pragma omp target map(from: X, Y, Z)
+	    X = Y = Z = 1337;
+	  assert (X == 0);
+	  assert (Y == 0);
+	  assert (Z == 0);
+
+	  #pragma omp target exit data map(from: X) map(release: Y)
+	  assert (X == 0);
+	  assert (Y == 0);
+
+	  #pragma omp target exit data map(release: Y) map(delete: Z)
+	  assert (Y == 0);
+	  assert (Z == 0);
+	}
+      assert (X == 1337);
+      assert (Y == 0);
+      assert (Z == 0);
+
+      #pragma omp target map(from: X)
+	X = 2448;
+      assert (X == 2448);
+      assert (Y == 0);
+      assert (Z == 0);
+
+      X = 4896;
+    }
+  assert (X == 4896);
+  assert (Y == 0);
+  assert (Z == 0);
+}
+
+int main ()
+{
+  int *X = malloc (N * sizeof (int));
+  int *Y = malloc (N * sizeof (int));
+  X[10] = 10;
+  Y[20] = 20;
+  enter_data (X);
+
+  exit_data_0 (D); /* This should have no effect on D.  */
+
+  #pragma omp target map(alloc: var1, var2, X[:N]) map(to: Y[:N]) map(always from: sum)
+    {
+      var1 += X[10];
+      var2 += Y[20];
+      sum = var1 + var2;
+      D[sum]++;
+    }
+
+  assert (var1 == 1);
+  assert (var2 == 2);
+  assert (sum == 33);
+
+  exit_data_1 ();
+  assert (var1 == 11);
+  assert (var2 == 2);
+
+  /* Increase refcount of already mapped X[0:N].  */
+  #pragma omp target enter data map(alloc: X[16:1])
+
+  exit_data_2 (X);
+  assert (var2 == 22);
+
+  exit_data_3 (X + 5); /* Unmap X[0:N].  */
+
+  free (X);
+  free (Y);
+
+  test_nested ();
+
+  return 0;
+}


  -- Ilya

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [gomp4.1] Support #pragma omp target {enter,exit} data
  2015-07-30 20:30                     ` Ilya Verbin
@ 2015-07-30 21:37                       ` Jakub Jelinek
  2019-11-14 11:09                       ` Thomas Schwinge
  1 sibling, 0 replies; 18+ messages in thread
From: Jakub Jelinek @ 2015-07-30 21:37 UTC (permalink / raw)
  To: Ilya Verbin; +Cc: gcc-patches, Kirill Yukhin

On Thu, Jul 30, 2015 at 10:44:33PM +0300, Ilya Verbin wrote:
> libgomp/
> 	* libgomp.h (enum gomp_map_vars_kind): New.
> 	(gomp_map_vars): Change type of the argument from bool to enum
> 	gomp_map_vars_kind.
> 	* oacc-mem.c (acc_map_data, present_create_copy,
> 	gomp_acc_insert_pointer): Pass GOMP_MAP_VARS_OPENACC instead of false to
> 	gomp_map_vars.
> 	* oacc-parallel.c (GOACC_parallel, GOACC_data_start): Likewise.
> 	* target.c (gomp_map_vars_existing): Fix target address for 'always to'
> 	array sections.
> 	(gomp_map_vars): Change type of the argument from bool to enum
> 	gomp_map_vars_kind, fixup its usage.  Set tgt->refcount to 0 if called
> 	from GOMP_target_enter_exit_data.  Free tgt if called from
> 	GOMP_target_enter_exit_data and nothing has been mapped.
> 	(gomp_unmap_vars): Decrement k->refcount when it is 1 and
> 	k->async_refcount is 0.
> 	(gomp_offload_image_to_device): Set tgt's refcount to infinity.
> 	(GOMP_target, GOMP_target_41): Pass GOMP_MAP_VARS_TARGET instead of true
> 	to gomp_map_vars.
> 	(gomp_target_data_fallback, GOMP_target_data, GOMP_target_data_41): Pass
> 	GOMP_MAP_VARS_DATA instead of false to gomp_map_vars.
> 	(gomp_exit_data): New static function.
> 	(GOMP_target_enter_exit_data): Support mapping/unmapping.
> 	* testsuite/libgomp.c/target-11.c: Extend for testing 'always to' array
> 	sections.
> 	* testsuite/libgomp.c/target-20.c: New test.

Ok, thanks.

	Jakub

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [gomp4.1] Support #pragma omp target {enter,exit} data
  2015-07-30 20:30                     ` Ilya Verbin
  2015-07-30 21:37                       ` Jakub Jelinek
@ 2019-11-14 11:09                       ` Thomas Schwinge
  2019-11-14 11:28                         ` Jakub Jelinek
  2019-11-14 12:00                         ` Julian Brown
  1 sibling, 2 replies; 18+ messages in thread
From: Thomas Schwinge @ 2019-11-14 11:09 UTC (permalink / raw)
  To: Ilya Verbin, Jakub Jelinek; +Cc: gcc-patches, Kirill Yukhin, Julian Brown

[-- Attachment #1: Type: text/plain, Size: 1565 bytes --]

Hi!

In context of reviewing Julian's "OpenACC reference count overhaul", I'm
generally reviewing (also known as: trying to understand) the libgomp
OpenMP 'target' "refcount"ing , and I noticed something strange (?):

On 2015-07-30T22:44:33+0300, Ilya Verbin <iverbin@gmail.com> wrote:
> make check-target-libgomp passed.  ok?

(This eventually got into trunk in r228777 "Merge from gomp-4_1-branch to
trunk".)

> libgomp/

> 	* target.c

> 	(gomp_offload_image_to_device): Set tgt's refcount to infinity.

> --- a/libgomp/target.c
> +++ b/libgomp/target.c

> @@ -794,7 +807,7 @@ gomp_offload_image_to_device (struct gomp_device_descr *devicep,
>    /* Insert host-target address mapping into splay tree.  */
>    struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
>    tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
> -  tgt->refcount = 1;
> +  tgt->refcount = REFCOUNT_INFINITY;
>    tgt->tgt_start = 0;
>    tgt->tgt_end = 0;
>    tgt->to_free = NULL;

I had understood that 'REFCOUNT_INFINITY' is only meant to be used for
the 'refcount' in 'struct splay_tree_key_s', but here it's used for the
'refcount' in 'struct target_mem_desc'.  However, all the other 'struct
target_mem_desc' 'refcount' handling doesn't seem to take care of the
special 'REFCOUNT_INFINITY' value.

This might not be an actually observable bug (I have not verified, have
not tried to construct a test case), but should this be changed anyway?
(Back to 'tgt->refcount = 1'; not yet tested?)


Grüße
 Thomas

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 658 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [gomp4.1] Support #pragma omp target {enter,exit} data
  2019-11-14 11:09                       ` Thomas Schwinge
@ 2019-11-14 11:28                         ` Jakub Jelinek
  2019-11-14 12:00                         ` Julian Brown
  1 sibling, 0 replies; 18+ messages in thread
From: Jakub Jelinek @ 2019-11-14 11:28 UTC (permalink / raw)
  To: Thomas Schwinge; +Cc: Ilya Verbin, gcc-patches, Kirill Yukhin, Julian Brown

On Thu, Nov 14, 2019 at 12:08:45PM +0100, Thomas Schwinge wrote:
> > @@ -794,7 +807,7 @@ gomp_offload_image_to_device (struct gomp_device_descr *devicep,
> >    /* Insert host-target address mapping into splay tree.  */
> >    struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
> >    tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
> > -  tgt->refcount = 1;
> > +  tgt->refcount = REFCOUNT_INFINITY;
> >    tgt->tgt_start = 0;
> >    tgt->tgt_end = 0;
> >    tgt->to_free = NULL;
> 
> I had understood that 'REFCOUNT_INFINITY' is only meant to be used for
> the 'refcount' in 'struct splay_tree_key_s', but here it's used for the
> 'refcount' in 'struct target_mem_desc'.  However, all the other 'struct
> target_mem_desc' 'refcount' handling doesn't seem to take care of the
> special 'REFCOUNT_INFINITY' value.
> 
> This might not be an actually observable bug (I have not verified, have
> not tried to construct a test case), but should this be changed anyway?
> (Back to 'tgt->refcount = 1'; not yet tested?)

No, we certainly don't want the code to free this unless the image is
unloaded.  So, if anything, REFCOUNT_INFINITY needs to be special cased
even in the target_mem_desc handling.  But, do you actually see any code
path where the current code doesn't work properly?

	Jakub

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [gomp4.1] Support #pragma omp target {enter,exit} data
  2019-11-14 11:09                       ` Thomas Schwinge
  2019-11-14 11:28                         ` Jakub Jelinek
@ 2019-11-14 12:00                         ` Julian Brown
  1 sibling, 0 replies; 18+ messages in thread
From: Julian Brown @ 2019-11-14 12:00 UTC (permalink / raw)
  To: Thomas Schwinge; +Cc: Ilya Verbin, Jakub Jelinek, gcc-patches, Kirill Yukhin

On Thu, 14 Nov 2019 12:08:45 +0100
Thomas Schwinge <thomas@codesourcery.com> wrote:

> Hi!
> 
> In context of reviewing Julian's "OpenACC reference count overhaul",
> I'm generally reviewing (also known as: trying to understand) the
> libgomp OpenMP 'target' "refcount"ing , and I noticed something
> strange (?):
> 
> On 2015-07-30T22:44:33+0300, Ilya Verbin <iverbin@gmail.com> wrote:
> > make check-target-libgomp passed.  ok?  
> 
> (This eventually got into trunk in r228777 "Merge from
> gomp-4_1-branch to trunk".)
> 
> > libgomp/  
> 
> > 	* target.c  
> 
> > 	(gomp_offload_image_to_device): Set tgt's refcount to
> > infinity.  
> 
> > --- a/libgomp/target.c
> > +++ b/libgomp/target.c  
> 
> > @@ -794,7 +807,7 @@ gomp_offload_image_to_device (struct
> > gomp_device_descr *devicep, /* Insert host-target address mapping
> > into splay tree.  */ struct target_mem_desc *tgt = gomp_malloc
> > (sizeof (*tgt)); tgt->array = gomp_malloc ((num_funcs + num_vars) *
> > sizeof (*tgt->array));
> > -  tgt->refcount = 1;
> > +  tgt->refcount = REFCOUNT_INFINITY;
> >    tgt->tgt_start = 0;
> >    tgt->tgt_end = 0;
> >    tgt->to_free = NULL;  
> 
> I had understood that 'REFCOUNT_INFINITY' is only meant to be used for
> the 'refcount' in 'struct splay_tree_key_s', but here it's used for
> the 'refcount' in 'struct target_mem_desc'.  However, all the other
> 'struct target_mem_desc' 'refcount' handling doesn't seem to take
> care of the special 'REFCOUNT_INFINITY' value.
> 
> This might not be an actually observable bug (I have not verified,
> have not tried to construct a test case), but should this be changed
> anyway? (Back to 'tgt->refcount = 1'; not yet tested?)

These function- or variable-mapping blocks will never interact with the
rest of the reference-counting machinery, I don't think, so it's
possibly a bit weird but it's unlikely to cause a problem in practice.
Just IMO.

Julian

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2019-11-14 11:57 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-30 12:25 [gomp4.1] Support #pragma omp target {enter,exit} data Ilya Verbin
2015-06-30 12:57 ` Jakub Jelinek
2015-06-30 15:47   ` Ilya Verbin
2015-06-30 16:24     ` Jakub Jelinek
2015-07-01 21:07       ` Ilya Verbin
2015-07-06 15:34         ` Ilya Verbin
2015-07-06 17:25           ` Jakub Jelinek
2015-07-06 18:45             ` Ilya Verbin
2015-07-06 20:42               ` Jakub Jelinek
2015-07-29 19:24                 ` Ilya Verbin
2015-07-30  8:27                   ` Jakub Jelinek
2015-07-30 14:46                     ` Ilya Verbin
2015-07-30 15:09                       ` Jakub Jelinek
2015-07-30 20:30                     ` Ilya Verbin
2015-07-30 21:37                       ` Jakub Jelinek
2019-11-14 11:09                       ` Thomas Schwinge
2019-11-14 11:28                         ` Jakub Jelinek
2019-11-14 12:00                         ` Julian Brown

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).