public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, OpenACC 2.7] Adjust acc_map_data/acc_unmap_data interaction with reference counters
@ 2023-06-22 10:03 Chung-Lin Tang
  2023-10-31 14:06 ` Thomas Schwinge
  0 siblings, 1 reply; 7+ messages in thread
From: Chung-Lin Tang @ 2023-06-22 10:03 UTC (permalink / raw)
  To: gcc-patches, Thomas Schwinge, Catherine Moore

[-- Attachment #1: Type: text/plain, Size: 1820 bytes --]

Hi Thomas,
This patch adjusts the implementation of acc_map_data/acc_unmap_data API library
routines to more fit the description in the OpenACC 2.7 specification.

Instead of using REFCOUNT_INFINITY, we now define a REFCOUNT_ACC_MAP_DATA
special value to mark acc_map_data-created mappings, and allow adjustment of
dynamic_refcount of such mappings by other constructs. Enforcing of an initial
value of 1 for such mappings, and only allowing acc_unmap_data to delete such
mappings, is implemented as specified.

Actually, there is no real change (or improvement) in behavior of the API (thus
no new tests) I've looked at the related OpenACC spec issues, and it seems that
this part of the 2.7 spec change is mostly a clarification (see no downside in
current REFCOUNT_INFINITY based implementation either).
But this patch does make the internals more close to the spec description.

Tested without regressions using powerpc64le-linux/nvptx, okay for trunk?

Thanks,
Chung-Lin

2023-06-22  Chung-Lin Tang  <cltang@codesourcery.com>

libgomp/ChangeLog:

	* libgomp.h (REFCOUNT_ACC_MAP_DATA): Define as (REFCOUNT_SPECIAL | 2).
	* oacc-mem.c (acc_map_data): Adjust to use REFCOUNT_ACC_MAP_DATA,
	initialize dynamic_refcount as 1.
	(acc_unmap_data): Adjust to use REFCOUNT_ACC_MAP_DATA,
	(goacc_map_var_existing): Add REFCOUNT_ACC_MAP_DATA case.
	(goacc_exit_datum_1): Add REFCOUNT_ACC_MAP_DATA case, respect
	REFCOUNT_ACC_MAP_DATA when decrementing/finalizing. Force lowest
	dynamic_refcount to be 1 for REFCOUNT_ACC_MAP_DATA.
	* target.c (gomp_increment_refcount): Add REFCOUNT_ACC_MAP_DATA case.
	(gomp_decrement_refcount): Add REFCOUNT_ACC_MAP_DATA case, force lowest
	dynamic_refcount to be 1 for REFCOUNT_ACC_MAP_DATA.
	* testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c: Adjust
	testcase error output scan test.

[-- Attachment #2: acc_map_data.patch --]
[-- Type: text/plain, Size: 5316 bytes --]

diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 4d2bfab4b71..fb8ef651dfb 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1166,6 +1166,8 @@ struct target_mem_desc;
 /* Special value for refcount - tgt_offset contains target address of the
    artificial pointer to "omp declare target link" object.  */
 #define REFCOUNT_LINK     (REFCOUNT_SPECIAL | 1)
+/* Special value for refcount - created through acc_map_data.  */
+#define REFCOUNT_ACC_MAP_DATA (REFCOUNT_SPECIAL | 2)
 
 /* Special value for refcount - structure element sibling list items.
    All such key refounts have REFCOUNT_STRUCTELEM bits set, with _FLAG_FIRST
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index fe632740769..2a782ac22c1 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -411,7 +411,8 @@ acc_map_data (void *h, void *d, size_t s)
       assert (n->refcount == 1);
       assert (n->dynamic_refcount == 0);
       /* Special reference counting behavior.  */
-      n->refcount = REFCOUNT_INFINITY;
+      n->refcount = REFCOUNT_ACC_MAP_DATA;
+      n->dynamic_refcount = 1;
 
       if (profiling_p)
 	{
@@ -460,7 +461,7 @@ acc_unmap_data (void *h)
      the different 'REFCOUNT_INFINITY' cases, or simply separate
      'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
      etc.)?  */
-  else if (n->refcount != REFCOUNT_INFINITY)
+  else if (n->refcount != REFCOUNT_ACC_MAP_DATA)
     {
       gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
@@ -519,7 +520,8 @@ goacc_map_var_existing (struct gomp_device_descr *acc_dev, void *hostaddr,
     }
 
   assert (n->refcount != REFCOUNT_LINK);
-  if (n->refcount != REFCOUNT_INFINITY)
+  if (n->refcount != REFCOUNT_INFINITY
+      && n->refcount != REFCOUNT_ACC_MAP_DATA)
     n->refcount++;
   n->dynamic_refcount++;
 
@@ -683,6 +685,7 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
 
   assert (n->refcount != REFCOUNT_LINK);
   if (n->refcount != REFCOUNT_INFINITY
+      && n->refcount != REFCOUNT_ACC_MAP_DATA
       && n->refcount < n->dynamic_refcount)
     {
       gomp_mutex_unlock (&acc_dev->lock);
@@ -691,15 +694,27 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
 
   if (finalize)
     {
-      if (n->refcount != REFCOUNT_INFINITY)
+      if (n->refcount != REFCOUNT_INFINITY
+	  && n->refcount != REFCOUNT_ACC_MAP_DATA)
 	n->refcount -= n->dynamic_refcount;
-      n->dynamic_refcount = 0;
+
+      if (n->refcount == REFCOUNT_ACC_MAP_DATA)
+	/* Mappings created by acc_map_data are returned to initial
+	   dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
+	n->dynamic_refcount = 1;
+      else
+	n->dynamic_refcount = 0;
     }
   else if (n->dynamic_refcount)
     {
-      if (n->refcount != REFCOUNT_INFINITY)
+      if (n->refcount != REFCOUNT_INFINITY
+	  && n->refcount != REFCOUNT_ACC_MAP_DATA)
 	n->refcount--;
-      n->dynamic_refcount--;
+
+      /* When mapping is created by acc_map_data, dynamic_refcount must be
+	 maintained at >= 1.  */
+      if (n->refcount != REFCOUNT_ACC_MAP_DATA || n->dynamic_refcount > 1)
+	n->dynamic_refcount--;
     }
 
   if (n->refcount == 0)
diff --git a/libgomp/target.c b/libgomp/target.c
index 80c25a16f1e..d3176eac344 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -480,7 +480,9 @@ gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
 
   uintptr_t *refcount_ptr = &k->refcount;
 
-  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
+  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
+    refcount_ptr = &k->dynamic_refcount;
+  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
     refcount_ptr = &k->structelem_refcount;
   else if (REFCOUNT_STRUCTELEM_P (k->refcount))
     refcount_ptr = k->structelem_refcount_ptr;
@@ -527,7 +529,9 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
 
   uintptr_t *refcount_ptr = &k->refcount;
 
-  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
+  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
+    refcount_ptr = &k->dynamic_refcount;
+  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
     refcount_ptr = &k->structelem_refcount;
   else if (REFCOUNT_STRUCTELEM_P (k->refcount))
     refcount_ptr = k->structelem_refcount_ptr;
@@ -560,6 +564,10 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
   else if (*refcount_ptr > 0)
     *refcount_ptr -= 1;
 
+  /* Force back to 1 if this is an acc_map_data mapping.  */
+  if (k->refcount == REFCOUNT_ACC_MAP_DATA && *refcount_ptr == 0)
+    *refcount_ptr = 1;
+
  end:
   if (*refcount_ptr == 0)
     {
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
index 872f0c1de5c..9ed9fa7e413 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
@@ -10,7 +10,7 @@ main (int argc, char *argv[])
 {
   acc_init (acc_device_default);
   acc_unmap_data ((void *) foo);
-/* { dg-output "libgomp: cannot unmap target block" } */
+/* { dg-output "libgomp: refusing to unmap block \\\[\[0-9a-fA-FxX\]+,\\\+64\\\] that has not been mapped by 'acc_map_data'" } */
   return 0;
 }
 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH, OpenACC 2.7] Adjust acc_map_data/acc_unmap_data interaction with reference counters
  2023-06-22 10:03 [PATCH, OpenACC 2.7] Adjust acc_map_data/acc_unmap_data interaction with reference counters Chung-Lin Tang
@ 2023-10-31 14:06 ` Thomas Schwinge
  2024-03-04 16:18   ` [PATCH, OpenACC 2.7, v2] " Chung-Lin Tang
  0 siblings, 1 reply; 7+ messages in thread
From: Thomas Schwinge @ 2023-10-31 14:06 UTC (permalink / raw)
  To: Chung-Lin Tang; +Cc: gcc-patches, Catherine Moore

Hi Chung-Lin!

On 2023-06-22T18:03:37+0800, Chung-Lin Tang via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> This patch adjusts the implementation of acc_map_data/acc_unmap_data API library
> routines to more fit the description in the OpenACC 2.7 specification.

Thanks!

> Instead of using REFCOUNT_INFINITY, we now define a REFCOUNT_ACC_MAP_DATA
> special value to mark acc_map_data-created mappings, and allow adjustment of
> dynamic_refcount of such mappings by other constructs. Enforcing of an initial
> value of 1 for such mappings, and only allowing acc_unmap_data to delete such
> mappings, is implemented as specified.
>
> Actually, there is no real change (or improvement) in behavior of the API (thus
> no new tests) I've looked at the related OpenACC spec issues, and it seems that
> this part of the 2.7 spec change is mostly a clarification (see no downside in
> current REFCOUNT_INFINITY based implementation either).
> But this patch does make the internals more close to the spec description.

ACK, thanks.

> Tested without regressions using powerpc64le-linux/nvptx, okay for trunk?

A few comments, should be easy to work in:

> --- a/libgomp/libgomp.h
> +++ b/libgomp/libgomp.h
> @@ -1166,6 +1166,8 @@ struct target_mem_desc;
>  /* Special value for refcount - tgt_offset contains target address of the
>     artificial pointer to "omp declare target link" object.  */
>  #define REFCOUNT_LINK     (REFCOUNT_SPECIAL | 1)
> +/* Special value for refcount - created through acc_map_data.  */
> +#define REFCOUNT_ACC_MAP_DATA (REFCOUNT_SPECIAL | 2)
>
>  /* Special value for refcount - structure element sibling list items.
>     All such key refounts have REFCOUNT_STRUCTELEM bits set, with _FLAG_FIRST

> --- a/libgomp/oacc-mem.c
> +++ b/libgomp/oacc-mem.c
> @@ -411,7 +411,8 @@ acc_map_data (void *h, void *d, size_t s)
>        assert (n->refcount == 1);
>        assert (n->dynamic_refcount == 0);
>        /* Special reference counting behavior.  */
> -      n->refcount = REFCOUNT_INFINITY;
> +      n->refcount = REFCOUNT_ACC_MAP_DATA;
> +      n->dynamic_refcount = 1;
>
>        if (profiling_p)
>       {
> @@ -460,7 +461,7 @@ acc_unmap_data (void *h)
>       the different 'REFCOUNT_INFINITY' cases, or simply separate
>       'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
>       etc.)?  */
> -  else if (n->refcount != REFCOUNT_INFINITY)
> +  else if (n->refcount != REFCOUNT_ACC_MAP_DATA)
>      {
>        gomp_mutex_unlock (&acc_dev->lock);
>        gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"

Thus remove the TODO comment before this 'else if' block?  :-)

We should add a comment here that we're unmapping without consideration
of 'n->dynamic_refcount' (that is, 'acc_unmap_data' has implicit
'finalize' semantics -- at least per my reading of the specification; do
you agree?), that is:

    acc_map_data([var]); // 'dynamic_refcount = 1'
    acc_copyin([var]); // 'dynamic_refcount++'
    acc_unmap_data([var]); // does un-map, despite 'dynamic_refcount == 2'?
    assert (!acc_is_present([var]));

Do we have such a test case?  If not, please add one.

To complement 'goacc_exit_datum_1' (see below), we should add here:

    assert (n->dynamic_refcount >= 1);

The subsequenct code:

    if (tgt->refcount == REFCOUNT_INFINITY)
      {
        gomp_mutex_unlock (&acc_dev->lock);
        gomp_fatal ("cannot unmap target block");
      }

... is now unreachable, I think, and may thus be removed -- and any
inconsistency is caught by the subsequent:

    /* Above, we've verified that the mapping must have been set up by
       'acc_map_data'.  */
    assert (tgt->refcount == 1);

> @@ -519,7 +520,8 @@ goacc_map_var_existing (struct gomp_device_descr *acc_dev, void *hostaddr,
>      }
>
>    assert (n->refcount != REFCOUNT_LINK);
> -  if (n->refcount != REFCOUNT_INFINITY)
> +  if (n->refcount != REFCOUNT_INFINITY
> +      && n->refcount != REFCOUNT_ACC_MAP_DATA)
>      n->refcount++;
>    n->dynamic_refcount++;
>
> @@ -683,6 +685,7 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
>
>    assert (n->refcount != REFCOUNT_LINK);
>    if (n->refcount != REFCOUNT_INFINITY
> +      && n->refcount != REFCOUNT_ACC_MAP_DATA
>        && n->refcount < n->dynamic_refcount)
>      {
>        gomp_mutex_unlock (&acc_dev->lock);
> @@ -691,15 +694,27 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
>
>    if (finalize)
>      {
> -      if (n->refcount != REFCOUNT_INFINITY)
> +      if (n->refcount != REFCOUNT_INFINITY
> +       && n->refcount != REFCOUNT_ACC_MAP_DATA)
>       n->refcount -= n->dynamic_refcount;
> -      n->dynamic_refcount = 0;
> +
> +      if (n->refcount == REFCOUNT_ACC_MAP_DATA)
> +     /* Mappings created by acc_map_data are returned to initial
> +        dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
> +     n->dynamic_refcount = 1;
> +      else
> +     n->dynamic_refcount = 0;
>      }
>    else if (n->dynamic_refcount)
>      {
> -      if (n->refcount != REFCOUNT_INFINITY)
> +      if (n->refcount != REFCOUNT_INFINITY
> +       && n->refcount != REFCOUNT_ACC_MAP_DATA)
>       n->refcount--;
> -      n->dynamic_refcount--;
> +
> +      /* When mapping is created by acc_map_data, dynamic_refcount must be
> +      maintained at >= 1.  */
> +      if (n->refcount != REFCOUNT_ACC_MAP_DATA || n->dynamic_refcount > 1)
> +     n->dynamic_refcount--;
>      }

I'd find those changes more concise to understand if done the following
way: restore both 'if (finalize)' and 'else if (n->dynamic_refcount)'
branches to their original form (other than excluding 'n->refcount'
modification for 'REFCOUNT_ACC_MAP_DATA', as you have), and instead then
afterwards (that is, here), do:

    /* Mappings created by 'acc_map_data' can only be deleted by 'acc_unmap_data'.  */
    if (n->refcount == REFCOUNT_ACC_MAP_DATA
        && n->dynamic_refcount == 0)
      n->dynamic_refcount = 1;

That does have the same semantics, please verify?

>
>    if (n->refcount == 0)

> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -480,7 +480,9 @@ gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
>
>    uintptr_t *refcount_ptr = &k->refcount;
>
> -  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
> +    refcount_ptr = &k->dynamic_refcount;
> +  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>      refcount_ptr = &k->structelem_refcount;
>    else if (REFCOUNT_STRUCTELEM_P (k->refcount))
>      refcount_ptr = k->structelem_refcount_ptr;
> @@ -527,7 +529,9 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
>
>    uintptr_t *refcount_ptr = &k->refcount;
>
> -  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
> +    refcount_ptr = &k->dynamic_refcount;
> +  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>      refcount_ptr = &k->structelem_refcount;
>    else if (REFCOUNT_STRUCTELEM_P (k->refcount))
>      refcount_ptr = k->structelem_refcount_ptr;
> @@ -560,6 +564,10 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
>    else if (*refcount_ptr > 0)
>      *refcount_ptr -= 1;
>
> +  /* Force back to 1 if this is an acc_map_data mapping.  */
> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA && *refcount_ptr == 0)
> +    *refcount_ptr = 1;
> +
>   end:
>    if (*refcount_ptr == 0)
>      {

It's not clear to me why you need this handling -- instead of just
handling 'REFCOUNT_ACC_MAP_DATA' like 'REFCOUNT_INFINITY' here, that is,
early 'return'?

Per my understanding, this code is for OpenACC only exercised for
structured data regions, and it seems strange (unnecessary?) to adjust
the 'dynamic_refcount' for these for 'acc_map_data'-mapped data?  Or am I
missing anything?

> --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
> @@ -10,7 +10,7 @@ main (int argc, char *argv[])
>  {
>    acc_init (acc_device_default);
>    acc_unmap_data ((void *) foo);
> -/* { dg-output "libgomp: cannot unmap target block" } */
> +/* { dg-output "libgomp: refusing to unmap block \\\[\[0-9a-fA-FxX\]+,\\\+64\\\] that has not been mapped by 'acc_map_data'" } */
>    return 0;
>  }
>

Overall, your changes regress the
commit 3e888f94624294d2b9b34ebfee0916768e5d9c3f
"Add OpenACC 'acc_map_data' variant to 'libgomp.oacc-c-c++-common/deep-copy-8.c'"
that I just pushed.  I think you just need to handle
'REFCOUNT_ACC_MAP_DATA' like 'REFCOUNT_INFINITY' in
'libgomp/oacc-mem.c:goacc_enter_data_internal', 'if (n && struct_p)'?
Please verify.

But please also to the "Minimal OpenACC variant corresponding to PR96668"
code in 'libgomp/oacc-mem.c:goacc_enter_data_internal' add a safeguard
that we're not running into 'REFCOUNT_ACC_MAP_DATA' there.  I think
that's currently not (reasonably easily) possible, given that
'acc_map_data' isn't available in OpenACC/Fortran, but it'll be available
later, and then I'd rather have an 'assert' trigger there, instead of
random behavior.  (I'm not asking you to write a mixed OpenACC/Fortran
plus C test case for that scenario -- if feasible at all.)


Grüße
 Thomas
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH, OpenACC 2.7, v2] Adjust acc_map_data/acc_unmap_data interaction with reference counters
  2023-10-31 14:06 ` Thomas Schwinge
@ 2024-03-04 16:18   ` Chung-Lin Tang
  2024-03-15 11:24     ` Thomas Schwinge
  0 siblings, 1 reply; 7+ messages in thread
From: Chung-Lin Tang @ 2024-03-04 16:18 UTC (permalink / raw)
  To: Thomas Schwinge, Chung-Lin Tang; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 8693 bytes --]

Hi Thomas,

On 2023/10/31 11:06 PM, Thomas Schwinge wrote:
> A few comments, should be easy to work in:

>> @@ -460,7 +461,7 @@ acc_unmap_data (void *h)
>>       the different 'REFCOUNT_INFINITY' cases, or simply separate
>>       'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
>>       etc.)?  */
>> -  else if (n->refcount != REFCOUNT_INFINITY)
>> +  else if (n->refcount != REFCOUNT_ACC_MAP_DATA)
>>      {
>>        gomp_mutex_unlock (&acc_dev->lock);
>>        gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
> 
> Thus remove the TODO comment before this 'else if' block?  :-)

Removed TODO block, comments added below around new assert.

> We should add a comment here that we're unmapping without consideration
> of 'n->dynamic_refcount' (that is, 'acc_unmap_data' has implicit
> 'finalize' semantics -- at least per my reading of the specification; do
> you agree?), that is:
> 
>     acc_map_data([var]); // 'dynamic_refcount = 1'
>     acc_copyin([var]); // 'dynamic_refcount++'
>     acc_unmap_data([var]); // does un-map, despite 'dynamic_refcount == 2'?
>     assert (!acc_is_present([var]));
> 
> Do we have such a test case?  If not, please add one.

I've added a new testsuite/libgomp.oacc-c-c++-common/lib-96.c testcase for this.

> To complement 'goacc_exit_datum_1' (see below), we should add here:
> 
>     assert (n->dynamic_refcount >= 1);

Added.

> 
> The subsequenct code:
> 
>     if (tgt->refcount == REFCOUNT_INFINITY)
>       {
>         gomp_mutex_unlock (&acc_dev->lock);
>         gomp_fatal ("cannot unmap target block");
>       }
> 
> ... is now unreachable, I think, and may thus be removed -- and any
> inconsistency is caught by the subsequent:
> 
>     /* Above, we've verified that the mapping must have been set up by
>        'acc_map_data'.  */
>     assert (tgt->refcount == 1);

Removed the 'if (tgt->refcount == REFCOUNT_INFINITY)' block.

>> @@ -691,15 +694,27 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
>>
>>    if (finalize)
>>      {
>> -      if (n->refcount != REFCOUNT_INFINITY)
>> +      if (n->refcount != REFCOUNT_INFINITY
>> +       && n->refcount != REFCOUNT_ACC_MAP_DATA)
>>       n->refcount -= n->dynamic_refcount;
>> -      n->dynamic_refcount = 0;
>> +
>> +      if (n->refcount == REFCOUNT_ACC_MAP_DATA)
>> +     /* Mappings created by acc_map_data are returned to initial
>> +        dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
>> +     n->dynamic_refcount = 1;
>> +      else
>> +     n->dynamic_refcount = 0;
>>      }
>>    else if (n->dynamic_refcount)
>>      {
>> -      if (n->refcount != REFCOUNT_INFINITY)
>> +      if (n->refcount != REFCOUNT_INFINITY
>> +       && n->refcount != REFCOUNT_ACC_MAP_DATA)
>>       n->refcount--;
>> -      n->dynamic_refcount--;
>> +
>> +      /* When mapping is created by acc_map_data, dynamic_refcount must be
>> +      maintained at >= 1.  */
>> +      if (n->refcount != REFCOUNT_ACC_MAP_DATA || n->dynamic_refcount > 1)
>> +     n->dynamic_refcount--;
>>      }
> 
> I'd find those changes more concise to understand if done the following
> way: restore both 'if (finalize)' and 'else if (n->dynamic_refcount)'
> branches to their original form (other than excluding 'n->refcount'
> modification for 'REFCOUNT_ACC_MAP_DATA', as you have), and instead then
> afterwards (that is, here), do:
> 
>     /* Mappings created by 'acc_map_data' can only be deleted by 'acc_unmap_data'.  */
>     if (n->refcount == REFCOUNT_ACC_MAP_DATA
>         && n->dynamic_refcount == 0)
>       n->dynamic_refcount = 1;
> 
> That does have the same semantics, please verify?

This does not have the same semantics, because if the original finalize/n->dynamic_refcount
cases are left unmodified, they will treat REFCOUNT_ACC_MAP_DATA like a normal refcount and
decrement n->refcount, and handling n->refcount == REFCOUNT_ACC_MAP_DATA later won't work either.

I have however, adjusted the nesting of cases to split the 'n->refcount == REFCOUNT_ACC_MAP_DATA'
case away. This should be easier to read.

>> @@ -480,7 +480,9 @@ gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
>>
>>    uintptr_t *refcount_ptr = &k->refcount;
>>
>> -  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
>> +    refcount_ptr = &k->dynamic_refcount;
>> +  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>>      refcount_ptr = &k->structelem_refcount;
>>    else if (REFCOUNT_STRUCTELEM_P (k->refcount))
>>      refcount_ptr = k->structelem_refcount_ptr;
>> @@ -527,7 +529,9 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
>>
>>    uintptr_t *refcount_ptr = &k->refcount;
>>
>> -  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
>> +    refcount_ptr = &k->dynamic_refcount;
>> +  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>>      refcount_ptr = &k->structelem_refcount;
>>    else if (REFCOUNT_STRUCTELEM_P (k->refcount))
>>      refcount_ptr = k->structelem_refcount_ptr;
>> @@ -560,6 +564,10 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
>>    else if (*refcount_ptr > 0)
>>      *refcount_ptr -= 1;
>>
>> +  /* Force back to 1 if this is an acc_map_data mapping.  */
>> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA && *refcount_ptr == 0)
>> +    *refcount_ptr = 1;
>> +
>>   end:
>>    if (*refcount_ptr == 0)
>>      {
> 
> It's not clear to me why you need this handling -- instead of just
> handling 'REFCOUNT_ACC_MAP_DATA' like 'REFCOUNT_INFINITY' here, that is,
> early 'return'?
> 
> Per my understanding, this code is for OpenACC only exercised for
> structured data regions, and it seems strange (unnecessary?) to adjust
> the 'dynamic_refcount' for these for 'acc_map_data'-mapped data?  Or am I
> missing anything?

No, that is not true. It goes through almost everything through gomp_map_vars_existing/_internal.
This is what happens when you acc_create/acc_copyin on a mapping created by acc_map_data.

> Overall, your changes regress the
> commit 3e888f94624294d2b9b34ebfee0916768e5d9c3f
> "Add OpenACC 'acc_map_data' variant to 'libgomp.oacc-c-c++-common/deep-copy-8.c'"
> that I just pushed.  I think you just need to handle
> 'REFCOUNT_ACC_MAP_DATA' like 'REFCOUNT_INFINITY' in
> 'libgomp/oacc-mem.c:goacc_enter_data_internal', 'if (n && struct_p)'?
> Please verify.

Fixed by adding another '&& n->refcount != REFCOUNT_ACC_MAP_DATA' check in goacc_enter_data_internal.

> But please also to the "Minimal OpenACC variant corresponding to PR96668"
> code in 'libgomp/oacc-mem.c:goacc_enter_data_internal' add a safeguard
> that we're not running into 'REFCOUNT_ACC_MAP_DATA' there.  I think
> that's currently not (reasonably easily) possible, given that
> 'acc_map_data' isn't available in OpenACC/Fortran, but it'll be available
> later, and then I'd rather have an 'assert' trigger there, instead of
> random behavior.  (I'm not asking you to write a mixed OpenACC/Fortran
> plus C test case for that scenario -- if feasible at all.)

I am not really sure what you want me to do here, but REFCOUNT_ACC_MAP_DATA mappings
are all created through a single GOMP_MAP_ALLOC kind. The complex stuff of MAP_STRUCT, MAP_TO_PSET, etc.
should all be not related here (I presume even if Fortran eventually gets acc_map_data, it would be the
compiler side which should take care of passing the raw data-pointer/array-size to the acc_map_data routine)

I have re-tested this on x86_64-linux + nvptx. Please see if this is okay for committing to mainline.

Thanks,
Chung-Lin

2024-03-04  Chung-Lin Tang  <cltang@baylibre.com>

libgomp/ChangeLog:

	* libgomp.h (REFCOUNT_ACC_MAP_DATA): Define as (REFCOUNT_SPECIAL | 2).
	* oacc-mem.c (acc_map_data): Adjust to use REFCOUNT_ACC_MAP_DATA,
	initialize dynamic_refcount as 1.
	(acc_unmap_data): Adjust to use REFCOUNT_ACC_MAP_DATA, remove TODO
	comments. Add assert of 'n->dynamic_refcount >= 1' and comments.
	(goacc_map_var_existing): Add REFCOUNT_ACC_MAP_DATA case.
	(goacc_exit_datum_1): Add REFCOUNT_ACC_MAP_DATA case, respect
	REFCOUNT_ACC_MAP_DATA when decrementing/finalizing. Force lowest
	dynamic_refcount to be 1 for REFCOUNT_ACC_MAP_DATA.
	(goacc_enter_data_internal): Add REFCOUNT_ACC_MAP_DATA case.
	* target.c (gomp_increment_refcount): Add REFCOUNT_ACC_MAP_DATA case.
	(gomp_decrement_refcount): Add REFCOUNT_ACC_MAP_DATA case, force lowest
	dynamic_refcount to be 1 for REFCOUNT_ACC_MAP_DATA.
	* testsuite/libgomp.oacc-c-c++-common/lib-96.c: New testcase.
	* testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c: Adjust
	testcase error output scan test.



[-- Attachment #2: acc_map_data-v2.patch --]
[-- Type: text/plain, Size: 7149 bytes --]

diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index f98cccd8b66..089393846d1 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1163,6 +1163,8 @@ struct target_mem_desc;
 /* Special value for refcount - tgt_offset contains target address of the
    artificial pointer to "omp declare target link" object.  */
 #define REFCOUNT_LINK     (REFCOUNT_SPECIAL | 1)
+/* Special value for refcount - created through acc_map_data.  */
+#define REFCOUNT_ACC_MAP_DATA (REFCOUNT_SPECIAL | 2)
 
 /* Special value for refcount - structure element sibling list items.
    All such key refounts have REFCOUNT_STRUCTELEM bits set, with _FLAG_FIRST
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index ba48a1ccbb7..ffd56e397f6 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -411,7 +411,8 @@ acc_map_data (void *h, void *d, size_t s)
       assert (n->refcount == 1);
       assert (n->dynamic_refcount == 0);
       /* Special reference counting behavior.  */
-      n->refcount = REFCOUNT_INFINITY;
+      n->refcount = REFCOUNT_ACC_MAP_DATA;
+      n->dynamic_refcount = 1;
 
       if (profiling_p)
 	{
@@ -455,12 +456,7 @@ acc_unmap_data (void *h)
       gomp_fatal ("[%p,%d] surrounds %p",
 		  (void *) n->host_start, (int) host_size, (void *) h);
     }
-  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
-     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
-     the different 'REFCOUNT_INFINITY' cases, or simply separate
-     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
-     etc.)?  */
-  else if (n->refcount != REFCOUNT_INFINITY)
+  else if (n->refcount != REFCOUNT_ACC_MAP_DATA)
     {
       gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
@@ -468,13 +464,12 @@ acc_unmap_data (void *h)
 		  (void *) h, (int) host_size);
     }
 
-  struct target_mem_desc *tgt = n->tgt;
+  /* This should hold for all mappings created by acc_map_data. We are however
+     removing this mapping in a "finalize" manner, so dynamic_refcount > 1 does
+     not matter.  */
+  assert (n->dynamic_refcount >= 1);
 
-  if (tgt->refcount == REFCOUNT_INFINITY)
-    {
-      gomp_mutex_unlock (&acc_dev->lock);
-      gomp_fatal ("cannot unmap target block");
-    }
+  struct target_mem_desc *tgt = n->tgt;
 
   /* Above, we've verified that the mapping must have been set up by
      'acc_map_data'.  */
@@ -519,7 +514,8 @@ goacc_map_var_existing (struct gomp_device_descr *acc_dev, void *hostaddr,
     }
 
   assert (n->refcount != REFCOUNT_LINK);
-  if (n->refcount != REFCOUNT_INFINITY)
+  if (n->refcount != REFCOUNT_INFINITY
+      && n->refcount != REFCOUNT_ACC_MAP_DATA)
     n->refcount++;
   n->dynamic_refcount++;
 
@@ -683,13 +679,30 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
 
   assert (n->refcount != REFCOUNT_LINK);
   if (n->refcount != REFCOUNT_INFINITY
+      && n->refcount != REFCOUNT_ACC_MAP_DATA
       && n->refcount < n->dynamic_refcount)
     {
       gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("Dynamic reference counting assert fail\n");
     }
 
-  if (finalize)
+  if (n->refcount == REFCOUNT_ACC_MAP_DATA)
+    {
+      if (finalize)
+	{
+	  /* Mappings created by acc_map_data are returned to initial
+	     dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
+	  n->dynamic_refcount = 1;
+	}
+      else if (n->dynamic_refcount)
+	{
+	  /* When mapping is created by acc_map_data, dynamic_refcount must be
+	     maintained at >= 1.  */
+	  if (n->dynamic_refcount > 1)
+	    n->dynamic_refcount--;
+	}
+    }
+  else if (finalize)
     {
       if (n->refcount != REFCOUNT_INFINITY)
 	n->refcount -= n->dynamic_refcount;
@@ -1131,7 +1144,8 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
 	    }
 	  /* This is a special case because we must increment the refcount by
 	     the number of mapped struct elements, rather than by one.  */
-	  if (n->refcount != REFCOUNT_INFINITY)
+	  if (n->refcount != REFCOUNT_INFINITY
+	      && n->refcount != REFCOUNT_ACC_MAP_DATA)
 	    n->refcount += groupnum - 1;
 	  n->dynamic_refcount += groupnum - 1;
 	}
diff --git a/libgomp/target.c b/libgomp/target.c
index 1367e9cce6c..1851feba271 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -481,7 +481,9 @@ gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
 
   uintptr_t *refcount_ptr = &k->refcount;
 
-  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
+  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
+    refcount_ptr = &k->dynamic_refcount;
+  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
     refcount_ptr = &k->structelem_refcount;
   else if (REFCOUNT_STRUCTELEM_P (k->refcount))
     refcount_ptr = k->structelem_refcount_ptr;
@@ -528,7 +530,9 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
 
   uintptr_t *refcount_ptr = &k->refcount;
 
-  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
+  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
+    refcount_ptr = &k->dynamic_refcount;
+  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
     refcount_ptr = &k->structelem_refcount;
   else if (REFCOUNT_STRUCTELEM_P (k->refcount))
     refcount_ptr = k->structelem_refcount_ptr;
@@ -561,6 +565,10 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
   else if (*refcount_ptr > 0)
     *refcount_ptr -= 1;
 
+  /* Force back to 1 if this is an acc_map_data mapping.  */
+  if (k->refcount == REFCOUNT_ACC_MAP_DATA && *refcount_ptr == 0)
+    *refcount_ptr = 1;
+
  end:
   if (*refcount_ptr == 0)
     {
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c
new file mode 100644
index 00000000000..7bc55b94f33
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+
+/* Test if acc_unmap_data has implicit finalize semantics.  */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (h, d, N);
+
+  acc_copyin (h, N);
+  acc_copyin (h, N);
+  acc_copyin (h, N);
+
+  acc_unmap_data (h);
+
+  if (acc_is_present (h, N))
+    abort ();
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
index 872f0c1de5c..9ed9fa7e413 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
@@ -10,7 +10,7 @@ main (int argc, char *argv[])
 {
   acc_init (acc_device_default);
   acc_unmap_data ((void *) foo);
-/* { dg-output "libgomp: cannot unmap target block" } */
+/* { dg-output "libgomp: refusing to unmap block \\\[\[0-9a-fA-FxX\]+,\\\+64\\\] that has not been mapped by 'acc_map_data'" } */
   return 0;
 }
 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH, OpenACC 2.7, v2] Adjust acc_map_data/acc_unmap_data interaction with reference counters
  2024-03-04 16:18   ` [PATCH, OpenACC 2.7, v2] " Chung-Lin Tang
@ 2024-03-15 11:24     ` Thomas Schwinge
  2024-04-11 14:08       ` [PATCH, OpenACC 2.7, v3] " Chung-Lin Tang
  0 siblings, 1 reply; 7+ messages in thread
From: Thomas Schwinge @ 2024-03-15 11:24 UTC (permalink / raw)
  To: Chung-Lin Tang; +Cc: gcc-patches, Tobias Burnus

Hi Chung-Lin!

I realized: please add "PR libgomp/92840" to the Git commit log, as your
changes are directly a continuation of my earlier changes.


On 2024-03-05T01:18:28+0900, Chung-Lin Tang <cltang@pllab.cs.nthu.edu.tw> wrote:
> On 2023/10/31 11:06 PM, Thomas Schwinge wrote:
>>> @@ -691,15 +694,27 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
>>>
>>>    if (finalize)
>>>      {
>>> -      if (n->refcount != REFCOUNT_INFINITY)
>>> +      if (n->refcount != REFCOUNT_INFINITY
>>> +       && n->refcount != REFCOUNT_ACC_MAP_DATA)
>>>       n->refcount -= n->dynamic_refcount;
>>> -      n->dynamic_refcount = 0;
>>> +
>>> +      if (n->refcount == REFCOUNT_ACC_MAP_DATA)
>>> +     /* Mappings created by acc_map_data are returned to initial
>>> +        dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
>>> +     n->dynamic_refcount = 1;
>>> +      else
>>> +     n->dynamic_refcount = 0;
>>>      }
>>>    else if (n->dynamic_refcount)
>>>      {
>>> -      if (n->refcount != REFCOUNT_INFINITY)
>>> +      if (n->refcount != REFCOUNT_INFINITY
>>> +       && n->refcount != REFCOUNT_ACC_MAP_DATA)
>>>       n->refcount--;
>>> -      n->dynamic_refcount--;
>>> +
>>> +      /* When mapping is created by acc_map_data, dynamic_refcount must be
>>> +      maintained at >= 1.  */
>>> +      if (n->refcount != REFCOUNT_ACC_MAP_DATA || n->dynamic_refcount > 1)
>>> +     n->dynamic_refcount--;
>>>      }
>> 
>> I'd find those changes more concise to understand if done the following
>> way: restore both 'if (finalize)' and 'else if (n->dynamic_refcount)'
>> branches to their original form (other than excluding 'n->refcount'
>> modification for 'REFCOUNT_ACC_MAP_DATA', as you have), and instead then
>> afterwards (that is, here), do:
>> 
>>     /* Mappings created by 'acc_map_data' can only be deleted by 'acc_unmap_data'.  */
>>     if (n->refcount == REFCOUNT_ACC_MAP_DATA
>>         && n->dynamic_refcount == 0)
>>       n->dynamic_refcount = 1;
>> 
>> That does have the same semantics, please verify?
>
> This does not have the same semantics, because if the original finalize/n->dynamic_refcount
> cases are left unmodified, they will treat REFCOUNT_ACC_MAP_DATA like a normal refcount and
> decrement n->refcount, and handling n->refcount == REFCOUNT_ACC_MAP_DATA later won't work either.

That's why I said: "restore [...] excluding 'n->refcount' modification
for 'REFCOUNT_ACC_MAP_DATA', as you have [...]".  Sorry if that was
unclear.

> I have however, adjusted the nesting of cases to split the 'n->refcount == REFCOUNT_ACC_MAP_DATA'
> case away. This should be easier to read.

Thanks, that easier to follow indeed.  I had meant (on top of your v2):

    --- a/libgomp/oacc-mem.c
    +++ b/libgomp/oacc-mem.c
    @@ -686,35 +686,27 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
           gomp_fatal ("Dynamic reference counting assert fail\n");
         }
     
    -  if (n->refcount == REFCOUNT_ACC_MAP_DATA)
    +  if (finalize)
         {
    -      if (finalize)
    -	{
    -	  /* Mappings created by acc_map_data are returned to initial
    -	     dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
    -	  n->dynamic_refcount = 1;
    -	}
    -      else if (n->dynamic_refcount)
    -	{
    -	  /* When mapping is created by acc_map_data, dynamic_refcount must be
    -	     maintained at >= 1.  */
    -	  if (n->dynamic_refcount > 1)
    -	    n->dynamic_refcount--;
    -	}
    -    }
    -  else if (finalize)
    -    {
    -      if (n->refcount != REFCOUNT_INFINITY)
    +      if (n->refcount != REFCOUNT_INFINITY
    +	  && n->refcount != REFCOUNT_ACC_MAP_DATA)
     	n->refcount -= n->dynamic_refcount;
           n->dynamic_refcount = 0;
         }
       else if (n->dynamic_refcount)
         {
    -      if (n->refcount != REFCOUNT_INFINITY)
    +      if (n->refcount != REFCOUNT_INFINITY
    +	  && n->refcount != REFCOUNT_ACC_MAP_DATA)
     	n->refcount--;
           n->dynamic_refcount--;
         }
     
    +  /* Mappings created by 'acc_map_data' may only be deleted by
    +     'acc_unmap_data'.  */
    +  if (n->refcount == REFCOUNT_ACC_MAP_DATA
    +      && n->dynamic_refcount == 0)
    +    n->dynamic_refcount = 1;
    +
       if (n->refcount == 0)
         {
           bool copyout = (kind == GOMP_MAP_FROM

..., which really should have the same semantics?  No strong opinion on
which of the two variants you now chose.


>>> @@ -480,7 +480,9 @@ gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
>>>
>>>    uintptr_t *refcount_ptr = &k->refcount;
>>>
>>> -  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>>> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
>>> +    refcount_ptr = &k->dynamic_refcount;
>>> +  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>>>      refcount_ptr = &k->structelem_refcount;
>>>    else if (REFCOUNT_STRUCTELEM_P (k->refcount))
>>>      refcount_ptr = k->structelem_refcount_ptr;
>>> @@ -527,7 +529,9 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
>>>
>>>    uintptr_t *refcount_ptr = &k->refcount;
>>>
>>> -  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>>> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
>>> +    refcount_ptr = &k->dynamic_refcount;
>>> +  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>>>      refcount_ptr = &k->structelem_refcount;
>>>    else if (REFCOUNT_STRUCTELEM_P (k->refcount))
>>>      refcount_ptr = k->structelem_refcount_ptr;
>>> @@ -560,6 +564,10 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
>>>    else if (*refcount_ptr > 0)
>>>      *refcount_ptr -= 1;
>>>
>>> +  /* Force back to 1 if this is an acc_map_data mapping.  */
>>> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA && *refcount_ptr == 0)
>>> +    *refcount_ptr = 1;
>>> +
>>>   end:
>>>    if (*refcount_ptr == 0)
>>>      {
>> 
>> It's not clear to me why you need this handling -- instead of just
>> handling 'REFCOUNT_ACC_MAP_DATA' like 'REFCOUNT_INFINITY' here, that is,
>> early 'return'?
>> 
>> Per my understanding, this code is for OpenACC only exercised for
>> structured data regions, and it seems strange (unnecessary?) to adjust
>> the 'dynamic_refcount' for these for 'acc_map_data'-mapped data?  Or am I
>> missing anything?
>
> No, that is not true. It goes through almost everything through gomp_map_vars_existing/_internal.
> This is what happens when you acc_create/acc_copyin on a mapping created by acc_map_data.

But I don't understand what you foresee breaking with the following (on
top of your v2):

    --- a/libgomp/target.c
    +++ b/libgomp/target.c
    @@ -476,14 +476,14 @@ gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
     static inline void
     gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
     {
    -  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    +  if (k == NULL
    +      || k->refcount == REFCOUNT_INFINITY
    +      || k->refcount == REFCOUNT_ACC_MAP_DATA)
         return;
     
       uintptr_t *refcount_ptr = &k->refcount;
     
    -  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
    -    refcount_ptr = &k->dynamic_refcount;
    -  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    +  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
         refcount_ptr = &k->structelem_refcount;
       else if (REFCOUNT_STRUCTELEM_P (k->refcount))
         refcount_ptr = k->structelem_refcount_ptr;
    @@ -522,7 +522,9 @@ static inline void
     gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
     			 bool *do_copy, bool *do_remove)
     {
    -  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    +  if (k == NULL
    +      || k->refcount == REFCOUNT_INFINITY
    +      || k->refcount == REFCOUNT_ACC_MAP_DATA)
         {
           *do_copy = *do_remove = false;
           return;
    @@ -530,9 +532,7 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
     
       uintptr_t *refcount_ptr = &k->refcount;
     
    -  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
    -    refcount_ptr = &k->dynamic_refcount;
    -  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    +  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
         refcount_ptr = &k->structelem_refcount;
       else if (REFCOUNT_STRUCTELEM_P (k->refcount))
         refcount_ptr = k->structelem_refcount_ptr;
    @@ -565,10 +565,6 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
       else if (*refcount_ptr > 0)
         *refcount_ptr -= 1;
     
    -  /* Force back to 1 if this is an acc_map_data mapping.  */
    -  if (k->refcount == REFCOUNT_ACC_MAP_DATA && *refcount_ptr == 0)
    -    *refcount_ptr = 1;
    -
      end:
       if (*refcount_ptr == 0)
         {

Can you please show a test case?


>> But please also to the "Minimal OpenACC variant corresponding to PR96668"
>> code in 'libgomp/oacc-mem.c:goacc_enter_data_internal' add a safeguard
>> that we're not running into 'REFCOUNT_ACC_MAP_DATA' there.  I think
>> that's currently not (reasonably easily) possible, given that
>> 'acc_map_data' isn't available in OpenACC/Fortran, but it'll be available
>> later

('acc_map_data' now is available in OpenACC/Fortran as of Tobias' recent
commit r14-9196-g8b3f1edf9b38cb8a88c0a101a675d092bf6135d2
"OpenACC: Add Fortran routines acc_{alloc,free,hostptr,deviceptr,memcpy_{to,from}_device*}".
See 'libgomp.oacc-fortran/acc_host_device_ptr.f90' for a simple test
case.)

>> and then I'd rather have an 'assert' trigger there, instead of
>> random behavior.  (I'm not asking you to write a mixed OpenACC/Fortran
>> plus C test case for that scenario -- if feasible at all.)
>
> I am not really sure what you want me to do here, but REFCOUNT_ACC_MAP_DATA mappings
> are all created through a single GOMP_MAP_ALLOC kind. The complex stuff of MAP_STRUCT, MAP_TO_PSET, etc.
> should all be not related here (I presume even if Fortran eventually gets acc_map_data, it would be the
> compiler side which should take care of passing the raw data-pointer/array-size to the acc_map_data routine)

I see we already have:

    if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET
        && tgt->list_count == 0)
      {
        /* 'declare target'.  */
        assert (n->refcount == REFCOUNT_INFINITY);

I think I wanted to you to add:

    --- a/libgomp/oacc-mem.c
    +++ b/libgomp/oacc-mem.c
    @@ -1217,7 +1209,8 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
     	      processed = true;
     	    }
     	  else
    -	    assert (n->refcount != REFCOUNT_INFINITY);
    +	    assert (n->refcount != REFCOUNT_INFINITY
    +		    && n->refcount != REFCOUNT_ACC_MAP_DATA);
     
     	  for (size_t j = 0; j < tgt->list_count; j++)
     	    if (tgt->list[j].key == n)


Please check these items, and then we're good to go.


Grüße
 Thomas


> libgomp/ChangeLog:
>
> 	* libgomp.h (REFCOUNT_ACC_MAP_DATA): Define as (REFCOUNT_SPECIAL | 2).
> 	* oacc-mem.c (acc_map_data): Adjust to use REFCOUNT_ACC_MAP_DATA,
> 	initialize dynamic_refcount as 1.
> 	(acc_unmap_data): Adjust to use REFCOUNT_ACC_MAP_DATA, remove TODO
> 	comments. Add assert of 'n->dynamic_refcount >= 1' and comments.
> 	(goacc_map_var_existing): Add REFCOUNT_ACC_MAP_DATA case.
> 	(goacc_exit_datum_1): Add REFCOUNT_ACC_MAP_DATA case, respect
> 	REFCOUNT_ACC_MAP_DATA when decrementing/finalizing. Force lowest
> 	dynamic_refcount to be 1 for REFCOUNT_ACC_MAP_DATA.
> 	(goacc_enter_data_internal): Add REFCOUNT_ACC_MAP_DATA case.
> 	* target.c (gomp_increment_refcount): Add REFCOUNT_ACC_MAP_DATA case.
> 	(gomp_decrement_refcount): Add REFCOUNT_ACC_MAP_DATA case, force lowest
> 	dynamic_refcount to be 1 for REFCOUNT_ACC_MAP_DATA.
> 	* testsuite/libgomp.oacc-c-c++-common/lib-96.c: New testcase.
> 	* testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c: Adjust
> 	testcase error output scan test.
>
>
> diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
> index f98cccd8b66..089393846d1 100644
> --- a/libgomp/libgomp.h
> +++ b/libgomp/libgomp.h
> @@ -1163,6 +1163,8 @@ struct target_mem_desc;
>  /* Special value for refcount - tgt_offset contains target address of the
>     artificial pointer to "omp declare target link" object.  */
>  #define REFCOUNT_LINK     (REFCOUNT_SPECIAL | 1)
> +/* Special value for refcount - created through acc_map_data.  */
> +#define REFCOUNT_ACC_MAP_DATA (REFCOUNT_SPECIAL | 2)
>  
>  /* Special value for refcount - structure element sibling list items.
>     All such key refounts have REFCOUNT_STRUCTELEM bits set, with _FLAG_FIRST
> diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
> index ba48a1ccbb7..ffd56e397f6 100644
> --- a/libgomp/oacc-mem.c
> +++ b/libgomp/oacc-mem.c
> @@ -411,7 +411,8 @@ acc_map_data (void *h, void *d, size_t s)
>        assert (n->refcount == 1);
>        assert (n->dynamic_refcount == 0);
>        /* Special reference counting behavior.  */
> -      n->refcount = REFCOUNT_INFINITY;
> +      n->refcount = REFCOUNT_ACC_MAP_DATA;
> +      n->dynamic_refcount = 1;
>  
>        if (profiling_p)
>  	{
> @@ -455,12 +456,7 @@ acc_unmap_data (void *h)
>        gomp_fatal ("[%p,%d] surrounds %p",
>  		  (void *) n->host_start, (int) host_size, (void *) h);
>      }
> -  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
> -     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
> -     the different 'REFCOUNT_INFINITY' cases, or simply separate
> -     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
> -     etc.)?  */
> -  else if (n->refcount != REFCOUNT_INFINITY)
> +  else if (n->refcount != REFCOUNT_ACC_MAP_DATA)
>      {
>        gomp_mutex_unlock (&acc_dev->lock);
>        gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
> @@ -468,13 +464,12 @@ acc_unmap_data (void *h)
>  		  (void *) h, (int) host_size);
>      }
>  
> -  struct target_mem_desc *tgt = n->tgt;
> +  /* This should hold for all mappings created by acc_map_data. We are however
> +     removing this mapping in a "finalize" manner, so dynamic_refcount > 1 does
> +     not matter.  */
> +  assert (n->dynamic_refcount >= 1);
>  
> -  if (tgt->refcount == REFCOUNT_INFINITY)
> -    {
> -      gomp_mutex_unlock (&acc_dev->lock);
> -      gomp_fatal ("cannot unmap target block");
> -    }
> +  struct target_mem_desc *tgt = n->tgt;
>  
>    /* Above, we've verified that the mapping must have been set up by
>       'acc_map_data'.  */
> @@ -519,7 +514,8 @@ goacc_map_var_existing (struct gomp_device_descr *acc_dev, void *hostaddr,
>      }
>  
>    assert (n->refcount != REFCOUNT_LINK);
> -  if (n->refcount != REFCOUNT_INFINITY)
> +  if (n->refcount != REFCOUNT_INFINITY
> +      && n->refcount != REFCOUNT_ACC_MAP_DATA)
>      n->refcount++;
>    n->dynamic_refcount++;
>  
> @@ -683,13 +679,30 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
>  
>    assert (n->refcount != REFCOUNT_LINK);
>    if (n->refcount != REFCOUNT_INFINITY
> +      && n->refcount != REFCOUNT_ACC_MAP_DATA
>        && n->refcount < n->dynamic_refcount)
>      {
>        gomp_mutex_unlock (&acc_dev->lock);
>        gomp_fatal ("Dynamic reference counting assert fail\n");
>      }
>  
> -  if (finalize)
> +  if (n->refcount == REFCOUNT_ACC_MAP_DATA)
> +    {
> +      if (finalize)
> +	{
> +	  /* Mappings created by acc_map_data are returned to initial
> +	     dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
> +	  n->dynamic_refcount = 1;
> +	}
> +      else if (n->dynamic_refcount)
> +	{
> +	  /* When mapping is created by acc_map_data, dynamic_refcount must be
> +	     maintained at >= 1.  */
> +	  if (n->dynamic_refcount > 1)
> +	    n->dynamic_refcount--;
> +	}
> +    }
> +  else if (finalize)
>      {
>        if (n->refcount != REFCOUNT_INFINITY)
>  	n->refcount -= n->dynamic_refcount;
> @@ -1131,7 +1144,8 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
>  	    }
>  	  /* This is a special case because we must increment the refcount by
>  	     the number of mapped struct elements, rather than by one.  */
> -	  if (n->refcount != REFCOUNT_INFINITY)
> +	  if (n->refcount != REFCOUNT_INFINITY
> +	      && n->refcount != REFCOUNT_ACC_MAP_DATA)
>  	    n->refcount += groupnum - 1;
>  	  n->dynamic_refcount += groupnum - 1;
>  	}
> diff --git a/libgomp/target.c b/libgomp/target.c
> index 1367e9cce6c..1851feba271 100644
> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -481,7 +481,9 @@ gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
>  
>    uintptr_t *refcount_ptr = &k->refcount;
>  
> -  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
> +    refcount_ptr = &k->dynamic_refcount;
> +  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>      refcount_ptr = &k->structelem_refcount;
>    else if (REFCOUNT_STRUCTELEM_P (k->refcount))
>      refcount_ptr = k->structelem_refcount_ptr;
> @@ -528,7 +530,9 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
>  
>    uintptr_t *refcount_ptr = &k->refcount;
>  
> -  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
> +    refcount_ptr = &k->dynamic_refcount;
> +  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>      refcount_ptr = &k->structelem_refcount;
>    else if (REFCOUNT_STRUCTELEM_P (k->refcount))
>      refcount_ptr = k->structelem_refcount_ptr;
> @@ -561,6 +565,10 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
>    else if (*refcount_ptr > 0)
>      *refcount_ptr -= 1;
>  
> +  /* Force back to 1 if this is an acc_map_data mapping.  */
> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA && *refcount_ptr == 0)
> +    *refcount_ptr = 1;
> +
>   end:
>    if (*refcount_ptr == 0)
>      {
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c
> new file mode 100644
> index 00000000000..7bc55b94f33
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c
> @@ -0,0 +1,36 @@
> +/* { dg-do run } */
> +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
> +
> +/* Test if acc_unmap_data has implicit finalize semantics.  */
> +
> +#include <stdlib.h>
> +#include <openacc.h>
> +
> +int
> +main (int argc, char **argv)
> +{
> +  const int N = 256;
> +  unsigned char *h;
> +  void *d;
> +
> +  h = (unsigned char *) malloc (N);
> +
> +  d = acc_malloc (N);
> +
> +  acc_map_data (h, d, N);
> +
> +  acc_copyin (h, N);
> +  acc_copyin (h, N);
> +  acc_copyin (h, N);
> +
> +  acc_unmap_data (h);
> +
> +  if (acc_is_present (h, N))
> +    abort ();
> +
> +  acc_free (d);
> +
> +  free (h);
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
> index 872f0c1de5c..9ed9fa7e413 100644
> --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
> @@ -10,7 +10,7 @@ main (int argc, char *argv[])
>  {
>    acc_init (acc_device_default);
>    acc_unmap_data ((void *) foo);
> -/* { dg-output "libgomp: cannot unmap target block" } */
> +/* { dg-output "libgomp: refusing to unmap block \\\[\[0-9a-fA-FxX\]+,\\\+64\\\] that has not been mapped by 'acc_map_data'" } */
>    return 0;
>  }
>  

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH, OpenACC 2.7, v3] Adjust acc_map_data/acc_unmap_data interaction with reference counters
  2024-03-15 11:24     ` Thomas Schwinge
@ 2024-04-11 14:08       ` Chung-Lin Tang
  2024-04-12  7:14         ` Thomas Schwinge
  0 siblings, 1 reply; 7+ messages in thread
From: Chung-Lin Tang @ 2024-04-11 14:08 UTC (permalink / raw)
  To: Thomas Schwinge; +Cc: gcc-patches, Tobias Burnus

[-- Attachment #1: Type: text/plain, Size: 5722 bytes --]

Hi Thomas,

On 2024/3/15 7:24 PM, Thomas Schwinge wrote:
> Hi Chung-Lin!
> 
> I realized: please add "PR libgomp/92840" to the Git commit log, as your
> changes are directly a continuation of my earlier changes.

Okay, I'll remember to do that.

...
>     -      if (n->refcount != REFCOUNT_INFINITY)
>     +      if (n->refcount != REFCOUNT_INFINITY
>     +	  && n->refcount != REFCOUNT_ACC_MAP_DATA)
>      	n->refcount--;
>            n->dynamic_refcount--;
>          }
>      
>     +  /* Mappings created by 'acc_map_data' may only be deleted by
>     +     'acc_unmap_data'.  */
>     +  if (n->refcount == REFCOUNT_ACC_MAP_DATA
>     +      && n->dynamic_refcount == 0)
>     +    n->dynamic_refcount = 1;
>     +
>        if (n->refcount == 0)
>          {
>            bool copyout = (kind == GOMP_MAP_FROM
> 
> ..., which really should have the same semantics?  No strong opinion on
> which of the two variants you now chose.

My guess is that breaking off the REFCOUNT_ACC_MAP_DATA case separately will
be lighter on any branch predictors (faster performing overall), so I will
stick with my version here.


>>>
>>> It's not clear to me why you need this handling -- instead of just
>>> handling 'REFCOUNT_ACC_MAP_DATA' like 'REFCOUNT_INFINITY' here, that is,
>>> early 'return'?
>>>
>>> Per my understanding, this code is for OpenACC only exercised for
>>> structured data regions, and it seems strange (unnecessary?) to adjust
>>> the 'dynamic_refcount' for these for 'acc_map_data'-mapped data?  Or am I
>>> missing anything?
>>
>> No, that is not true. It goes through almost everything through gomp_map_vars_existing/_internal.
>> This is what happens when you acc_create/acc_copyin on a mapping created by acc_map_data.
> 
> But I don't understand what you foresee breaking with the following (on
> top of your v2):
> 
>     --- a/libgomp/target.c
>     +++ b/libgomp/target.c
>     @@ -476,14 +476,14 @@ gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
>      static inline void
>      gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
>      {
>     -  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
>     +  if (k == NULL
>     +      || k->refcount == REFCOUNT_INFINITY
>     +      || k->refcount == REFCOUNT_ACC_MAP_DATA)
>          return;
>      
>        uintptr_t *refcount_ptr = &k->refcount;
>      
>     -  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
>     -    refcount_ptr = &k->dynamic_refcount;
>     -  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>     +  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>          refcount_ptr = &k->structelem_refcount;
...
> Can you please show a test case?

I have re-tested the patch *without* the gomp_increment/decrement_refcount changes,
and have these regressions (just to demonstrate what is affected):
+FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/nested-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  execution test
+FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/nested-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  execution test
+FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/pr92854-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  execution test
+FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/pr92854-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  execution test
+FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/nested-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  execution test
+FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/nested-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  execution test
+FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/pr92854-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  execution test
+FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/pr92854-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  execution test

Now, I have also re-tested your version (aka, just break early and return when k->refcount == REFCOUNT_ACC_MAP_DATA)
And for the record, that also works (no regressions).

However, I strongly suggest we use my version here where we adjust the dynamic_refcount, simply because: *It is the whole point of this project item in OpenACC 2.7*

The 2.7 spec articulated how increment/decrement interacts with acc_map_data/acc_unmap_data and this patch was supposed to make libgomp more conforming to it implementation-wise.
(otherwise, no point in working on this at all, as there wasn't really anything behaviorally wrong about our implementation before)

> I see we already have:
> 
>     if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET
>         && tgt->list_count == 0)
>       {
>         /* 'declare target'.  */
>         assert (n->refcount == REFCOUNT_INFINITY);
> 
> I think I wanted to you to add:
> 
>     --- a/libgomp/oacc-mem.c
>     +++ b/libgomp/oacc-mem.c
>     @@ -1217,7 +1209,8 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
>      	      processed = true;
>      	    }
>      	  else
>     -	    assert (n->refcount != REFCOUNT_INFINITY);
>     +	    assert (n->refcount != REFCOUNT_INFINITY
>     +		    && n->refcount != REFCOUNT_ACC_MAP_DATA);
>      
>      	  for (size_t j = 0; j < tgt->list_count; j++)
>      	    if (tgt->list[j].key == n)

I have added this fragment to the patch, thanks.

> 
> Please check these items, and then we're good to go.

I have attached v3 of this patch, of course re-tested without regressions.
If there are no objections I will commit this before end of week (maybe weekend)

Thanks,
Chung-Lin

[-- Attachment #2: acc_map_data-v3.patch --]
[-- Type: text/plain, Size: 7520 bytes --]

diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index f98cccd8b66..089393846d1 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1163,6 +1163,8 @@ struct target_mem_desc;
 /* Special value for refcount - tgt_offset contains target address of the
    artificial pointer to "omp declare target link" object.  */
 #define REFCOUNT_LINK     (REFCOUNT_SPECIAL | 1)
+/* Special value for refcount - created through acc_map_data.  */
+#define REFCOUNT_ACC_MAP_DATA (REFCOUNT_SPECIAL | 2)
 
 /* Special value for refcount - structure element sibling list items.
    All such key refounts have REFCOUNT_STRUCTELEM bits set, with _FLAG_FIRST
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index ba48a1ccbb7..d590806b5cd 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -411,7 +411,8 @@ acc_map_data (void *h, void *d, size_t s)
       assert (n->refcount == 1);
       assert (n->dynamic_refcount == 0);
       /* Special reference counting behavior.  */
-      n->refcount = REFCOUNT_INFINITY;
+      n->refcount = REFCOUNT_ACC_MAP_DATA;
+      n->dynamic_refcount = 1;
 
       if (profiling_p)
 	{
@@ -455,12 +456,7 @@ acc_unmap_data (void *h)
       gomp_fatal ("[%p,%d] surrounds %p",
 		  (void *) n->host_start, (int) host_size, (void *) h);
     }
-  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
-     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
-     the different 'REFCOUNT_INFINITY' cases, or simply separate
-     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
-     etc.)?  */
-  else if (n->refcount != REFCOUNT_INFINITY)
+  else if (n->refcount != REFCOUNT_ACC_MAP_DATA)
     {
       gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
@@ -468,13 +464,12 @@ acc_unmap_data (void *h)
 		  (void *) h, (int) host_size);
     }
 
-  struct target_mem_desc *tgt = n->tgt;
+  /* This should hold for all mappings created by acc_map_data. We are however
+     removing this mapping in a "finalize" manner, so dynamic_refcount > 1 does
+     not matter.  */
+  assert (n->dynamic_refcount >= 1);
 
-  if (tgt->refcount == REFCOUNT_INFINITY)
-    {
-      gomp_mutex_unlock (&acc_dev->lock);
-      gomp_fatal ("cannot unmap target block");
-    }
+  struct target_mem_desc *tgt = n->tgt;
 
   /* Above, we've verified that the mapping must have been set up by
      'acc_map_data'.  */
@@ -519,7 +514,8 @@ goacc_map_var_existing (struct gomp_device_descr *acc_dev, void *hostaddr,
     }
 
   assert (n->refcount != REFCOUNT_LINK);
-  if (n->refcount != REFCOUNT_INFINITY)
+  if (n->refcount != REFCOUNT_INFINITY
+      && n->refcount != REFCOUNT_ACC_MAP_DATA)
     n->refcount++;
   n->dynamic_refcount++;
 
@@ -683,13 +679,30 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
 
   assert (n->refcount != REFCOUNT_LINK);
   if (n->refcount != REFCOUNT_INFINITY
+      && n->refcount != REFCOUNT_ACC_MAP_DATA
       && n->refcount < n->dynamic_refcount)
     {
       gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("Dynamic reference counting assert fail\n");
     }
 
-  if (finalize)
+  if (n->refcount == REFCOUNT_ACC_MAP_DATA)
+    {
+      if (finalize)
+	{
+	  /* Mappings created by acc_map_data are returned to initial
+	     dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
+	  n->dynamic_refcount = 1;
+	}
+      else if (n->dynamic_refcount)
+	{
+	  /* When mapping is created by acc_map_data, dynamic_refcount must be
+	     maintained at >= 1.  */
+	  if (n->dynamic_refcount > 1)
+	    n->dynamic_refcount--;
+	}
+    }
+  else if (finalize)
     {
       if (n->refcount != REFCOUNT_INFINITY)
 	n->refcount -= n->dynamic_refcount;
@@ -1131,7 +1144,8 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
 	    }
 	  /* This is a special case because we must increment the refcount by
 	     the number of mapped struct elements, rather than by one.  */
-	  if (n->refcount != REFCOUNT_INFINITY)
+	  if (n->refcount != REFCOUNT_INFINITY
+	      && n->refcount != REFCOUNT_ACC_MAP_DATA)
 	    n->refcount += groupnum - 1;
 	  n->dynamic_refcount += groupnum - 1;
 	}
@@ -1203,7 +1217,8 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
 	      processed = true;
 	    }
 	  else
-	    assert (n->refcount != REFCOUNT_INFINITY);
+	    assert (n->refcount != REFCOUNT_INFINITY
+		    && n->refcount != REFCOUNT_ACC_MAP_DATA);
 
 	  for (size_t j = 0; j < tgt->list_count; j++)
 	    if (tgt->list[j].key == n)
diff --git a/libgomp/target.c b/libgomp/target.c
index bcc86051601..c9dcb8761e5 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -481,7 +481,9 @@ gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
 
   uintptr_t *refcount_ptr = &k->refcount;
 
-  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
+  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
+    refcount_ptr = &k->dynamic_refcount;
+  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
     refcount_ptr = &k->structelem_refcount;
   else if (REFCOUNT_STRUCTELEM_P (k->refcount))
     refcount_ptr = k->structelem_refcount_ptr;
@@ -528,7 +530,9 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
 
   uintptr_t *refcount_ptr = &k->refcount;
 
-  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
+  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
+    refcount_ptr = &k->dynamic_refcount;
+  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
     refcount_ptr = &k->structelem_refcount;
   else if (REFCOUNT_STRUCTELEM_P (k->refcount))
     refcount_ptr = k->structelem_refcount_ptr;
@@ -561,6 +565,10 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
   else if (*refcount_ptr > 0)
     *refcount_ptr -= 1;
 
+  /* Force back to 1 if this is an acc_map_data mapping.  */
+  if (k->refcount == REFCOUNT_ACC_MAP_DATA && *refcount_ptr == 0)
+    *refcount_ptr = 1;
+
  end:
   if (*refcount_ptr == 0)
     {
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c
new file mode 100644
index 00000000000..7bc55b94f33
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+
+/* Test if acc_unmap_data has implicit finalize semantics.  */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (h, d, N);
+
+  acc_copyin (h, N);
+  acc_copyin (h, N);
+  acc_copyin (h, N);
+
+  acc_unmap_data (h);
+
+  if (acc_is_present (h, N))
+    abort ();
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
index 872f0c1de5c..9ed9fa7e413 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
@@ -10,7 +10,7 @@ main (int argc, char *argv[])
 {
   acc_init (acc_device_default);
   acc_unmap_data ((void *) foo);
-/* { dg-output "libgomp: cannot unmap target block" } */
+/* { dg-output "libgomp: refusing to unmap block \\\[\[0-9a-fA-FxX\]+,\\\+64\\\] that has not been mapped by 'acc_map_data'" } */
   return 0;
 }
 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH, OpenACC 2.7, v3] Adjust acc_map_data/acc_unmap_data interaction with reference counters
  2024-04-11 14:08       ` [PATCH, OpenACC 2.7, v3] " Chung-Lin Tang
@ 2024-04-12  7:14         ` Thomas Schwinge
  2024-04-16  9:12           ` Chung-Lin Tang
  0 siblings, 1 reply; 7+ messages in thread
From: Thomas Schwinge @ 2024-04-12  7:14 UTC (permalink / raw)
  To: Chung-Lin Tang; +Cc: gcc-patches, Tobias Burnus

Hi Chung-Lin!

On 2024-04-11T22:08:47+0800, Chung-Lin Tang <cltang@baylibre.com> wrote:
> On 2024/3/15 7:24 PM, Thomas Schwinge wrote:
>>     -      if (n->refcount != REFCOUNT_INFINITY)
>>     +      if (n->refcount != REFCOUNT_INFINITY
>>     +	  && n->refcount != REFCOUNT_ACC_MAP_DATA)
>>      	n->refcount--;
>>            n->dynamic_refcount--;
>>          }
>>      
>>     +  /* Mappings created by 'acc_map_data' may only be deleted by
>>     +     'acc_unmap_data'.  */
>>     +  if (n->refcount == REFCOUNT_ACC_MAP_DATA
>>     +      && n->dynamic_refcount == 0)
>>     +    n->dynamic_refcount = 1;
>>     +
>>        if (n->refcount == 0)
>>          {
>>            bool copyout = (kind == GOMP_MAP_FROM
>> 
>> ..., which really should have the same semantics?  No strong opinion on
>> which of the two variants you now chose.
>
> My guess is that breaking off the REFCOUNT_ACC_MAP_DATA case separately will
> be lighter on any branch predictors (faster performing overall)

Eh, OK...

> so I will
> stick with my version here.


>>>> It's not clear to me why you need this handling -- instead of just
>>>> handling 'REFCOUNT_ACC_MAP_DATA' like 'REFCOUNT_INFINITY' here, that is,
>>>> early 'return'?
>>>>
>>>> Per my understanding, this code is for OpenACC only exercised for
>>>> structured data regions, and it seems strange (unnecessary?) to adjust
>>>> the 'dynamic_refcount' for these for 'acc_map_data'-mapped data?  Or am I
>>>> missing anything?
>>>
>>> No, that is not true. It goes through almost everything through gomp_map_vars_existing/_internal.
>>> This is what happens when you acc_create/acc_copyin on a mapping created by acc_map_data.

I still don't follow.  If you 'acc_map_data' something, and then
'acc_create' the same memory region, then that's handled, with
'dynamic_refcount', via 'acc_create' -> 'goacc_enter_datum' ->
'goacc_map_var_existing', all in 'libgomp/oacc-mem.c'.  Agree?

>> But I don't understand what you foresee breaking with the following (on
>> top of your v2):
>> 
>>     --- a/libgomp/target.c
>>     +++ b/libgomp/target.c
>>     @@ -476,14 +476,14 @@ gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
>>      static inline void
>>      gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
>>      {
>>     -  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
>>     +  if (k == NULL
>>     +      || k->refcount == REFCOUNT_INFINITY
>>     +      || k->refcount == REFCOUNT_ACC_MAP_DATA)
>>          return;
>>      
>>        uintptr_t *refcount_ptr = &k->refcount;
>>      
>>     -  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
>>     -    refcount_ptr = &k->dynamic_refcount;
>>     -  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>>     +  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>>          refcount_ptr = &k->structelem_refcount;
> ...
>> Can you please show a test case?

That is, a test case where the 'libgomp/target.c:gomp_increment_refcount'
etc. handling is relevant.  Those test cases:

> I have re-tested the patch *without* the gomp_increment/decrement_refcount changes,
> and have these regressions (just to demonstrate what is affected):
> +FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/nested-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  execution test
> +FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/nested-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  execution test
> +FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/pr92854-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  execution test
> +FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/pr92854-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  execution test
> +FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/nested-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  execution test
> +FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/nested-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  execution test
> +FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/pr92854-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  execution test
> +FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/pr92854-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  execution test

... are cases where we 'acc_map_data' something, and then invoke an
OpenACC compute constuct with a data clause for the same memory region...

> Now, I have also re-tested your version (aka, just break early and return when k->refcount == REFCOUNT_ACC_MAP_DATA)
> And for the record, that also works (no regressions).
>
> However, I strongly suggest we use my version here where we adjust the dynamic_refcount

..., and it's confusing to me why such an OpenACC compute constuct (which
is to use the structured reference counter) should then use the dynamic
reference counter, for 'acc_map_data'-mapped data?

> simply because: *It is the whole point of this project item in OpenACC 2.7*
>
> The 2.7 spec articulated how increment/decrement interacts with acc_map_data/acc_unmap_data and this patch was supposed to make libgomp more conforming to it implementation-wise.
> (otherwise, no point in working on this at all, as there wasn't really anything behaviorally wrong about our implementation before)

That is, in my understanding, those 'gomp_increment_refcount' changes
don't affect the 'acc_map_data' reference counting, but instead, they
change the reference counting for OpenACC constructs that are originally
using structured reference counter to instead use the dynamic reference
counter.  This doesn't seem conceptually right to me.  (..., even if not
observable from the outside.)


Grüße
 Thomas


> diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
> index f98cccd8b66..089393846d1 100644
> --- a/libgomp/libgomp.h
> +++ b/libgomp/libgomp.h
> @@ -1163,6 +1163,8 @@ struct target_mem_desc;
>  /* Special value for refcount - tgt_offset contains target address of the
>     artificial pointer to "omp declare target link" object.  */
>  #define REFCOUNT_LINK     (REFCOUNT_SPECIAL | 1)
> +/* Special value for refcount - created through acc_map_data.  */
> +#define REFCOUNT_ACC_MAP_DATA (REFCOUNT_SPECIAL | 2)
>  
>  /* Special value for refcount - structure element sibling list items.
>     All such key refounts have REFCOUNT_STRUCTELEM bits set, with _FLAG_FIRST
> diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
> index ba48a1ccbb7..d590806b5cd 100644
> --- a/libgomp/oacc-mem.c
> +++ b/libgomp/oacc-mem.c
> @@ -411,7 +411,8 @@ acc_map_data (void *h, void *d, size_t s)
>        assert (n->refcount == 1);
>        assert (n->dynamic_refcount == 0);
>        /* Special reference counting behavior.  */
> -      n->refcount = REFCOUNT_INFINITY;
> +      n->refcount = REFCOUNT_ACC_MAP_DATA;
> +      n->dynamic_refcount = 1;
>  
>        if (profiling_p)
>  	{
> @@ -455,12 +456,7 @@ acc_unmap_data (void *h)
>        gomp_fatal ("[%p,%d] surrounds %p",
>  		  (void *) n->host_start, (int) host_size, (void *) h);
>      }
> -  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
> -     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
> -     the different 'REFCOUNT_INFINITY' cases, or simply separate
> -     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
> -     etc.)?  */
> -  else if (n->refcount != REFCOUNT_INFINITY)
> +  else if (n->refcount != REFCOUNT_ACC_MAP_DATA)
>      {
>        gomp_mutex_unlock (&acc_dev->lock);
>        gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
> @@ -468,13 +464,12 @@ acc_unmap_data (void *h)
>  		  (void *) h, (int) host_size);
>      }
>  
> -  struct target_mem_desc *tgt = n->tgt;
> +  /* This should hold for all mappings created by acc_map_data. We are however
> +     removing this mapping in a "finalize" manner, so dynamic_refcount > 1 does
> +     not matter.  */
> +  assert (n->dynamic_refcount >= 1);
>  
> -  if (tgt->refcount == REFCOUNT_INFINITY)
> -    {
> -      gomp_mutex_unlock (&acc_dev->lock);
> -      gomp_fatal ("cannot unmap target block");
> -    }
> +  struct target_mem_desc *tgt = n->tgt;
>  
>    /* Above, we've verified that the mapping must have been set up by
>       'acc_map_data'.  */
> @@ -519,7 +514,8 @@ goacc_map_var_existing (struct gomp_device_descr *acc_dev, void *hostaddr,
>      }
>  
>    assert (n->refcount != REFCOUNT_LINK);
> -  if (n->refcount != REFCOUNT_INFINITY)
> +  if (n->refcount != REFCOUNT_INFINITY
> +      && n->refcount != REFCOUNT_ACC_MAP_DATA)
>      n->refcount++;
>    n->dynamic_refcount++;
>  
> @@ -683,13 +679,30 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
>  
>    assert (n->refcount != REFCOUNT_LINK);
>    if (n->refcount != REFCOUNT_INFINITY
> +      && n->refcount != REFCOUNT_ACC_MAP_DATA
>        && n->refcount < n->dynamic_refcount)
>      {
>        gomp_mutex_unlock (&acc_dev->lock);
>        gomp_fatal ("Dynamic reference counting assert fail\n");
>      }
>  
> -  if (finalize)
> +  if (n->refcount == REFCOUNT_ACC_MAP_DATA)
> +    {
> +      if (finalize)
> +	{
> +	  /* Mappings created by acc_map_data are returned to initial
> +	     dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
> +	  n->dynamic_refcount = 1;
> +	}
> +      else if (n->dynamic_refcount)
> +	{
> +	  /* When mapping is created by acc_map_data, dynamic_refcount must be
> +	     maintained at >= 1.  */
> +	  if (n->dynamic_refcount > 1)
> +	    n->dynamic_refcount--;
> +	}
> +    }
> +  else if (finalize)
>      {
>        if (n->refcount != REFCOUNT_INFINITY)
>  	n->refcount -= n->dynamic_refcount;
> @@ -1131,7 +1144,8 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
>  	    }
>  	  /* This is a special case because we must increment the refcount by
>  	     the number of mapped struct elements, rather than by one.  */
> -	  if (n->refcount != REFCOUNT_INFINITY)
> +	  if (n->refcount != REFCOUNT_INFINITY
> +	      && n->refcount != REFCOUNT_ACC_MAP_DATA)
>  	    n->refcount += groupnum - 1;
>  	  n->dynamic_refcount += groupnum - 1;
>  	}
> @@ -1203,7 +1217,8 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
>  	      processed = true;
>  	    }
>  	  else
> -	    assert (n->refcount != REFCOUNT_INFINITY);
> +	    assert (n->refcount != REFCOUNT_INFINITY
> +		    && n->refcount != REFCOUNT_ACC_MAP_DATA);
>  
>  	  for (size_t j = 0; j < tgt->list_count; j++)
>  	    if (tgt->list[j].key == n)
> diff --git a/libgomp/target.c b/libgomp/target.c
> index bcc86051601..c9dcb8761e5 100644
> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -481,7 +481,9 @@ gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
>  
>    uintptr_t *refcount_ptr = &k->refcount;
>  
> -  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
> +    refcount_ptr = &k->dynamic_refcount;
> +  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>      refcount_ptr = &k->structelem_refcount;
>    else if (REFCOUNT_STRUCTELEM_P (k->refcount))
>      refcount_ptr = k->structelem_refcount_ptr;
> @@ -528,7 +530,9 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
>  
>    uintptr_t *refcount_ptr = &k->refcount;
>  
> -  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA)
> +    refcount_ptr = &k->dynamic_refcount;
> +  else if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
>      refcount_ptr = &k->structelem_refcount;
>    else if (REFCOUNT_STRUCTELEM_P (k->refcount))
>      refcount_ptr = k->structelem_refcount_ptr;
> @@ -561,6 +565,10 @@ gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
>    else if (*refcount_ptr > 0)
>      *refcount_ptr -= 1;
>  
> +  /* Force back to 1 if this is an acc_map_data mapping.  */
> +  if (k->refcount == REFCOUNT_ACC_MAP_DATA && *refcount_ptr == 0)
> +    *refcount_ptr = 1;
> +
>   end:
>    if (*refcount_ptr == 0)
>      {
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c
> new file mode 100644
> index 00000000000..7bc55b94f33
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c
> @@ -0,0 +1,36 @@
> +/* { dg-do run } */
> +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
> +
> +/* Test if acc_unmap_data has implicit finalize semantics.  */
> +
> +#include <stdlib.h>
> +#include <openacc.h>
> +
> +int
> +main (int argc, char **argv)
> +{
> +  const int N = 256;
> +  unsigned char *h;
> +  void *d;
> +
> +  h = (unsigned char *) malloc (N);
> +
> +  d = acc_malloc (N);
> +
> +  acc_map_data (h, d, N);
> +
> +  acc_copyin (h, N);
> +  acc_copyin (h, N);
> +  acc_copyin (h, N);
> +
> +  acc_unmap_data (h);
> +
> +  if (acc_is_present (h, N))
> +    abort ();
> +
> +  acc_free (d);
> +
> +  free (h);
> +
> +  return 0;
> +}
> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
> index 872f0c1de5c..9ed9fa7e413 100644
> --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
> +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
> @@ -10,7 +10,7 @@ main (int argc, char *argv[])
>  {
>    acc_init (acc_device_default);
>    acc_unmap_data ((void *) foo);
> -/* { dg-output "libgomp: cannot unmap target block" } */
> +/* { dg-output "libgomp: refusing to unmap block \\\[\[0-9a-fA-FxX\]+,\\\+64\\\] that has not been mapped by 'acc_map_data'" } */
>    return 0;
>  }
>  

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH, OpenACC 2.7, v3] Adjust acc_map_data/acc_unmap_data interaction with reference counters
  2024-04-12  7:14         ` Thomas Schwinge
@ 2024-04-16  9:12           ` Chung-Lin Tang
  0 siblings, 0 replies; 7+ messages in thread
From: Chung-Lin Tang @ 2024-04-16  9:12 UTC (permalink / raw)
  To: Thomas Schwinge; +Cc: gcc-patches, Tobias Burnus

[-- Attachment #1: Type: text/plain, Size: 3303 bytes --]

On 2024/4/12 3:14 PM, Thomas Schwinge wrote:
>> I have re-tested the patch *without* the gomp_increment/decrement_refcount changes,
>> and have these regressions (just to demonstrate what is affected):
>> +FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/nested-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  execution test
>> +FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/nested-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  execution test
>> +FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/pr92854-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  execution test
>> +FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/pr92854-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  execution test
>> +FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/nested-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  execution test
>> +FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/nested-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  execution test
>> +FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/pr92854-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O0  execution test
>> +FAIL: libgomp.oacc-c++/../libgomp.oacc-c-c++-common/pr92854-1.c -DACC_DEVICE_TYPE_nvidia=1 -DACC_MEM_SHARED=0 -foffload=nvptx-none  -O2  execution test
> ... are cases where we 'acc_map_data' something, and then invoke an
> OpenACC compute constuct with a data clause for the same memory region...
> 
>> Now, I have also re-tested your version (aka, just break early and return when k->refcount == REFCOUNT_ACC_MAP_DATA)
>> And for the record, that also works (no regressions).
>>
>> However, I strongly suggest we use my version here where we adjust the dynamic_refcount
> ..., and it's confusing to me why such an OpenACC compute constuct (which
> is to use the structured reference counter) should then use the dynamic
> reference counter, for 'acc_map_data'-mapped data?
> 
>> simply because: *It is the whole point of this project item in OpenACC 2.7*
>>
>> The 2.7 spec articulated how increment/decrement interacts with acc_map_data/acc_unmap_data and this patch was supposed to make libgomp more conforming to it implementation-wise.
>> (otherwise, no point in working on this at all, as there wasn't really anything behaviorally wrong about our implementation before)
> That is, in my understanding, those 'gomp_increment_refcount' changes
> don't affect the 'acc_map_data' reference counting, but instead, they
> change the reference counting for OpenACC constructs that are originally
> using structured reference counter to instead use the dynamic reference
> counter.  This doesn't seem conceptually right to me.  (..., even if not
> observable from the outside.)

Okay, I've committed the attached patch, with the "early return upon k->refcount == REFCOUNT_ACC_MAP_DATA" in gomp_increment/decrement_refcount.

If we continue to use k->refcount itself as the flag holder of map type, I guess we will not be able to directly determine whether it is a
structured or dynamic adjustment at that point. Probably need a new field entirely. I think we don't really need to do that right now.

Thanks,
Chung-Lin

[-- Attachment #2: 0001-OpenACC-2.7-Adjust-acc_map_data-acc_unmap_data-inter.patch --]
[-- Type: text/plain, Size: 8908 bytes --]

From a7578a077ed8b64b94282aa55faf7037690abbc5 Mon Sep 17 00:00:00 2001
From: Chung-Lin Tang <cltang@baylibre.com>
Date: Tue, 16 Apr 2024 09:03:21 +0000
Subject: [PATCH] OpenACC 2.7: Adjust acc_map_data/acc_unmap_data interaction
 with reference counters

This patch adjusts the implementation of acc_map_data/acc_unmap_data API library
routines to more fit the description in the OpenACC 2.7 specification.

Instead of using REFCOUNT_INFINITY, we now define a REFCOUNT_ACC_MAP_DATA
special value to mark acc_map_data-created mappings. Adjustment around
mapping related code to respect OpenACC semantics are also added.

libgomp/ChangeLog:

	* libgomp.h (REFCOUNT_ACC_MAP_DATA): Define as (REFCOUNT_SPECIAL | 2).
	* oacc-mem.c (acc_map_data): Adjust to use REFCOUNT_ACC_MAP_DATA,
	initialize dynamic_refcount as 1.
	(acc_unmap_data): Adjust to use REFCOUNT_ACC_MAP_DATA,
	(goacc_map_var_existing): Add REFCOUNT_ACC_MAP_DATA case.
	(goacc_exit_datum_1): Add REFCOUNT_ACC_MAP_DATA case, respect
	REFCOUNT_ACC_MAP_DATA when decrementing/finalizing. Force lowest
	dynamic_refcount to be 1 for REFCOUNT_ACC_MAP_DATA.
	(goacc_enter_data_internal): Add REFCOUNT_ACC_MAP_DATA case.
	* target.c (gomp_increment_refcount): Return early for
	REFCOUNT_ACC_MAP_DATA case.
	(gomp_decrement_refcount): Likewise.
	* testsuite/libgomp.oacc-c-c++-common/lib-96.c: New testcase.
	* testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c: Adjust
	testcase error output scan test.
---
 libgomp/libgomp.h                             |  2 +
 libgomp/oacc-mem.c                            | 49 ++++++++++++-------
 libgomp/target.c                              |  8 ++-
 .../libgomp.oacc-c-c++-common/lib-96.c        | 36 ++++++++++++++
 .../unmap-infinity-1.c                        |  2 +-
 5 files changed, 77 insertions(+), 20 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c

diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index f98cccd8b66..089393846d1 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1163,6 +1163,8 @@ struct target_mem_desc;
 /* Special value for refcount - tgt_offset contains target address of the
    artificial pointer to "omp declare target link" object.  */
 #define REFCOUNT_LINK     (REFCOUNT_SPECIAL | 1)
+/* Special value for refcount - created through acc_map_data.  */
+#define REFCOUNT_ACC_MAP_DATA (REFCOUNT_SPECIAL | 2)
 
 /* Special value for refcount - structure element sibling list items.
    All such key refounts have REFCOUNT_STRUCTELEM bits set, with _FLAG_FIRST
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index ba48a1ccbb7..d590806b5cd 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -411,7 +411,8 @@ acc_map_data (void *h, void *d, size_t s)
       assert (n->refcount == 1);
       assert (n->dynamic_refcount == 0);
       /* Special reference counting behavior.  */
-      n->refcount = REFCOUNT_INFINITY;
+      n->refcount = REFCOUNT_ACC_MAP_DATA;
+      n->dynamic_refcount = 1;
 
       if (profiling_p)
 	{
@@ -455,12 +456,7 @@ acc_unmap_data (void *h)
       gomp_fatal ("[%p,%d] surrounds %p",
 		  (void *) n->host_start, (int) host_size, (void *) h);
     }
-  /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
-     'acc_map_data'.  Maybe 'dynamic_refcount' can be used for disambiguating
-     the different 'REFCOUNT_INFINITY' cases, or simply separate
-     'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
-     etc.)?  */
-  else if (n->refcount != REFCOUNT_INFINITY)
+  else if (n->refcount != REFCOUNT_ACC_MAP_DATA)
     {
       gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
@@ -468,13 +464,12 @@ acc_unmap_data (void *h)
 		  (void *) h, (int) host_size);
     }
 
-  struct target_mem_desc *tgt = n->tgt;
+  /* This should hold for all mappings created by acc_map_data. We are however
+     removing this mapping in a "finalize" manner, so dynamic_refcount > 1 does
+     not matter.  */
+  assert (n->dynamic_refcount >= 1);
 
-  if (tgt->refcount == REFCOUNT_INFINITY)
-    {
-      gomp_mutex_unlock (&acc_dev->lock);
-      gomp_fatal ("cannot unmap target block");
-    }
+  struct target_mem_desc *tgt = n->tgt;
 
   /* Above, we've verified that the mapping must have been set up by
      'acc_map_data'.  */
@@ -519,7 +514,8 @@ goacc_map_var_existing (struct gomp_device_descr *acc_dev, void *hostaddr,
     }
 
   assert (n->refcount != REFCOUNT_LINK);
-  if (n->refcount != REFCOUNT_INFINITY)
+  if (n->refcount != REFCOUNT_INFINITY
+      && n->refcount != REFCOUNT_ACC_MAP_DATA)
     n->refcount++;
   n->dynamic_refcount++;
 
@@ -683,13 +679,30 @@ goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
 
   assert (n->refcount != REFCOUNT_LINK);
   if (n->refcount != REFCOUNT_INFINITY
+      && n->refcount != REFCOUNT_ACC_MAP_DATA
       && n->refcount < n->dynamic_refcount)
     {
       gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("Dynamic reference counting assert fail\n");
     }
 
-  if (finalize)
+  if (n->refcount == REFCOUNT_ACC_MAP_DATA)
+    {
+      if (finalize)
+	{
+	  /* Mappings created by acc_map_data are returned to initial
+	     dynamic_refcount of 1. Can only be deleted by acc_unmap_data.  */
+	  n->dynamic_refcount = 1;
+	}
+      else if (n->dynamic_refcount)
+	{
+	  /* When mapping is created by acc_map_data, dynamic_refcount must be
+	     maintained at >= 1.  */
+	  if (n->dynamic_refcount > 1)
+	    n->dynamic_refcount--;
+	}
+    }
+  else if (finalize)
     {
       if (n->refcount != REFCOUNT_INFINITY)
 	n->refcount -= n->dynamic_refcount;
@@ -1131,7 +1144,8 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
 	    }
 	  /* This is a special case because we must increment the refcount by
 	     the number of mapped struct elements, rather than by one.  */
-	  if (n->refcount != REFCOUNT_INFINITY)
+	  if (n->refcount != REFCOUNT_INFINITY
+	      && n->refcount != REFCOUNT_ACC_MAP_DATA)
 	    n->refcount += groupnum - 1;
 	  n->dynamic_refcount += groupnum - 1;
 	}
@@ -1203,7 +1217,8 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
 	      processed = true;
 	    }
 	  else
-	    assert (n->refcount != REFCOUNT_INFINITY);
+	    assert (n->refcount != REFCOUNT_INFINITY
+		    && n->refcount != REFCOUNT_ACC_MAP_DATA);
 
 	  for (size_t j = 0; j < tgt->list_count; j++)
 	    if (tgt->list[j].key == n)
diff --git a/libgomp/target.c b/libgomp/target.c
index bcc86051601..5ec19ae489e 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -476,7 +476,9 @@ gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
 static inline void
 gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
 {
-  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
+  if (k == NULL
+      || k->refcount == REFCOUNT_INFINITY
+      || k->refcount == REFCOUNT_ACC_MAP_DATA)
     return;
 
   uintptr_t *refcount_ptr = &k->refcount;
@@ -520,7 +522,9 @@ static inline void
 gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
 			 bool *do_copy, bool *do_remove)
 {
-  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
+  if (k == NULL
+      || k->refcount == REFCOUNT_INFINITY
+      || k->refcount == REFCOUNT_ACC_MAP_DATA)
     {
       *do_copy = *do_remove = false;
       return;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c
new file mode 100644
index 00000000000..7bc55b94f33
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-96.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */
+
+/* Test if acc_unmap_data has implicit finalize semantics.  */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main (int argc, char **argv)
+{
+  const int N = 256;
+  unsigned char *h;
+  void *d;
+
+  h = (unsigned char *) malloc (N);
+
+  d = acc_malloc (N);
+
+  acc_map_data (h, d, N);
+
+  acc_copyin (h, N);
+  acc_copyin (h, N);
+  acc_copyin (h, N);
+
+  acc_unmap_data (h);
+
+  if (acc_is_present (h, N))
+    abort ();
+
+  acc_free (d);
+
+  free (h);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
index 872f0c1de5c..9ed9fa7e413 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c
@@ -10,7 +10,7 @@ main (int argc, char *argv[])
 {
   acc_init (acc_device_default);
   acc_unmap_data ((void *) foo);
-/* { dg-output "libgomp: cannot unmap target block" } */
+/* { dg-output "libgomp: refusing to unmap block \\\[\[0-9a-fA-FxX\]+,\\\+64\\\] that has not been mapped by 'acc_map_data'" } */
   return 0;
 }
 
-- 
2.34.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2024-04-16  9:12 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-22 10:03 [PATCH, OpenACC 2.7] Adjust acc_map_data/acc_unmap_data interaction with reference counters Chung-Lin Tang
2023-10-31 14:06 ` Thomas Schwinge
2024-03-04 16:18   ` [PATCH, OpenACC 2.7, v2] " Chung-Lin Tang
2024-03-15 11:24     ` Thomas Schwinge
2024-04-11 14:08       ` [PATCH, OpenACC 2.7, v3] " Chung-Lin Tang
2024-04-12  7:14         ` Thomas Schwinge
2024-04-16  9:12           ` Chung-Lin Tang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).