public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 2/6] [og8] Factor out duplicate code in gimplify_scan_omp_clauses
  2018-11-20 21:55 [PATCH 0/6] [og8] OpenACC attach/detach Julian Brown
  2018-11-20 21:55 ` [PATCH 1/6] [og8] Host-to-device transfer coalescing & magic offset value self-documentation Julian Brown
@ 2018-11-20 21:55 ` Julian Brown
  2018-11-20 21:55 ` [PATCH 3/6] [og8] OpenACC 2.6 manual deep copy support (attach/detach) Julian Brown
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 8+ messages in thread
From: Julian Brown @ 2018-11-20 21:55 UTC (permalink / raw)
  To: gcc-patches; +Cc: cltang, Catherine_Moore, jakub

[-- Attachment #1: Type: text/plain, Size: 405 bytes --]


Previously posted upstream:
https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00824.html

	gcc/
	* gimplify.c (insert_struct_component_mapping)
	(check_base_and_compare_lt): New.
	(gimplify_scan_omp_clauses): Outline duplicated code into calls to
	above two functions.
---
 gcc/gimplify.c |  307 ++++++++++++++++++++++++++++++++------------------------
 1 files changed, 174 insertions(+), 133 deletions(-)


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0002-og8-Factor-out-duplicate-code-in-gimplify_scan_omp_c.patch --]
[-- Type: text/x-patch; name="0002-og8-Factor-out-duplicate-code-in-gimplify_scan_omp_c.patch", Size: 12918 bytes --]

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 9be0b70..824e020 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -7661,6 +7661,160 @@ demote_firstprivate_pointer (tree decl, gimplify_omp_ctx *ctx)
     }
 }
 
+/* Insert a GOMP_MAP_ALLOC or GOMP_MAP_RELEASE node following a
+   GOMP_MAP_STRUCT mapping.  C is an always_pointer mapping.  STRUCT_NODE is
+   the struct node to insert the new mapping after (when the struct node is
+   initially created).  PREV_NODE is the first of two or three mappings for a
+   pointer, and is either:
+     - the node before C, when a pair of mappings is used, e.g. for a C/C++
+       array section.
+     - not the node before C.  This is true when we have a reference-to-pointer
+       type (with a mapping for the reference and for the pointer), or for
+       Fortran derived-type mappings with a GOMP_MAP_TO_PSET.
+   If SCP is non-null, the new node is inserted before *SCP.
+   if SCP is null, the new node is inserted before PREV_NODE.
+   The return type is:
+     - PREV_NODE, if SCP is non-null.
+     - The newly-created ALLOC or RELEASE node, if SCP is null.
+     - The second newly-created ALLOC or RELEASE node, if we are mapping a
+       reference to a pointer.  */
+
+static tree
+insert_struct_component_mapping (enum tree_code code, tree c, tree struct_node,
+				 tree prev_node, tree *scp)
+{
+  enum gomp_map_kind mkind = (code == OMP_TARGET_EXIT_DATA
+			      || code == OACC_EXIT_DATA)
+			     ? GOMP_MAP_RELEASE : GOMP_MAP_ALLOC;
+
+  tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP);
+  tree cl = scp ? prev_node : c2;
+  OMP_CLAUSE_SET_MAP_KIND (c2, mkind);
+  OMP_CLAUSE_DECL (c2) = unshare_expr (OMP_CLAUSE_DECL (c));
+  OMP_CLAUSE_CHAIN (c2) = scp ? *scp : prev_node;
+  OMP_CLAUSE_SIZE (c2) = TYPE_SIZE_UNIT (ptr_type_node);
+  if (struct_node)
+    OMP_CLAUSE_CHAIN (struct_node) = c2;
+
+  /* We might need to create an additional mapping if we have a reference to a
+     pointer (in C++).  Don't do this if we have something other than a
+     GOMP_MAP_ALWAYS_POINTER though, i.e. a GOMP_MAP_TO_PSET.  */
+  if (OMP_CLAUSE_CHAIN (prev_node) != c
+      && OMP_CLAUSE_CODE (OMP_CLAUSE_CHAIN (prev_node)) == OMP_CLAUSE_MAP
+      && (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (prev_node))
+	  == GOMP_MAP_ALWAYS_POINTER))
+    {
+      tree c4 = OMP_CLAUSE_CHAIN (prev_node);
+      tree c3 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP);
+      OMP_CLAUSE_SET_MAP_KIND (c3, mkind);
+      OMP_CLAUSE_DECL (c3) = unshare_expr (OMP_CLAUSE_DECL (c4));
+      OMP_CLAUSE_SIZE (c3) = TYPE_SIZE_UNIT (ptr_type_node);
+      OMP_CLAUSE_CHAIN (c3) = prev_node;
+      if (!scp)
+	OMP_CLAUSE_CHAIN (c2) = c3;
+      else
+	cl = c3;
+    }
+
+  if (scp)
+    *scp = c2;
+
+  return cl;
+}
+
+/* Called initially with ORIG_BASE non-null, sets PREV_BITPOS and PREV_POFFSET
+   to the offset of the field given in BASE.  Return type is 1 if BASE is equal
+   to *ORIG_BASE after stripping off ARRAY_REF and INDIRECT_REF nodes and
+   calling get_inner_reference, else 0.
+
+   Called subsequently with ORIG_BASE null, compares the offset of the field
+   given in BASE to PREV_BITPOS, PREV_POFFSET. Returns -1 if the base object
+   has changed, 0 if the new value has a higher bit position than that
+   described by the aforementioned arguments, or 1 if the new value is less
+   than them.  Used for (insertion) sorting components after a GOMP_MAP_STRUCT
+   mapping.  */
+
+static int
+check_base_and_compare_lt (tree base, tree *orig_base, tree decl,
+			   poly_int64 *prev_bitpos,
+			   poly_offset_int *prev_poffset)
+{
+  tree offset;
+  poly_int64 bitsize, bitpos;
+  machine_mode mode;
+  int unsignedp, reversep, volatilep = 0;
+  poly_offset_int poffset;
+
+  if (orig_base)
+    {
+      while (TREE_CODE (base) == ARRAY_REF)
+	base = TREE_OPERAND (base, 0);
+
+      if (TREE_CODE (base) == INDIRECT_REF)
+	base = TREE_OPERAND (base, 0);
+    }
+  else
+    {
+      if (TREE_CODE (base) == ARRAY_REF)
+	{
+	  while (TREE_CODE (base) == ARRAY_REF)
+	    base = TREE_OPERAND (base, 0);
+	  if (TREE_CODE (base) != COMPONENT_REF
+	      || TREE_CODE (TREE_TYPE (base)) != ARRAY_TYPE)
+	    return -1;
+	}
+      else if (TREE_CODE (base) == INDIRECT_REF
+	       && TREE_CODE (TREE_OPERAND (base, 0)) == COMPONENT_REF
+	       && (TREE_CODE (TREE_TYPE (TREE_OPERAND (base, 0)))
+		   == REFERENCE_TYPE))
+	base = TREE_OPERAND (base, 0);
+    }
+
+  base = get_inner_reference (base, &bitsize, &bitpos, &offset, &mode,
+			      &unsignedp, &reversep, &volatilep);
+
+  if (orig_base)
+    *orig_base = base;
+
+  if ((TREE_CODE (base) == INDIRECT_REF
+       || (TREE_CODE (base) == MEM_REF
+	   && integer_zerop (TREE_OPERAND (base, 1))))
+      && DECL_P (TREE_OPERAND (base, 0))
+      && TREE_CODE (TREE_TYPE (TREE_OPERAND (base, 0))) == REFERENCE_TYPE)
+    base = TREE_OPERAND (base, 0);
+
+  gcc_assert (offset == NULL_TREE || poly_int_tree_p (offset));
+
+  if (offset)
+    poffset = wi::to_poly_offset (offset);
+  else
+    poffset = 0;
+
+  if (maybe_ne (bitpos, 0))
+    poffset += bits_to_bytes_round_down (bitpos);
+
+  if (orig_base)
+    {
+      gcc_assert (base == decl);
+
+      *prev_bitpos = bitpos;
+      *prev_poffset = poffset;
+
+      return *orig_base == base;
+    }
+  else
+    {
+      if (base != decl)
+	return -1;
+
+      return (maybe_lt (*prev_poffset, poffset)
+	      || (known_eq (*prev_poffset, poffset)
+		  && maybe_lt (*prev_bitpos, bitpos)));
+    }
+
+  return 0;
+}
+
 /* Scan the OMP clauses in *LIST_P, installing mappings into a new
    and previous omp contexts.  */
 
@@ -8131,29 +8285,13 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 			}
 		    }
 
-		  tree offset;
-		  poly_int64 bitsize, bitpos;
-		  machine_mode mode;
-		  int unsignedp, reversep, volatilep = 0;
-		  tree base = OMP_CLAUSE_DECL (c);
-		  while (TREE_CODE (base) == ARRAY_REF)
-		    base = TREE_OPERAND (base, 0);
-		  if (TREE_CODE (base) == INDIRECT_REF)
-		    base = TREE_OPERAND (base, 0);
-		  base = get_inner_reference (base, &bitsize, &bitpos, &offset,
-					      &mode, &unsignedp, &reversep,
-					      &volatilep);
-		  tree orig_base = base;
-		  if ((TREE_CODE (base) == INDIRECT_REF
-		       || (TREE_CODE (base) == MEM_REF
-			   && integer_zerop (TREE_OPERAND (base, 1))))
-		      && DECL_P (TREE_OPERAND (base, 0))
-		      && (TREE_CODE (TREE_TYPE (TREE_OPERAND (base, 0)))
-			  == REFERENCE_TYPE))
-		    base = TREE_OPERAND (base, 0);
-		  gcc_assert (base == decl
-			      && (offset == NULL_TREE
-				  || poly_int_tree_p (offset)));
+		  tree orig_base;
+		  poly_int64 bitpos1;
+		  poly_offset_int offset1;
+
+		  int base_eq_orig_base
+		    = check_base_and_compare_lt (OMP_CLAUSE_DECL (c),
+			&orig_base, decl, &bitpos1, &offset1);
 
 		  splay_tree_node n
 		    = splay_tree_lookup (ctx->variables, (splay_tree_key)decl);
@@ -8165,7 +8303,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 		      tree l = build_omp_clause (OMP_CLAUSE_LOCATION (c),
 						 OMP_CLAUSE_MAP);
 		      OMP_CLAUSE_SET_MAP_KIND (l, GOMP_MAP_STRUCT);
-		      if (orig_base != base)
+		      if (!base_eq_orig_base)
 			OMP_CLAUSE_DECL (l) = unshare_expr (orig_base);
 		      else
 			OMP_CLAUSE_DECL (l) = decl;
@@ -8175,32 +8313,8 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 		      struct_map_to_clause->put (decl, l);
 		      if (ptr)
 			{
-			  enum gomp_map_kind mkind
-			    = code == OMP_TARGET_EXIT_DATA
-			      ? GOMP_MAP_RELEASE : GOMP_MAP_ALLOC;
-			  tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c),
-						      OMP_CLAUSE_MAP);
-			  OMP_CLAUSE_SET_MAP_KIND (c2, mkind);
-			  OMP_CLAUSE_DECL (c2)
-			    = unshare_expr (OMP_CLAUSE_DECL (c));
-			  OMP_CLAUSE_CHAIN (c2) = *prev_list_p;
-			  OMP_CLAUSE_SIZE (c2)
-			    = TYPE_SIZE_UNIT (ptr_type_node);
-			  OMP_CLAUSE_CHAIN (l) = c2;
-			  if (OMP_CLAUSE_CHAIN (*prev_list_p) != c)
-			    {
-			      tree c4 = OMP_CLAUSE_CHAIN (*prev_list_p);
-			      tree c3
-				= build_omp_clause (OMP_CLAUSE_LOCATION (c),
-						    OMP_CLAUSE_MAP);
-			      OMP_CLAUSE_SET_MAP_KIND (c3, mkind);
-			      OMP_CLAUSE_DECL (c3)
-				= unshare_expr (OMP_CLAUSE_DECL (c4));
-			      OMP_CLAUSE_SIZE (c3)
-				= TYPE_SIZE_UNIT (ptr_type_node);
-			      OMP_CLAUSE_CHAIN (c3) = *prev_list_p;
-			      OMP_CLAUSE_CHAIN (c2) = c3;
-			    }
+			  insert_struct_component_mapping (code, c, l,
+							   *prev_list_p, NULL);
 			  *prev_list_p = l;
 			  prev_list_p = NULL;
 			}
@@ -8210,7 +8324,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 			  *list_p = l;
 			  list_p = &OMP_CLAUSE_CHAIN (l);
 			}
-		      if (orig_base != base && code == OMP_TARGET)
+		      if (!base_eq_orig_base && code == OMP_TARGET)
 			{
 			  tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c),
 						      OMP_CLAUSE_MAP);
@@ -8233,13 +8347,6 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 		      tree *sc = NULL, *scp = NULL;
 		      if (GOMP_MAP_ALWAYS_P (OMP_CLAUSE_MAP_KIND (c)) || ptr)
 			n->value |= GOVD_SEEN;
-		      poly_offset_int o1, o2;
-		      if (offset)
-			o1 = wi::to_poly_offset (offset);
-		      else
-			o1 = 0;
-		      if (maybe_ne (bitpos, 0))
-			o1 += bits_to_bytes_round_down (bitpos);
 		      sc = &OMP_CLAUSE_CHAIN (*osc);
 		      if (*sc != c
 			  && (OMP_CLAUSE_MAP_KIND (*sc)
@@ -8257,44 +8364,14 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 			  break;
 			else
 			  {
-			    tree offset2;
-			    poly_int64 bitsize2, bitpos2;
-			    base = OMP_CLAUSE_DECL (*sc);
-			    if (TREE_CODE (base) == ARRAY_REF)
-			      {
-				while (TREE_CODE (base) == ARRAY_REF)
-				  base = TREE_OPERAND (base, 0);
-				if (TREE_CODE (base) != COMPONENT_REF
-				    || (TREE_CODE (TREE_TYPE (base))
-					!= ARRAY_TYPE))
-				  break;
-			      }
-			    else if (TREE_CODE (base) == INDIRECT_REF
-				     && (TREE_CODE (TREE_OPERAND (base, 0))
-					 == COMPONENT_REF)
-				     && (TREE_CODE (TREE_TYPE
-						     (TREE_OPERAND (base, 0)))
-					 == REFERENCE_TYPE))
-			      base = TREE_OPERAND (base, 0);
-			    base = get_inner_reference (base, &bitsize2,
-							&bitpos2, &offset2,
-							&mode, &unsignedp,
-							&reversep, &volatilep);
-			    if ((TREE_CODE (base) == INDIRECT_REF
-				 || (TREE_CODE (base) == MEM_REF
-				     && integer_zerop (TREE_OPERAND (base,
-								     1))))
-				&& DECL_P (TREE_OPERAND (base, 0))
-				&& (TREE_CODE (TREE_TYPE (TREE_OPERAND (base,
-									0)))
-				    == REFERENCE_TYPE))
-			      base = TREE_OPERAND (base, 0);
-			    if (base != decl)
+			    int same_decl_offset_lt
+			      = check_base_and_compare_lt (
+				  OMP_CLAUSE_DECL (*sc), NULL, decl,
+				  &bitpos1, &offset1);
+			    if (same_decl_offset_lt == -1)
 			      break;
 			    if (scp)
 			      continue;
-			    gcc_assert (offset == NULL_TREE
-					|| poly_int_tree_p (offset));
 			    tree d1 = OMP_CLAUSE_DECL (*sc);
 			    tree d2 = OMP_CLAUSE_DECL (c);
 			    while (TREE_CODE (d1) == ARRAY_REF)
@@ -8323,14 +8400,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 				remove = true;
 				break;
 			      }
-			    if (offset2)
-			      o2 = wi::to_poly_offset (offset2);
-			    else
-			      o2 = 0;
-			    o2 += bits_to_bytes_round_down (bitpos2);
-			    if (maybe_lt (o1, o2)
-				|| (known_eq (o1, 2)
-				    && maybe_lt (bitpos, bitpos2)))
+			    if (same_decl_offset_lt)
 			      {
 				if (ptr)
 				  scp = sc;
@@ -8345,38 +8415,9 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 				      size_one_node);
 		      if (ptr)
 			{
-			  tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c),
-						      OMP_CLAUSE_MAP);
-			  tree cl = NULL_TREE;
-			  enum gomp_map_kind mkind
-			    = code == OMP_TARGET_EXIT_DATA
-			      ? GOMP_MAP_RELEASE : GOMP_MAP_ALLOC;
-			  OMP_CLAUSE_SET_MAP_KIND (c2, mkind);
-			  OMP_CLAUSE_DECL (c2)
-			    = unshare_expr (OMP_CLAUSE_DECL (c));
-			  OMP_CLAUSE_CHAIN (c2) = scp ? *scp : *prev_list_p;
-			  OMP_CLAUSE_SIZE (c2)
-			    = TYPE_SIZE_UNIT (ptr_type_node);
-			  cl = scp ? *prev_list_p : c2;
-			  if (OMP_CLAUSE_CHAIN (*prev_list_p) != c)
-			    {
-			      tree c4 = OMP_CLAUSE_CHAIN (*prev_list_p);
-			      tree c3
-				= build_omp_clause (OMP_CLAUSE_LOCATION (c),
-						    OMP_CLAUSE_MAP);
-			      OMP_CLAUSE_SET_MAP_KIND (c3, mkind);
-			      OMP_CLAUSE_DECL (c3)
-				= unshare_expr (OMP_CLAUSE_DECL (c4));
-			      OMP_CLAUSE_SIZE (c3)
-				= TYPE_SIZE_UNIT (ptr_type_node);
-			      OMP_CLAUSE_CHAIN (c3) = *prev_list_p;
-			      if (!scp)
-				OMP_CLAUSE_CHAIN (c2) = c3;
-			      else
-				cl = c3;
-			    }
-			  if (scp)
-			    *scp = c2;
+			  tree cl
+			    = insert_struct_component_mapping (code, c, NULL,
+				*prev_list_p, scp);
 			  if (sc == prev_list_p)
 			    {
 			      *sc = cl;

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 4/6] [og8] Interaction of dynamic/multidimensional arrays with attach/detach.
  2018-11-20 21:55 [PATCH 0/6] [og8] OpenACC attach/detach Julian Brown
                   ` (2 preceding siblings ...)
  2018-11-20 21:55 ` [PATCH 3/6] [og8] OpenACC 2.6 manual deep copy support (attach/detach) Julian Brown
@ 2018-11-20 21:55 ` Julian Brown
  2018-11-20 21:56 ` [PATCH 5/6] [og8] Backport parts of upstream declare-allocate patch Julian Brown
  2018-11-20 21:57 ` [PATCH 6/6] [og8] OpenACC refcounting refresh Julian Brown
  5 siblings, 0 replies; 8+ messages in thread
From: Julian Brown @ 2018-11-20 21:55 UTC (permalink / raw)
  To: gcc-patches; +Cc: cltang, Catherine_Moore, jakub

[-- Attachment #1: Type: text/plain, Size: 1101 bytes --]


OpenACC multidimensional (or "dynamic") arrays do not seem to fit very
neatly into the attach/detach mechanism described for OpenACC 2.6,
that is if the user tries to use a multidimensional array as a field
in a struct.  This patch disallows that combination, for now at least.
Multidimensional array support in general has been submitted upstream
here but not yet accepted:

https://gcc.gnu.org/ml/gcc-patches/2018-10/msg00937.html

	gcc/
	* omp-low.c (scan_sharing_clauses): Disallow dynamic (multidimensional)
	arrays within structs.

	gcc/testsuite/
	* c-c++-common/goacc/deep-copy-multidim.c: Add test.

	libgomp/
	* target.c (gomp_map_vars_async, gomp_load_image_to_device):
	Zero-initialise do_detach, dynamic_refcount and attach_count in more
	places.
---
 gcc/omp-low.c                                      |   10 +++++-
 .../c-c++-common/goacc/deep-copy-multidim.c        |   32 ++++++++++++++++++++
 libgomp/target.c                                   |    6 ++++
 3 files changed, 47 insertions(+), 1 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/goacc/deep-copy-multidim.c


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0004-og8-Interaction-of-dynamic-multidimensional-arrays-w.patch --]
[-- Type: text/x-patch; name="0004-og8-Interaction-of-dynamic-multidimensional-arrays-w.patch", Size: 3269 bytes --]

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index e559211..1726451 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -1481,7 +1481,15 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 		  t = TREE_TYPE (t);
 		}
 
-	      install_var_field (da_decl, by_ref, 3, ctx);
+	      if (DECL_P (decl))
+		install_var_field (da_decl, by_ref, 3, ctx);
+	      else
+	        {
+		  error_at (OMP_CLAUSE_LOCATION (c),
+			    "dynamic arrays cannot be used within structs");
+		  break;
+		}
+
 	      tree new_var = install_var_local (da_decl, ctx);
 
 	      bool existed = ctx->dynamic_arrays->put (new_var, da_dimensions);
diff --git a/gcc/testsuite/c-c++-common/goacc/deep-copy-multidim.c b/gcc/testsuite/c-c++-common/goacc/deep-copy-multidim.c
new file mode 100644
index 0000000..1696f0c
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/deep-copy-multidim.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+
+#include <stdlib.h>
+#include <assert.h>
+
+struct dc
+{
+  int a;
+  int **b;
+};
+
+int
+main ()
+{
+  int n = 100, i, j;
+  struct dc v = { .a = 3 };
+
+  v.b = (int **) malloc (sizeof (int *) * n);
+  for (i = 0; i < n; i++)
+    v.b[i] = (int *) malloc (sizeof (int) * n);
+
+#pragma acc parallel loop copy(v.a, v.b[:n][:n]) /* { dg-error "dynamic arrays cannot be used within structs" } */
+  for (i = 0; i < n; i++)
+    for (j = 0; j < n; j++)
+      v.b[i][j] = v.a + i + j;
+
+  for (i = 0; i < n; i++)
+    for (j = 0; j < n; j++)
+      assert (v.b[i][j] == v.a + i + j);
+
+  return 0;
+}
diff --git a/libgomp/target.c b/libgomp/target.c
index d9d42eb..da51291 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1484,6 +1484,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
 	     set to false here.  */
 	  tgt->list[i].copy_from = false;
 	  tgt->list[i].always_copy_from = false;
+	  tgt->list[i].do_detach = false;
 
 	  size_t align = (size_t) 1 << (kind >> rshift);
 	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
@@ -1521,6 +1522,8 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
 
 		  k->tgt = tgt;
 		  k->refcount = 1;
+		  k->dynamic_refcount = 0;
+		  k->attach_count = NULL;
 		  k->link_key = NULL;
 		  tgt_size = (tgt_size + align - 1) & ~(align - 1);
 		  target_row_addr = tgt->tgt_start + tgt_size;
@@ -1532,6 +1535,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
 		    = GOMP_MAP_COPY_FROM_P (kind & typemask);
 		  row_desc->always_copy_from
 		    = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
+		  row_desc->do_detach = false;
 		  row_desc->offset = 0;
 		  row_desc->length = da->data_row_size;
 
@@ -1839,6 +1843,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
       k->tgt = tgt;
       k->tgt_offset = target_table[i].start;
       k->refcount = REFCOUNT_INFINITY;
+      k->attach_count = NULL;
       k->link_key = NULL;
       tgt->list[i].key = k;
       tgt->refcount++;
@@ -1873,6 +1878,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
       k->tgt = tgt;
       k->tgt_offset = target_var->start;
       k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
+      k->attach_count = NULL;
       k->link_key = NULL;
       tgt->list[i].key = k;
       tgt->refcount++;

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/6] [og8] Host-to-device transfer coalescing & magic offset value self-documentation
  2018-11-20 21:55 [PATCH 0/6] [og8] OpenACC attach/detach Julian Brown
@ 2018-11-20 21:55 ` Julian Brown
  2018-11-20 21:55 ` [PATCH 2/6] [og8] Factor out duplicate code in gimplify_scan_omp_clauses Julian Brown
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 8+ messages in thread
From: Julian Brown @ 2018-11-20 21:55 UTC (permalink / raw)
  To: gcc-patches; +Cc: cltang, Catherine_Moore, jakub

[-- Attachment #1: Type: text/plain, Size: 749 bytes --]


Previously posted upstream:
https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00825.html

	libgomp/
	* libgomp.h (OFFSET_INLINED, OFFSET_POINTER, OFFSET_STRUCT): Define.
	* target.c (FIELD_TGT_EMPTY): Define.
	(gomp_coalesce_chunk): New.
	(gomp_coalesce_buf): Use above instead of flat array of size_t pairs.
	(gomp_coalesce_buf_add): Adjust for above change.
	(gomp_copy_host2dev): Likewise.
	(gomp_map_val): Use OFFSET_* macros instead of magic constants.  Write
	as switch instead of list of ifs.
	(gomp_map_vars_async): Adjust for gomp_coalesce_chunk change.  Use
	OFFSET_* macros.
---
 libgomp/libgomp.h |    5 +++
 libgomp/target.c  |  101 +++++++++++++++++++++++++++++++---------------------
 2 files changed, 65 insertions(+), 41 deletions(-)


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-og8-Host-to-device-transfer-coalescing-magic-offset-.patch --]
[-- Type: text/x-patch; name="0001-og8-Host-to-device-transfer-coalescing-magic-offset-.patch", Size: 9484 bytes --]

diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 607f4c2..acf7f8f 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -842,6 +842,11 @@ struct target_mem_desc {
    artificial pointer to "omp declare target link" object.  */
 #define REFCOUNT_LINK (~(uintptr_t) 1)
 
+/* Special offset values.  */
+#define OFFSET_INLINED (~(uintptr_t) 0)
+#define OFFSET_POINTER (~(uintptr_t) 1)
+#define OFFSET_STRUCT (~(uintptr_t) 2)
+
 struct splay_tree_key_s {
   /* Address of the host object.  */
   uintptr_t host_start;
diff --git a/libgomp/target.c b/libgomp/target.c
index ab17650..7220ac6 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -45,6 +45,8 @@
 #include "plugin-suffix.h"
 #endif
 
+#define FIELD_TGT_EMPTY (~(size_t) 0)
+
 static void gomp_target_init (void);
 
 /* The whole initialization code for offloading plugins is only run one.  */
@@ -206,8 +208,14 @@ goacc_device_copy_async (struct gomp_device_descr *devicep,
     }
 }
 
-/* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
-   host to device memory transfers.  */
+/* Infrastructure for coalescing adjacent or nearly adjacent (in device
+   addresses) host to device memory transfers.  */
+
+struct gomp_coalesce_chunk
+{
+  /* The starting and ending point of a coalesced chunk of memory.  */
+  size_t start, end;
+};
 
 struct gomp_coalesce_buf
 {
@@ -215,10 +223,10 @@ struct gomp_coalesce_buf
      it will be copied to the device.  */
   void *buf;
   struct target_mem_desc *tgt;
-  /* Array with offsets, chunks[2 * i] is the starting offset and
-     chunks[2 * i + 1] ending offset relative to tgt->tgt_start device address
+  /* Array with offsets, chunks[i].start is the starting offset and
+     chunks[i].end ending offset relative to tgt->tgt_start device address
      of chunks which are to be copied to buf and later copied to device.  */
-  size_t *chunks;
+  struct gomp_coalesce_chunk *chunks;
   /* Number of chunks in chunks array, or -1 if coalesce buffering should not
      be performed.  */
   long chunk_cnt;
@@ -251,14 +259,14 @@ gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
     {
       if (cbuf->chunk_cnt < 0)
 	return;
-      if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
+      if (start < cbuf->chunks[cbuf->chunk_cnt-1].end)
 	{
 	  cbuf->chunk_cnt = -1;
 	  return;
 	}
-      if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1] + MAX_COALESCE_BUF_GAP)
+      if (start < cbuf->chunks[cbuf->chunk_cnt-1].end + MAX_COALESCE_BUF_GAP)
 	{
-	  cbuf->chunks[2 * cbuf->chunk_cnt - 1] = start + len;
+	  cbuf->chunks[cbuf->chunk_cnt-1].end = start + len;
 	  cbuf->use_cnt++;
 	  return;
 	}
@@ -268,8 +276,8 @@ gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
       if (cbuf->use_cnt == 1)
 	cbuf->chunk_cnt--;
     }
-  cbuf->chunks[2 * cbuf->chunk_cnt] = start;
-  cbuf->chunks[2 * cbuf->chunk_cnt + 1] = start + len;
+  cbuf->chunks[cbuf->chunk_cnt].start = start;
+  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
   cbuf->chunk_cnt++;
   cbuf->use_cnt = 1;
 }
@@ -301,20 +309,20 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
   if (cbuf)
     {
       uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
-      if (doff < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
+      if (doff < cbuf->chunks[cbuf->chunk_cnt-1].end)
 	{
 	  long first = 0;
 	  long last = cbuf->chunk_cnt - 1;
 	  while (first <= last)
 	    {
 	      long middle = (first + last) >> 1;
-	      if (cbuf->chunks[2 * middle + 1] <= doff)
+	      if (cbuf->chunks[middle].end <= doff)
 		first = middle + 1;
-	      else if (cbuf->chunks[2 * middle] <= doff)
+	      else if (cbuf->chunks[middle].start <= doff)
 		{
-		  if (doff + sz > cbuf->chunks[2 * middle + 1])
+		  if (doff + sz > cbuf->chunks[middle].end)
 		    gomp_fatal ("internal libgomp cbuf error");
-		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0]),
+		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
 			  h, sz);
 		  return;
 		}
@@ -538,17 +546,25 @@ gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
     return tgt->list[i].key->tgt->tgt_start
 	   + tgt->list[i].key->tgt_offset
 	   + tgt->list[i].offset;
-  if (tgt->list[i].offset == ~(uintptr_t) 0)
-    return (uintptr_t) hostaddrs[i];
-  if (tgt->list[i].offset == ~(uintptr_t) 1)
-    return 0;
-  if (tgt->list[i].offset == ~(uintptr_t) 2)
-    return tgt->list[i + 1].key->tgt->tgt_start
-	   + tgt->list[i + 1].key->tgt_offset
-	   + tgt->list[i + 1].offset
-	   + (uintptr_t) hostaddrs[i]
-	   - (uintptr_t) hostaddrs[i + 1];
-  return tgt->tgt_start + tgt->list[i].offset;
+
+  switch (tgt->list[i].offset)
+    {
+    case OFFSET_INLINED:
+      return (uintptr_t) hostaddrs[i];
+
+    case OFFSET_POINTER:
+      return 0;
+
+    case OFFSET_STRUCT:
+      return tgt->list[i + 1].key->tgt->tgt_start
+	     + tgt->list[i + 1].key->tgt_offset
+	     + tgt->list[i + 1].offset
+	     + (uintptr_t) hostaddrs[i]
+	     - (uintptr_t) hostaddrs[i + 1];
+
+    default:
+      return tgt->tgt_start + tgt->list[i].offset;
+    }
 }
 
 /* Dynamic array related data structures, interfaces with the compiler.  */
@@ -758,8 +774,8 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
   cbuf.buf = NULL;
   if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
     {
-      cbuf.chunks
-	= (size_t *) gomp_alloca ((2 * mapnum + 2) * sizeof (size_t));
+      size_t chunk_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
+      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunk_size);
       cbuf.chunk_cnt = 0;
     }
   if (pragma_kind == GOMP_MAP_VARS_TARGET)
@@ -769,8 +785,8 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
       tgt_size = mapnum * sizeof (void *);
       cbuf.chunk_cnt = 1;
       cbuf.use_cnt = 1 + (mapnum > 1);
-      cbuf.chunks[0] = 0;
-      cbuf.chunks[1] = tgt_size;
+      cbuf.chunks[0].start = 0;
+      cbuf.chunks[0].end = tgt_size;
     }
 
   gomp_mutex_lock (&devicep->lock);
@@ -788,7 +804,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
 	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
 	{
 	  tgt->list[i].key = NULL;
-	  tgt->list[i].offset = ~(uintptr_t) 0;
+	  tgt->list[i].offset = OFFSET_INLINED;
 	  continue;
 	}
       else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
@@ -806,7 +822,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
 	    = (void *) (n->tgt->tgt_start + n->tgt_offset
 			+ cur_node.host_start);
 	  tgt->list[i].key = NULL;
-	  tgt->list[i].offset = ~(uintptr_t) 0;
+	  tgt->list[i].offset = OFFSET_INLINED;
 	  continue;
 	}
       else if ((kind & typemask) == GOMP_MAP_STRUCT)
@@ -817,7 +833,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
 	  cur_node.host_end = (uintptr_t) hostaddrs[last]
 			      + sizes[last];
 	  tgt->list[i].key = NULL;
-	  tgt->list[i].offset = ~(uintptr_t) 2;
+	  tgt->list[i].offset = OFFSET_STRUCT;
 	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
 	  if (n == NULL)
 	    {
@@ -850,7 +866,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
       else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
 	{
 	  tgt->list[i].key = NULL;
-	  tgt->list[i].offset = ~(uintptr_t) 1;
+	  tgt->list[i].offset = OFFSET_POINTER;
 	  has_firstprivate = true;
 	  continue;
 	}
@@ -894,7 +910,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
 	  if (!n)
 	    {
 	      tgt->list[i].key = NULL;
-	      tgt->list[i].offset = ~(uintptr_t) 1;
+	      tgt->list[i].offset = OFFSET_POINTER;
 	      continue;
 	    }
 	}
@@ -1018,7 +1034,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
       if (cbuf.chunk_cnt > 0)
 	{
 	  cbuf.buf
-	    = malloc (cbuf.chunks[2 * cbuf.chunk_cnt - 1] - cbuf.chunks[0]);
+	    = malloc (cbuf.chunks[cbuf.chunk_cnt-1].end - cbuf.chunks[0].start);
 	  if (cbuf.buf)
 	    {
 	      cbuf.tgt = tgt;
@@ -1144,6 +1160,8 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
 	    else
 	      k->host_end = k->host_start + sizeof (void *);
 	    splay_tree_key n = splay_tree_lookup (mem_map, k);
+	    /* Need to account for the case where a struct field hasn't been
+	       mapped onto the accelerator yet.  */
 	    if (n && n->refcount != REFCOUNT_LINK)
 	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
 				      kind & typemask, cbufp);
@@ -1160,12 +1178,12 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
 		size_t align = (size_t) 1 << (kind >> rshift);
 		tgt->list[i].key = k;
 		k->tgt = tgt;
-		if (field_tgt_clear != ~(size_t) 0)
+		if (field_tgt_clear != FIELD_TGT_EMPTY)
 		  {
 		    k->tgt_offset = k->host_start - field_tgt_base
 				    + field_tgt_offset;
 		    if (i == field_tgt_clear)
-		      field_tgt_clear = ~(size_t) 0;
+		      field_tgt_clear = FIELD_TGT_EMPTY;
 		  }
 		else
 		  {
@@ -1419,9 +1437,10 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
       long c = 0;
       for (c = 0; c < cbuf.chunk_cnt; ++c)
 	gomp_copy_host2dev (devicep, aq,
-			    (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
-			    (char *) cbuf.buf + (cbuf.chunks[2 * c] - cbuf.chunks[0]),
-			    cbuf.chunks[2 * c + 1] - cbuf.chunks[2 * c], NULL);
+			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
+			    (char *) cbuf.buf + (cbuf.chunks[c].start
+						 - cbuf.chunks[0].start),
+			    cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
       free (cbuf.buf);
     }
 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 3/6] [og8] OpenACC 2.6 manual deep copy support (attach/detach)
  2018-11-20 21:55 [PATCH 0/6] [og8] OpenACC attach/detach Julian Brown
  2018-11-20 21:55 ` [PATCH 1/6] [og8] Host-to-device transfer coalescing & magic offset value self-documentation Julian Brown
  2018-11-20 21:55 ` [PATCH 2/6] [og8] Factor out duplicate code in gimplify_scan_omp_clauses Julian Brown
@ 2018-11-20 21:55 ` Julian Brown
  2018-11-22 15:48   ` Bernhard Reutner-Fischer
  2018-11-20 21:55 ` [PATCH 4/6] [og8] Interaction of dynamic/multidimensional arrays with attach/detach Julian Brown
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 8+ messages in thread
From: Julian Brown @ 2018-11-20 21:55 UTC (permalink / raw)
  To: gcc-patches; +Cc: cltang, Catherine_Moore, jakub

[-- Attachment #1: Type: text/plain, Size: 9082 bytes --]


Previously posted upstream:
https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00826.html

	gcc/c/
	* c-parser.c (c_parser_omp_variable_list): Allow deref (->) in
	variable lists.
	(c_parser_oacc_all_clauses): Re-alphabetize cases.
	* c-typeck.c (handle_omp_array_sections_1): Support deref.

	gcc/cp/
	* parser.c (cp_parser_omp_var_list_no_open): Support deref.
	(cp_parser_oacc_all_clauses): Re-alphabetize cases.
	* semantics.c (finish_omp_clauses): Allow "this" for OpenACC data
	clauses.  Support deref.

	gcc/fortran/
	* gfortran.h (gfc_omp_map_op): Add OMP_MAP_ATTACH, OMP_MAP_DETACH.
	* openmp.c (omp_mask2): Add OMP_CLAUSE_ATTACH, OMP_CLAUSE_DETACH.
	(gfc_match_omp_clauses): Remove allow_derived parameter, infer from
	clause mask.  Support attach and detach.  Slight reformatting.
	(OACC_PARALLEL_CLAUSES, OACC_KERNELS_CLAUSES, OACC_DATA_CLAUSES)
	(OACC_ENTER_DATA_CLAUSES): Add OMP_CLAUSE_ATTACH.
	(OACC_EXIT_DATA_CLAUSES): Add OMP_CLAUSE_DETACH.
	(match_acc): Remove derived_types parameter, and don't pass to
	gfc_match_omp_clauses.
	(gfc_match_oacc_update): Don't pass allow_derived argument.
	(gfc_match_oacc_enter_data): Likewise.
	(gfc_match_oacc_exit_data): Likewise.
	(check_symbol_not_pointer): Don't disallow pointer objects of derived
	type.
	(resolve_oacc_data_clauses): Don't disallow allocatable derived types.
	(resolve_omp_clauses): Perform duplicate checking only for non-derived
	type component accesses (plain variables and arrays or array sections).
	Support component refs.
	* trans-openmp.c (gfc_omp_privatize_by_reference): Support component
	refs.
	(gfc_trans_omp_clauses_1): Support component refs, attach and detach
	clauses.

	gcc/
	* gimplify.c (gimplify_omp_var_data): Add GOVD_MAP_HAS_ATTACHMENTS.
	(insert_struct_component_mapping): Support derived-type member mappings
	for arrays with descriptors which use GOMP_MAP_TO_PSET.
	(gimplify_scan_omp_clauses): Rewrite GOMP_MAP_ALWAYS_POINTER to
	GOMP_MAP_ATTACH for OpenACC struct/derived-type component pointers.
	Handle pointer mappings that use GOMP_MAP_TO_PSET.  Handle attach/detach
	clauses.
	(gimplify_adjust_omp_clauses_1): Skip adjustments for explicit
	attach/detach clauses.
	(gimplify_omp_target_update): Handle finalize for detach.

	gcc/testsuite/
	* c-c++-common/goacc/mdc-1.c: Update scan tests.
	* gfortran.dg/goacc/data-clauses.f95: Remove expected errors.
	* gfortran.dg/goacc/derived-types.f90: Likewise.
	* gfortran.dg/goacc/enter-exit-data.f95: Likewise.

	libgomp/
	* libgomp.h (struct target_var_desc): Add do_detach flag.
	(struct splay_tree_key_s): Add attach_count field.
	(struct gomp_coalesce_buf): Add forward declaration.
	(gomp_map_val, gomp_attach_pointer, gomp_detach_pointer): Add
	prototypes.
	(gomp_unmap_vars): Add finalize parameter.
	* libgomp.map (OACC_2.6): New section. Add acc_attach, acc_attach_async,
	acc_detach, acc_detach_async, acc_detach_finalize,
	acc_detach_finalize_async.
	* oacc-async.c (goacc_async_copyout_unmap_vars): Add finalize parameter.
	Pass to gomp_unmap_vars_async.
	* oacc-init.c (acc_shutdown_1): Update call to gomp_unmap_vars.
	* oacc-int.h (goacc_async_copyout_unmap_vars): Add finalize parameter.
	* oacc-mem.c (acc_unmap_data): Update call to gomp_unmap_vars.
	(present_create_copy): Initialise attach_count.
	(delete_copyout): Likewise.
	(gomp_acc_insert_pointer): Likewise.
	(gomp_acc_remove_pointer): Update calls to gomp_unmap_vars,
	goacc_async_copyout_unmap_vars.
	(acc_attach_async, acc_attach, goacc_detach_internal, acc_detach)
	(acc_detach_async, acc_detach_finalize, acc_detach_finalize_async): New
	functions.
	* oacc-parallel.c (find_pointer): Support attach/detach.  Make a little
	more strict.
	(GOACC_parallel_keyed_internal): Use gomp_map_val to calculate device
	addresses.  Update calls to gomp_unmap_vars,
	goacc_async_copyout_unmap_vars.
	(GOACC_data_end): Update call to gomp_unmap_vars.
	(GOACC_enter_exit_data): Support attach/detach and GOMP_MAP_STRUCT.
	* openacc.h (acc_attach, acc_attach_async, acc_detach)
	(acc_detach_async, acc_detach_finalize, acc_detach_finalize_async): Add
	prototypes.
	* target.c (limits.h): Include.
	(gomp_map_vars_existing): Initialise do_detach field of tgt_var_desc.
	(gomp_attach_pointer, gomp_detach_pointer): New functions.
	(gomp_map_val): Make global.
	(gomp_map_vars_async): Support attach and detach.
	(gomp_remove_var): Free attach count array if present.
	(gomp_unmap_vars): Add finalize parameter.  Update call to
	gomp_unmap_vars_async.
	(gomp_unmap_vars_async): Add finalize parameter.  Add pointer detaching
	support.
	(GOMP_target): Update call to gomp_unmap_vars.
	(GOMP_target_ext): Likewise.
	(gomp_exit_data): Free attach count array if present.
	(gomp_target_task_fn): Update call to gomp_unmap_vars.
	* testsuite/libgomp.oacc-c-c++-common/deep-copy-1.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/deep-copy-2.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/deep-copy-4.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c: New test.
	* testsuite/libgomp.oacc-fortran/deep-copy-1.c: New test.
	* testsuite/libgomp.oacc-fortran/deep-copy-2.c: New test.
	* testsuite/libgomp.oacc-fortran/deep-copy-3.c: New test.
	* testsuite/libgomp.oacc-fortran/deep-copy-4.c: New test.
	* testsuite/libgomp.oacc-fortran/deep-copy-5.c: New test.
	* testsuite/libgomp.oacc-fortran/deep-copy-6.c: New test.
	* testsuite/libgomp.oacc-fortran/deep-copy-7.c: New test.
	* testsuite/libgomp.oacc-fortran/deep-copy-8.c: New test.
	* testsuite/libgomp.oacc-fortran/derived-type-1.f90: Update test to use
	stop <n>.
	* testsuite/libgomp.oacc-fortran/update-2.f90: Likewise.
---
 gcc/c/c-parser.c                                   |   15 +-
 gcc/c/c-typeck.c                                   |    4 +
 gcc/cp/parser.c                                    |   16 +-
 gcc/cp/semantics.c                                 |    6 +-
 gcc/fortran/gfortran.h                             |    2 +
 gcc/fortran/openmp.c                               |  126 +++++++++-----
 gcc/fortran/trans-openmp.c                         |  143 ++++++---------
 gcc/gimplify.c                                     |   82 +++++++--
 gcc/testsuite/c-c++-common/goacc/mdc-1.c           |   10 +-
 gcc/testsuite/gfortran.dg/goacc/data-clauses.f95   |   38 ++--
 gcc/testsuite/gfortran.dg/goacc/derived-types.f90  |   23 +--
 .../gfortran.dg/goacc/enter-exit-data.f95          |   24 ++--
 libgomp/libgomp.h                                  |   23 ++-
 libgomp/libgomp.map                                |   10 +
 libgomp/oacc-async.c                               |    4 +-
 libgomp/oacc-init.c                                |    2 +-
 libgomp/oacc-int.h                                 |    2 +-
 libgomp/oacc-mem.c                                 |   86 +++++++++-
 libgomp/oacc-parallel.c                            |  190 +++++++++++++++-----
 libgomp/openacc.h                                  |    6 +
 libgomp/target.c                                   |  189 ++++++++++++++++++-
 .../libgomp.oacc-c-c++-common/deep-copy-1.c        |   24 +++
 .../libgomp.oacc-c-c++-common/deep-copy-2.c        |   29 +++
 .../libgomp.oacc-c-c++-common/deep-copy-3.c        |   34 ++++
 .../libgomp.oacc-c-c++-common/deep-copy-4.c        |   87 +++++++++
 .../libgomp.oacc-c-c++-common/deep-copy-5.c        |   81 +++++++++
 .../testsuite/libgomp.oacc-fortran/deep-copy-1.f90 |   35 ++++
 .../testsuite/libgomp.oacc-fortran/deep-copy-2.f90 |   33 ++++
 .../testsuite/libgomp.oacc-fortran/deep-copy-3.f90 |   34 ++++
 .../testsuite/libgomp.oacc-fortran/deep-copy-4.f90 |   49 +++++
 .../testsuite/libgomp.oacc-fortran/deep-copy-5.f90 |   57 ++++++
 .../testsuite/libgomp.oacc-fortran/deep-copy-6.f90 |   61 +++++++
 .../testsuite/libgomp.oacc-fortran/deep-copy-7.f90 |   89 +++++++++
 .../testsuite/libgomp.oacc-fortran/deep-copy-8.f90 |   41 +++++
 .../libgomp.oacc-fortran/derived-type-1.f90        |    6 +-
 .../testsuite/libgomp.oacc-fortran/update-2.f90    |   44 +++---
 36 files changed, 1407 insertions(+), 298 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-1.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-2.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-4.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-1.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-2.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-4.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-5.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-7.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-8.f90


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0003-og8-OpenACC-2.6-manual-deep-copy-support-attach-deta.patch --]
[-- Type: text/x-patch; name="0003-og8-OpenACC-2.6-manual-deep-copy-support-attach-deta.patch", Size: 91395 bytes --]

diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index ffc5fe9..4b6ab84 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -11553,9 +11553,12 @@ c_parser_omp_variable_list (c_parser *parser,
 	    case OMP_CLAUSE_MAP:
 	    case OMP_CLAUSE_FROM:
 	    case OMP_CLAUSE_TO:
-	      while (c_parser_next_token_is (parser, CPP_DOT))
+	      while (c_parser_next_token_is (parser, CPP_DOT)
+		     || c_parser_next_token_is (parser, CPP_DEREF))
 		{
 		  location_t op_loc = c_parser_peek_token (parser)->location;
+		  if (c_parser_next_token_is (parser, CPP_DEREF))
+		    t = build_simple_mem_ref (t);
 		  c_parser_consume_token (parser);
 		  if (!c_parser_next_token_is (parser, CPP_NAME))
 		    {
@@ -11679,7 +11682,7 @@ c_parser_omp_var_list_parens (c_parser *parser, enum omp_clause_code kind,
 }
 
 /* OpenACC 2.5:
-   attach (variable-list )
+   attach ( variable-list )
    copy ( variable-list )
    copyin ( variable-list )
    copyout ( variable-list )
@@ -14090,15 +14093,15 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask,
 	  clauses = c_parser_oacc_clause_async (parser, clauses);
 	  c_name = "async";
 	  break;
+	case PRAGMA_OACC_CLAUSE_ATTACH:
+	  clauses = c_parser_oacc_data_clause (parser, c_kind, clauses);
+	  c_name = "attach";
+	  break;
 	case PRAGMA_OACC_CLAUSE_AUTO:
 	  clauses = c_parser_oacc_simple_clause (parser, here, OMP_CLAUSE_AUTO,
 						 clauses);
 	  c_name = "auto";
 	  break;
-	case PRAGMA_OACC_CLAUSE_ATTACH:
-	  clauses = c_parser_oacc_data_clause (parser, c_kind, clauses);
-	  c_name = "attach";
-	  break;
 	case PRAGMA_OACC_CLAUSE_BIND:
 	  clauses = c_parser_oacc_clause_bind (parser, clauses);
 	  c_name = "bind";
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index ab6819c..1a18867 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -12446,6 +12446,8 @@ handle_omp_array_sections_1 (tree c, tree t, vec<tree> &types,
 		}
 	      t = TREE_OPERAND (t, 0);
 	    }
+	  if (TREE_CODE (t) == MEM_REF)
+	    t = TREE_OPERAND (t, 0);
 	}
       if (!VAR_P (t) && TREE_CODE (t) != PARM_DECL)
 	{
@@ -13750,6 +13752,8 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 		}
 	      if (remove)
 		break;
+	      if (TREE_CODE (t) == MEM_REF)
+		t = TREE_OPERAND (t, 0);
 	      if (VAR_P (t) || TREE_CODE (t) == PARM_DECL)
 		{
 		  if (bitmap_bit_p (&map_field_head, DECL_UID (t)))
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 8161d63..79c03d2 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -31563,15 +31563,19 @@ cp_parser_omp_var_list_no_open (cp_parser *parser, enum omp_clause_code kind,
 	    case OMP_CLAUSE_MAP:
 	    case OMP_CLAUSE_FROM:
 	    case OMP_CLAUSE_TO:
-	      while (cp_lexer_next_token_is (parser->lexer, CPP_DOT))
+	      while (cp_lexer_next_token_is (parser->lexer, CPP_DOT)
+		     || cp_lexer_next_token_is (parser->lexer, CPP_DEREF))
 		{
+		  cpp_ttype ttype
+		    = cp_lexer_next_token_is (parser->lexer, CPP_DOT)
+		      ? CPP_DOT : CPP_DEREF;
 		  location_t loc
 		    = cp_lexer_peek_token (parser->lexer)->location;
 		  cp_id_kind idk = CP_ID_KIND_NONE;
 		  cp_lexer_consume_token (parser->lexer);
 		  decl = convert_from_reference (decl);
 		  decl
-		    = cp_parser_postfix_dot_deref_expression (parser, CPP_DOT,
+		    = cp_parser_postfix_dot_deref_expression (parser, ttype,
 							      decl, false,
 							      &idk, loc);
 		}
@@ -33858,15 +33862,15 @@ cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask,
 	  clauses = cp_parser_oacc_clause_async (parser, clauses);
 	  c_name = "async";
 	  break;
+	case PRAGMA_OACC_CLAUSE_ATTACH:
+	  clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses);
+	  c_name = "attach";
+	  break;
 	case PRAGMA_OACC_CLAUSE_AUTO:
 	  clauses = cp_parser_oacc_simple_clause (parser, OMP_CLAUSE_AUTO,
 						 clauses, here);
 	  c_name = "auto";
 	  break;
-	case PRAGMA_OACC_CLAUSE_ATTACH:
-	  clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses);
-	  c_name = "attach";
-	  break;
 	case PRAGMA_OACC_CLAUSE_BIND:
 	  clauses = cp_parser_oacc_clause_bind (parser, clauses);
 	  c_name = "bind";
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index 796ae7f..7cbcb34 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -6724,7 +6724,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 		error ("%qE is not a variable in %<depend%> clause", t);
 	      remove = true;
 	    }
-	  else if (ort != C_ORT_ACC && t == current_class_ptr)
+	  else if (t == current_class_ptr)
 	    {
 	      error ("%<this%> allowed in OpenMP only in %<declare simd%>"
 		     " clauses");
@@ -6810,6 +6810,10 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
 	      t = TREE_OPERAND (t, 0);
 	      OMP_CLAUSE_DECL (c) = t;
 	    }
+	  if (ort == C_ORT_ACC
+	      && TREE_CODE (t) == COMPONENT_REF
+	      && TREE_CODE (TREE_OPERAND (t, 0)) == INDIRECT_REF)
+	    t = TREE_OPERAND (TREE_OPERAND (t, 0), 0);
 	  if (TREE_CODE (t) == COMPONENT_REF
 	      && ((ort & C_ORT_OMP_DECLARE_SIMD) == C_ORT_OMP
 		  || ort == C_ORT_ACC)
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 3a9e45b..14b5def 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -1183,10 +1183,12 @@ enum gfc_omp_depend_op
 enum gfc_omp_map_op
 {
   OMP_MAP_ALLOC,
+  OMP_MAP_ATTACH,
   OMP_MAP_TO,
   OMP_MAP_FROM,
   OMP_MAP_TOFROM,
   OMP_MAP_DELETE,
+  OMP_MAP_DETACH,
   OMP_MAP_FORCE_ALLOC,
   OMP_MAP_FORCE_TO,
   OMP_MAP_FORCE_FROM,
diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
index 654ceb6..f120e3d 100644
--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -808,7 +808,7 @@ enum omp_mask1
   OMP_MASK1_LAST
 };
 
-/* OpenACC 2.0 specific clauses. */
+/* OpenACC 2.0+ specific clauses. */
 enum omp_mask2
 {
   OMP_CLAUSE_ASYNC,
@@ -837,6 +837,8 @@ enum omp_mask2
   OMP_CLAUSE_IF_PRESENT,
   OMP_CLAUSE_FINALIZE,
   OMP_CLAUSE_DEVICE_TYPE,
+  OMP_CLAUSE_ATTACH,
+  OMP_CLAUSE_DETACH,
   /* This must come last.  */
   OMP_MASK2_LAST
 };
@@ -964,10 +966,18 @@ static match
 gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask,
 		       const omp_mask dtype_mask,
 		       bool first = true, bool needs_space = true,
-		       bool openacc = false, bool allow_derived = false)
+		       bool openacc = false)
 {
   gfc_omp_clauses *base_clauses, *c = gfc_get_omp_clauses ();
   locus old_loc;
+  /* Determine whether we're dealing with an OpenACC directive that permits
+     derived type member accesses.  This in particular disallows
+     "!$acc declare" from using such accesses, because it's not clear if/how
+     that should work.  */
+  bool allow_derived = (openacc
+			&& ((mask & OMP_CLAUSE_ATTACH)
+			    || (mask & OMP_CLAUSE_DETACH)
+			    || (mask & OMP_CLAUSE_HOST_SELF)));
 
   base_clauses = c;
 
@@ -1043,6 +1053,12 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask,
 	      needs_space = true;
 	      continue;
 	    }
+	  if ((mask & OMP_CLAUSE_ATTACH)
+	      && gfc_match ("attach ( ") == MATCH_YES
+	      && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP],
+					   OMP_MAP_ATTACH, false,
+					   allow_derived))
+	    continue;
 	  break;
 	case 'b':
 	  if ((mask & OMP_CLAUSE_BIND) && c->routine_bind == NULL
@@ -1098,8 +1114,7 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask,
 	  if ((mask & OMP_CLAUSE_COPYOUT)
 	      && gfc_match ("copyout ( ") == MATCH_YES
 	      && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP],
-					   OMP_MAP_FROM, true,
-					   allow_derived))
+					   OMP_MAP_FROM, true, allow_derived))
 	    continue;
 	  if ((mask & OMP_CLAUSE_COPYPRIVATE)
 	      && gfc_match_omp_variable_list ("copyprivate (",
@@ -1109,8 +1124,7 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask,
 	  if ((mask & OMP_CLAUSE_CREATE)
 	      && gfc_match ("create ( ") == MATCH_YES
 	      && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP],
-					   OMP_MAP_ALLOC, true,
-					   allow_derived))
+					   OMP_MAP_ALLOC, true, allow_derived))
 	    continue;
 	  break;
 	case 'd':
@@ -1190,6 +1204,12 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask,
 	      else
 		gfc_current_locus = old_loc;
 	    }
+	  if ((mask & OMP_CLAUSE_DETACH)
+	      && gfc_match ("detach ( ") == MATCH_YES
+	      && gfc_match_omp_map_clause (&c->lists[OMP_LIST_MAP],
+					   OMP_MAP_DETACH, false,
+					   allow_derived))
+	    continue;
 	  if ((mask & OMP_CLAUSE_DEVICE)
 	      && !openacc
 	      && c->device == NULL
@@ -1784,8 +1804,8 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask,
 
 	      if (gfc_match_omp_variable_list (" :",
 					       &c->lists[OMP_LIST_REDUCTION],
-					       false, NULL, &head,
-					       openacc) == MATCH_YES)
+					       false, NULL, &head, openacc,
+					       allow_derived) == MATCH_YES)
 		{
 		  gfc_omp_namelist *n;
 		  if (rop == OMP_REDUCTION_NONE)
@@ -2053,7 +2073,7 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask,
    | OMP_CLAUSE_CREATE | OMP_CLAUSE_PRESENT				\
    | OMP_CLAUSE_DEVICEPTR						\
    | OMP_CLAUSE_PRIVATE | OMP_CLAUSE_FIRSTPRIVATE			\
-   | OMP_CLAUSE_DEFAULT)
+   | OMP_CLAUSE_DEFAULT | OMP_CLAUSE_ATTACH)
 #define OACC_KERNELS_CLAUSES \
   (omp_mask (OMP_CLAUSE_ASYNC) | OMP_CLAUSE_WAIT			\
    | OMP_CLAUSE_NUM_GANGS | OMP_CLAUSE_NUM_WORKERS			\
@@ -2063,12 +2083,12 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask,
    | OMP_CLAUSE_COPY | OMP_CLAUSE_COPYIN | OMP_CLAUSE_COPYOUT		\
    | OMP_CLAUSE_CREATE | OMP_CLAUSE_PRESENT				\
    | OMP_CLAUSE_DEVICEPTR						\
-   | OMP_CLAUSE_DEFAULT)
+   | OMP_CLAUSE_DEFAULT | OMP_CLAUSE_ATTACH)
 #define OACC_DATA_CLAUSES \
   (omp_mask (OMP_CLAUSE_IF)						\
    | OMP_CLAUSE_COPY | OMP_CLAUSE_COPYIN | OMP_CLAUSE_COPYOUT		\
    | OMP_CLAUSE_CREATE | OMP_CLAUSE_PRESENT				\
-   | OMP_CLAUSE_DEVICEPTR)
+   | OMP_CLAUSE_DEVICEPTR | OMP_CLAUSE_ATTACH)
 #define OACC_HOST_DATA_CLAUSES \
   (omp_mask (OMP_CLAUSE_USE_DEVICE))
 #define OACC_LOOP_CLAUSES \
@@ -2098,12 +2118,12 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask,
 #define OACC_ENTER_DATA_CLAUSES \
   (omp_mask (OMP_CLAUSE_IF)						\
    | OMP_CLAUSE_ASYNC | OMP_CLAUSE_WAIT					\
-   | OMP_CLAUSE_COPYIN | OMP_CLAUSE_CREATE)
+   | OMP_CLAUSE_COPYIN | OMP_CLAUSE_CREATE | OMP_CLAUSE_ATTACH)
 #define OACC_EXIT_DATA_CLAUSES \
   (omp_mask (OMP_CLAUSE_IF)						\
    | OMP_CLAUSE_ASYNC | OMP_CLAUSE_WAIT					\
    | OMP_CLAUSE_COPYOUT | OMP_CLAUSE_DELETE				\
-   | OMP_CLAUSE_FINALIZE)
+   | OMP_CLAUSE_FINALIZE | OMP_CLAUSE_DETACH)
 #define OACC_ROUTINE_CLAUSES \
   (omp_mask (OMP_CLAUSE_GANG) | OMP_CLAUSE_WORKER | OMP_CLAUSE_VECTOR	\
    | OMP_CLAUSE_SEQ							\
@@ -2139,12 +2159,10 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, omp_mask mask,
 
 
 static match
-match_acc (gfc_exec_op op, const omp_mask mask, const omp_mask dtype_mask,
-	   bool derived_types=false)
+match_acc (gfc_exec_op op, const omp_mask mask, const omp_mask dtype_mask)
 {
   gfc_omp_clauses *c;
-  if (gfc_match_omp_clauses (&c, mask, dtype_mask, false, false, true,
-			     derived_types)
+  if (gfc_match_omp_clauses (&c, mask, dtype_mask, false, false, true)
       != MATCH_YES)
     return MATCH_ERROR;
   new_st.op = op;
@@ -2309,7 +2327,8 @@ gfc_match_oacc_update (void)
 
   if (gfc_match_omp_clauses (&c, OACC_UPDATE_CLAUSES,
 			     OACC_UPDATE_CLAUSE_DEVICE_TYPE_MASK, false,
-			     false, true, true) != MATCH_YES)
+			     false, true)
+      != MATCH_YES)
     return MATCH_ERROR;
 
   if (!c->lists[OMP_LIST_MAP])
@@ -2329,7 +2348,7 @@ match
 gfc_match_oacc_enter_data (void)
 {
   return match_acc (EXEC_OACC_ENTER_DATA, OACC_ENTER_DATA_CLAUSES,
-		    OMP_MASK2_LAST, true);
+		    OMP_MASK2_LAST);
 }
 
 
@@ -2337,7 +2356,7 @@ match
 gfc_match_oacc_exit_data (void)
 {
   return match_acc (EXEC_OACC_EXIT_DATA, OACC_EXIT_DATA_CLAUSES,
-		    OMP_MASK2_LAST, true);
+		    OMP_MASK2_LAST);
 }
 
 
@@ -4017,9 +4036,6 @@ resolve_nonnegative_int_expr (gfc_expr *expr, const char *clause)
 static void
 check_symbol_not_pointer (gfc_symbol *sym, locus loc, const char *name)
 {
-  if (sym->ts.type == BT_DERIVED && sym->attr.pointer)
-    gfc_error ("POINTER object %qs of derived type in %s clause at %L",
-	       sym->name, name, &loc);
   if (sym->ts.type == BT_DERIVED && sym->attr.cray_pointer)
     gfc_error ("Cray pointer object %qs of derived type in %s clause at %L",
 	       sym->name, name, &loc);
@@ -4060,9 +4076,6 @@ check_array_not_assumed (gfc_symbol *sym, locus loc, const char *name)
 static void
 resolve_oacc_data_clauses (gfc_symbol *sym, locus loc, const char *name)
 {
-  if (sym->ts.type == BT_DERIVED && sym->attr.allocatable)
-    gfc_error ("ALLOCATABLE object %qs of derived type in %s clause at %L",
-	       sym->name, name, &loc);
   if ((sym->ts.type == BT_ASSUMED && sym->attr.allocatable)
       || (sym->ts.type == BT_CLASS && CLASS_DATA (sym)
 	  && CLASS_DATA (sym)->attr.allocatable))
@@ -4408,11 +4421,23 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses,
 	&& (list != OMP_LIST_REDUCTION || !openacc))
       for (n = omp_clauses->lists[list]; n; n = n->next)
 	{
-	  if (n->sym->mark)
-	    gfc_error ("Symbol %qs present on multiple clauses at %L",
-		       n->sym->name, &n->where);
-	  else
-	    n->sym->mark = 1;
+	  bool array_only_p = true;
+	  /* Disallow duplicate bare variable references and multiple
+	     subarrays of the same array here, but allow multiple components of
+	     the same (e.g. derived-type) variable.  For the latter, duplicate
+	     components are detected elsewhere.  */
+	  if (openacc && n->expr && n->expr->expr_type == EXPR_VARIABLE)
+	    for (gfc_ref *ref = n->expr->ref; ref; ref = ref->next)
+	      if (ref->type != REF_ARRAY)
+		array_only_p = false;
+	  if (array_only_p)
+	    {
+	      if (n->sym->mark)
+		gfc_error ("Symbol %qs present on multiple clauses at %L",
+			   n->sym->name, &n->where);
+	      else
+		n->sym->mark = 1;
+	    }
 	}
 
   gcc_assert (OMP_LIST_LASTPRIVATE == OMP_LIST_FIRSTPRIVATE + 1);
@@ -4603,26 +4628,41 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses,
 				 "are allowed on ORDERED directive at %L",
 				 &n->where);
 		  }
+		gfc_ref *array_ref = NULL;
+		bool resolved = false;
 		if (n->expr)
 		  {
-		    if (!gfc_resolve_expr (n->expr)
+		    array_ref = n->expr->ref;
+		    resolved = gfc_resolve_expr (n->expr);
+
+		    /* Look through component refs to find last array
+		       reference.  */
+		    while (resolved
+			   && array_ref
+			   && (array_ref->type == REF_COMPONENT
+			       || (array_ref->type == REF_ARRAY
+				   && array_ref->next
+			           && array_ref->next->type == REF_COMPONENT)))
+		      array_ref = array_ref->next;
+		  }
+		if (array_ref
+		    || (n->expr
+			&& (!resolved || n->expr->expr_type != EXPR_VARIABLE)))
+		  {
+		    if (!resolved
 			|| n->expr->expr_type != EXPR_VARIABLE
-			|| n->expr->ref == NULL
-			|| n->expr->ref->next
-			|| n->expr->ref->type != REF_ARRAY)
-		      {
-			if (n->sym->ts.type != BT_DERIVED)
-			  gfc_error ("%qs in %s clause at %L is not a proper "
-				     "array section", n->sym->name, name,
-				     &n->where);
-		      }
-		    else if (n->expr->ref->u.ar.codimen)
+			|| array_ref->next
+			|| array_ref->type != REF_ARRAY)
+		      gfc_error ("%qs in %s clause at %L is not a proper "
+				 "array section", n->sym->name, name,
+				 &n->where);
+		    else if (array_ref->u.ar.codimen)
 		      gfc_error ("Coarrays not supported in %s clause at %L",
 				 name, &n->where);
 		    else
 		      {
 			int i;
-			gfc_array_ref *ar = &n->expr->ref->u.ar;
+			gfc_array_ref *ar = &array_ref->u.ar;
 			for (i = 0; i < ar->dimen; i++)
 			  if (ar->stride[i])
 			    {
diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index 8840fd2..98f40d1 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -62,6 +62,9 @@ gfc_omp_privatize_by_reference (const_tree decl)
 
   if (TREE_CODE (type) == POINTER_TYPE)
     {
+      while (TREE_CODE (decl) == COMPONENT_REF)
+        decl = TREE_OPERAND (decl, 1);
+
       /* Array POINTER/ALLOCATABLE have aggregate types, all user variables
 	 that have POINTER_TYPE type and aren't scalar pointers, scalar
 	 allocatables, Cray pointees or C pointers are supposed to be
@@ -2121,69 +2124,35 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses,
 	      tree decl = gfc_get_symbol_decl (n->sym);
 	      if (DECL_P (decl))
 		TREE_ADDRESSABLE (decl) = 1;
-	      /* Handle derived-typed members for OpenACC Update.  */
-	      if (n->sym->ts.type == BT_DERIVED
-		  && n->expr != NULL && n->expr->ref != NULL
-		  && (n->expr->ref->next == NULL
-		      || (n->expr->ref->next != NULL
-			  && n->expr->ref->next->type == REF_ARRAY
-			  && n->expr->ref->next->u.ar.type == AR_FULL))
-		  && (n->expr->ref->type == REF_ARRAY
-		      && n->expr->ref->u.ar.type != AR_SECTION))
-		{
-		  gfc_ref *ref = n->expr->ref;
-		  gfc_component *c = ref->u.c.component;
-		  tree field;
-		  tree context;
-		  tree ptr;
-		  tree type;
-		  tree scratch;
 
-		  if (c->backend_decl == NULL_TREE
-		      && ref->u.c.sym != NULL)
-		    gfc_get_derived_type (ref->u.c.sym);
+	      gfc_ref *ref = n->expr ? n->expr->ref : NULL;
+	      symbol_attribute *sym_attr = &n->sym->attr;
+	      gomp_map_kind ptr_map_kind = GOMP_MAP_POINTER;
 
-		  field = c->backend_decl;
-		  gcc_assert (field && TREE_CODE (field) == FIELD_DECL);
-		  context = DECL_FIELD_CONTEXT (field);
-
-		  type = TREE_TYPE (decl);
-		  if (POINTER_TYPE_P (type))
-		    type = TREE_TYPE (type);
+	      if (ref && n->sym->ts.type == BT_DERIVED)
+	        {
+		  if (gfc_omp_privatize_by_reference (decl))
+		    decl = build_fold_indirect_ref (decl);
 
-		  if (context != type)
+		  for (; ref && ref->type == REF_COMPONENT; ref = ref->next)
 		    {
-		      tree f2 = c->norestrict_decl;
-		      if (!f2 || DECL_FIELD_CONTEXT (f2) != type)
-			for (f2 = TYPE_FIELDS (TREE_TYPE (decl)); f2;
-			     f2 = DECL_CHAIN (f2))
-			  if (TREE_CODE (f2) == FIELD_DECL
-			      && DECL_NAME (f2) == DECL_NAME (field))
-			    break;
-		      gcc_assert (f2);
-		      c->norestrict_decl = f2;
-		      field = f2;
+		      tree field = ref->u.c.component->backend_decl;
+		      gcc_assert (field && TREE_CODE (field) == FIELD_DECL);
+		      decl = fold_build3 (COMPONENT_REF, TREE_TYPE (field),
+					  decl, field, NULL_TREE);
+		      sym_attr = &ref->u.c.component->attr;
 		    }
 
-		  if (POINTER_TYPE_P (TREE_TYPE (decl)))
-		    decl = build_fold_indirect_ref_loc (input_location,
-							decl);
-
-		  scratch = fold_build3_loc (input_location, COMPONENT_REF,
-					     TREE_TYPE (field), decl, field,
-					     NULL_TREE);
-		  type = TREE_TYPE (scratch);
-		  ptr = gfc_create_var (pvoid_type_node, NULL);
-		  scratch = fold_convert (pvoid_type_node,
-					  build_fold_addr_expr (scratch));
-		  gfc_add_modify (block, ptr, scratch);
-		  OMP_CLAUSE_SIZE (node) = TYPE_SIZE_UNIT (type);
-		  OMP_CLAUSE_DECL (node) = build_fold_indirect_ref (ptr);
+		  ptr_map_kind = GOMP_MAP_ALWAYS_POINTER;
 		}
-	      else if ((n->sym->ts.type == BT_DERIVED && n->expr == NULL)
-		       || (n->expr == NULL
-			   || n->expr->ref->u.ar.type == AR_FULL))
+
+	      if (ref == NULL || ref->u.ar.type == AR_FULL)
 		{
+		  tree field = decl;
+
+		  while (TREE_CODE (field) == COMPONENT_REF)
+		    field = TREE_OPERAND (field, 1);
+
 		  if (POINTER_TYPE_P (TREE_TYPE (decl))
 		      && n->u.map_op == OMP_MAP_FORCE_DEVICEPTR)
 		    {
@@ -2192,18 +2161,18 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses,
 		    }
 		  else if (POINTER_TYPE_P (TREE_TYPE (decl))
 		      && (gfc_omp_privatize_by_reference (decl)
-			  || GFC_DECL_GET_SCALAR_POINTER (decl)
-			  || GFC_DECL_GET_SCALAR_ALLOCATABLE (decl)
-			  || GFC_DECL_CRAY_POINTEE (decl)
+			  || GFC_DECL_GET_SCALAR_POINTER (field)
+			  || GFC_DECL_GET_SCALAR_ALLOCATABLE (field)
+			  || GFC_DECL_CRAY_POINTEE (field)
 			  || GFC_DESCRIPTOR_TYPE_P
-					(TREE_TYPE (TREE_TYPE (decl)))))
+					(TREE_TYPE (TREE_TYPE (field)))))
 		    {
 		      tree orig_decl = decl;
 		      enum gomp_map_kind gmk = GOMP_MAP_FIRSTPRIVATE_POINTER;
 		      if (GFC_DECL_GET_SCALAR_ALLOCATABLE (decl)
 			  && (n->sym->attr.oacc_declare_create)
 			  && clauses->update_allocatable)
-			gmk = GOMP_MAP_ALWAYS_POINTER;
+			gmk = ptr_map_kind;
 		      node4 = build_omp_clause (input_location,
 						OMP_CLAUSE_MAP);
 		      OMP_CLAUSE_SET_MAP_KIND (node4, gmk);
@@ -2216,7 +2185,7 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses,
 			{
 			  node3 = build_omp_clause (input_location,
 						    OMP_CLAUSE_MAP);
-			  OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_POINTER);
+			  OMP_CLAUSE_SET_MAP_KIND (node3, ptr_map_kind);
 			  OMP_CLAUSE_DECL (node3) = decl;
 			  OMP_CLAUSE_SIZE (node3) = size_int (0);
 			  decl = build_fold_indirect_ref (decl);
@@ -2225,7 +2194,9 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses,
 		      if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl)))
 			OMP_CLAUSE_SET_MAP_KIND (node4, GOMP_MAP_POINTER);
 		    }
-		  if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl)))
+		  if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl))
+		      && n->u.map_op != OMP_MAP_ATTACH
+		      && n->u.map_op != OMP_MAP_DETACH)
 		    {
 		      tree type = TREE_TYPE (decl);
 		      tree ptr = gfc_conv_descriptor_data_get (decl);
@@ -2238,14 +2209,16 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses,
 		      OMP_CLAUSE_SIZE (node2) = TYPE_SIZE_UNIT (type);
 		      node3 = build_omp_clause (input_location,
 						OMP_CLAUSE_MAP);
-		      OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_POINTER);
+		      OMP_CLAUSE_SET_MAP_KIND (node3, ptr_map_kind);
 		      OMP_CLAUSE_DECL (node3)
 			= gfc_conv_descriptor_data_get (decl);
+		      if (ptr_map_kind == GOMP_MAP_ALWAYS_POINTER)
+		        STRIP_NOPS (OMP_CLAUSE_DECL (node3));
 		      OMP_CLAUSE_SIZE (node3) = size_int (0);
 
 		      /* We have to check for n->sym->attr.dimension because
 			 of scalar coarrays.  */
-		      if (n->sym->attr.pointer && n->sym->attr.dimension)
+		      if (sym_attr->pointer && sym_attr->dimension)
 			{
 			  stmtblock_t cond_block;
 			  tree size
@@ -2275,11 +2248,11 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses,
 							     else_b));
 			  OMP_CLAUSE_SIZE (node) = size;
 			}
-		      else if (n->sym->attr.dimension)
+		      else if (sym_attr->dimension)
 			OMP_CLAUSE_SIZE (node)
 			  = gfc_full_array_size (block, decl,
 						 GFC_TYPE_ARRAY_RANK (type));
-		      if (n->sym->attr.dimension)
+		      if (sym_attr->dimension)
 			{
 			  tree elemsz
 			    = TYPE_SIZE_UNIT (gfc_get_element_type (type));
@@ -2292,31 +2265,17 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses,
 		  else
 		    OMP_CLAUSE_DECL (node) = decl;
 		}
-	      else
+	      else if (ref)
 		{
 		  tree ptr, ptr2;
 		  gfc_init_se (&se, NULL);
-		  if ((n->sym->ts.type == BT_DERIVED
-		       && n->expr->rank == 0)
-		      || (n->sym->ts.type != BT_DERIVED
-			  && n->expr->ref->u.ar.type == AR_ELEMENT))
+		  if (ref->u.ar.type == AR_ELEMENT)
 		    {
 		      gfc_conv_expr_reference (&se, n->expr);
 		      gfc_add_block_to_block (block, &se.pre);
 		      ptr = se.expr;
-		      tree type = TREE_TYPE (ptr);
-		      if (n->sym->ts.type == BT_DERIVED)
-			{
-			  tree t = gfc_create_var (build_pointer_type
-						   (void_type_node),
-						   NULL);
-			  ptr = fold_convert (pvoid_type_node, ptr);
-			  gfc_add_modify (block, t, ptr);
-			  ptr = t;
-			  type = TREE_TYPE (type);
-			}
 		      OMP_CLAUSE_SIZE (node)
-			= TYPE_SIZE_UNIT (type);
+			= TYPE_SIZE_UNIT (TREE_TYPE (ptr));
 		    }
 		  else
 		    {
@@ -2337,14 +2296,12 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses,
 		  gfc_add_block_to_block (block, &se.post);
 		  OMP_CLAUSE_DECL (node) = build_fold_indirect_ref (ptr);
 
-		  if (n->sym->ts.type == BT_DERIVED)
-		    goto finalize_map_clause;
 		  if (POINTER_TYPE_P (TREE_TYPE (decl))
 		      && GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (TREE_TYPE (decl))))
 		    {
 		      node4 = build_omp_clause (input_location,
 						OMP_CLAUSE_MAP);
-		      OMP_CLAUSE_SET_MAP_KIND (node4, GOMP_MAP_POINTER);
+		      OMP_CLAUSE_SET_MAP_KIND (node4, ptr_map_kind);
 		      OMP_CLAUSE_DECL (node4) = decl;
 		      OMP_CLAUSE_SIZE (node4) = size_int (0);
 		      decl = build_fold_indirect_ref (decl);
@@ -2361,9 +2318,11 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses,
 		      OMP_CLAUSE_SIZE (node2) = TYPE_SIZE_UNIT (type);
 		      node3 = build_omp_clause (input_location,
 						OMP_CLAUSE_MAP);
-		      OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_POINTER);
+		      OMP_CLAUSE_SET_MAP_KIND (node3, ptr_map_kind);
 		      OMP_CLAUSE_DECL (node3)
 			= gfc_conv_descriptor_data_get (decl);
+		      if (ptr_map_kind == GOMP_MAP_ALWAYS_POINTER)
+		        STRIP_NOPS (OMP_CLAUSE_DECL (node3));
 		    }
 		  else
 		    {
@@ -2376,7 +2335,7 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses,
 			}
 		      node3 = build_omp_clause (input_location,
 						OMP_CLAUSE_MAP);
-		      OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_POINTER);
+		      OMP_CLAUSE_SET_MAP_KIND (node3, ptr_map_kind);
 		      OMP_CLAUSE_DECL (node3) = decl;
 		    }
 		  ptr2 = fold_convert (sizetype, ptr2);
@@ -2384,11 +2343,16 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses,
 		    = fold_build2 (MINUS_EXPR, sizetype, ptr, ptr2);
 		finalize_map_clause:;
 		}
+	      else
+	        gcc_unreachable ();
 	      switch (n->u.map_op)
 		{
 		case OMP_MAP_ALLOC:
 		  OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_ALLOC);
 		  break;
+		case OMP_MAP_ATTACH:
+		  OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_ATTACH);
+		  break;
 		case OMP_MAP_TO:
 		  OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_TO);
 		  break;
@@ -2413,6 +2377,9 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses,
 		case OMP_MAP_DELETE:
 		  OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_DELETE);
 		  break;
+		case OMP_MAP_DETACH:
+		  OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_DETACH);
+		  break;
 		case OMP_MAP_FORCE_ALLOC:
 		  OMP_CLAUSE_SET_MAP_KIND (node, GOMP_MAP_FORCE_ALLOC);
 		  break;
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 824e020..40bf586 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -111,6 +111,10 @@ enum gimplify_omp_var_data
   /* Flag for OpenACC deviceptrs.  */
   GOVD_DEVICEPTR = (1<<21),
 
+  /* Flag for GOVD_MAP: (struct) vars that have pointer attachments for
+     fields.  */
+  GOVD_MAP_HAS_ATTACHMENTS = (1<<22),
+
   GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE
 			   | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR
 			   | GOVD_LOCAL)
@@ -7692,7 +7696,13 @@ insert_struct_component_mapping (enum tree_code code, tree c, tree struct_node,
   OMP_CLAUSE_SET_MAP_KIND (c2, mkind);
   OMP_CLAUSE_DECL (c2) = unshare_expr (OMP_CLAUSE_DECL (c));
   OMP_CLAUSE_CHAIN (c2) = scp ? *scp : prev_node;
-  OMP_CLAUSE_SIZE (c2) = TYPE_SIZE_UNIT (ptr_type_node);
+  if (OMP_CLAUSE_CHAIN (prev_node) != c
+      && OMP_CLAUSE_CODE (OMP_CLAUSE_CHAIN (prev_node)) == OMP_CLAUSE_MAP
+      && (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (prev_node))
+	  == GOMP_MAP_TO_PSET))
+    OMP_CLAUSE_SIZE (c2) = OMP_CLAUSE_SIZE (OMP_CLAUSE_CHAIN (prev_node));
+  else
+    OMP_CLAUSE_SIZE (c2) = TYPE_SIZE_UNIT (ptr_type_node);
   if (struct_node)
     OMP_CLAUSE_CHAIN (struct_node) = c2;
 
@@ -8245,7 +8255,9 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 		  remove = true;
 		  break;
 		}
-	      if (DECL_P (decl))
+	      if (DECL_P (decl)
+		  && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_TO_PSET
+		  && code != OACC_UPDATE)
 		{
 		  if (error_operand_p (decl))
 		    {
@@ -8297,17 +8309,36 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 		    = splay_tree_lookup (ctx->variables, (splay_tree_key)decl);
 		  bool ptr = (OMP_CLAUSE_MAP_KIND (c)
 			      == GOMP_MAP_ALWAYS_POINTER);
-		  if ((n == NULL || (n->value & GOVD_MAP) == 0)
-		      && code != OACC_UPDATE)
+		  bool attach = OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH
+			        || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH;
+		  bool has_attachments = false;
+		  /* For OpenACC, pointers in structs should trigger an
+		     attach action.  */
+		  if (ptr && (region_type & ORT_ACC) != 0)
+		    {
+		      /* Turning a GOMP_MAP_ALWAYS_POINTER clause into a
+			 GOMP_MAP_ATTACH clause after we have detected a case
+			 that needs a GOMP_MAP_STRUCT mapping adding.  */
+		      OMP_CLAUSE_SET_MAP_KIND (c,
+			(code == OACC_EXIT_DATA) ? GOMP_MAP_DETACH
+						 : GOMP_MAP_ATTACH);
+		      has_attachments = true;
+		    }
+		  if (n == NULL || (n->value & GOVD_MAP) == 0)
 		    {
 		      tree l = build_omp_clause (OMP_CLAUSE_LOCATION (c),
 						 OMP_CLAUSE_MAP);
-		      OMP_CLAUSE_SET_MAP_KIND (l, GOMP_MAP_STRUCT);
+		      OMP_CLAUSE_SET_MAP_KIND (l, attach
+			? GOMP_MAP_FORCE_PRESENT : GOMP_MAP_STRUCT);
 		      if (!base_eq_orig_base)
 			OMP_CLAUSE_DECL (l) = unshare_expr (orig_base);
 		      else
 			OMP_CLAUSE_DECL (l) = decl;
-		      OMP_CLAUSE_SIZE (l) = size_int (1);
+		      OMP_CLAUSE_SIZE (l) = attach
+			? (DECL_P (OMP_CLAUSE_DECL (l))
+			     ? DECL_SIZE_UNIT (OMP_CLAUSE_DECL (l))
+			     : TYPE_SIZE_UNIT (TREE_TYPE (OMP_CLAUSE_DECL (l))))
+			: size_int (1);
 		      if (struct_map_to_clause == NULL)
 			struct_map_to_clause = new hash_map<tree, tree>;
 		      struct_map_to_clause->put (decl, l);
@@ -8339,9 +8370,11 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 		      flags = GOVD_MAP | GOVD_EXPLICIT;
 		      if (GOMP_MAP_ALWAYS_P (OMP_CLAUSE_MAP_KIND (c)) || ptr)
 			flags |= GOVD_SEEN;
+		      if (has_attachments)
+			flags |= GOVD_MAP_HAS_ATTACHMENTS;
 		      goto do_add_decl;
 		    }
-		  else
+		  else if (struct_map_to_clause)
 		    {
 		      tree *osc = struct_map_to_clause->get (decl);
 		      tree *sc = NULL, *scp = NULL;
@@ -8350,8 +8383,10 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 		      sc = &OMP_CLAUSE_CHAIN (*osc);
 		      if (*sc != c
 			  && (OMP_CLAUSE_MAP_KIND (*sc)
-			      == GOMP_MAP_FIRSTPRIVATE_REFERENCE)) 
+			      == GOMP_MAP_FIRSTPRIVATE_REFERENCE))
 			sc = &OMP_CLAUSE_CHAIN (*sc);
+		      /* Here "prev_list_p" is the end of the inserted
+			 alloc/release nodes after the struct node, OSC.  */
 		      for (; *sc != c; sc = &OMP_CLAUSE_CHAIN (*sc))
 			if (ptr && sc == prev_list_p)
 			  break;
@@ -8410,9 +8445,10 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 			  }
 		      if (remove)
 			break;
-		      OMP_CLAUSE_SIZE (*osc)
-			= size_binop (PLUS_EXPR, OMP_CLAUSE_SIZE (*osc),
-				      size_one_node);
+		      if (!attach)
+			OMP_CLAUSE_SIZE (*osc)
+			  = size_binop (PLUS_EXPR, OMP_CLAUSE_SIZE (*osc),
+					size_one_node);
 		      if (ptr)
 			{
 			  tree cl
@@ -8444,11 +8480,15 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 		}
 	      if (!remove
 		  && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ALWAYS_POINTER
+		  && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_TO_PSET
 		  && OMP_CLAUSE_CHAIN (c)
 		  && OMP_CLAUSE_CODE (OMP_CLAUSE_CHAIN (c)) == OMP_CLAUSE_MAP
-		  && (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (c))
-		      == GOMP_MAP_ALWAYS_POINTER))
+		  && ((OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (c))
+		       == GOMP_MAP_ALWAYS_POINTER)
+		      || (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (c))
+		          == GOMP_MAP_TO_PSET)))
 		prev_list_p = list_p;
+
 	      break;
 	    }
 	  flags = GOVD_MAP | GOVD_EXPLICIT;
@@ -9020,6 +9060,8 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n, void *data)
     return 0;
   if ((flags & GOVD_SEEN) == 0)
     return 0;
+  if ((flags & GOVD_MAP_HAS_ATTACHMENTS) != 0)
+    return 0;
   if (flags & GOVD_DEBUG_PRIVATE)
     {
       gcc_assert ((flags & GOVD_DATA_SHARE_CLASS) == GOVD_SHARED);
@@ -9509,8 +9551,7 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
 		}
 	    }
 	  else if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_STRUCT
-		   && (code == OMP_TARGET_EXIT_DATA
-		       || code == OACC_EXIT_DATA))
+		   && code == OMP_TARGET_EXIT_DATA)
 	    remove = true;
 	  else if (DECL_SIZE (decl)
 		   && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST
@@ -11218,10 +11259,15 @@ gimplify_omp_target_update (tree *expr_p, gimple_seq *pre_p)
 	      OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_FORCE_DETACH);
 	      finalize_marked = true;
 	      break;
+	    case GOMP_MAP_STRUCT:
+	    case GOMP_MAP_FORCE_PRESENT:
+	      /* Skip over an initial struct or force_present mapping.  */
+	      break;
 	    default:
-	      /* Check consistency: libgomp relies on the very first data
-		 mapping clause being marked, so make sure we did that before
-		 any other mapping clauses.  */
+	      /* Check consistency: libgomp relies on the very first
+		 non-struct, non-force-present data mapping clause being
+		 marked, so make sure we did that before any other mapping
+		 clauses.  */
 	      gcc_assert (finalize_marked);
 	      break;
 	    }
diff --git a/gcc/testsuite/c-c++-common/goacc/mdc-1.c b/gcc/testsuite/c-c++-common/goacc/mdc-1.c
index c20b94d..84a44af 100644
--- a/gcc/testsuite/c-c++-common/goacc/mdc-1.c
+++ b/gcc/testsuite/c-c++-common/goacc/mdc-1.c
@@ -42,13 +42,13 @@ t1 ()
 }
 
 /* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data map.to:s .len: 32.." 1 "omplower" } } */
-/* { dg-final { scan-tree-dump-times "pragma omp target oacc_data map.tofrom:.z .len: 40.. map.struct:s .len: 1.. map.alloc:s.a .len: 8.. map.tofrom:._1 .len: 40.. map.always_pointer:s.a .pointer assign, bias: 0.." 1 "omplower" } } */
-/* { dg-final { scan-tree-dump-times "pragma omp target oacc_parallel map.struct:s .len: 1.. map.attach:s.e .len: 8.." 1 "omplower" } } */
+/* { dg-final { scan-tree-dump-times "pragma omp target oacc_data map.tofrom:.z .len: 40.. map.struct:s .len: 1.. map.alloc:s.a .len: 8.. map.tofrom:._1 .len: 40.. map.attach:s.a .len: 0.." 1 "omplower" } } */
+/* { dg-final { scan-tree-dump-times "pragma omp target oacc_parallel map.force_present:s .len: 32.. map.attach:s.e .len: 8.." 1 "omplower" } } */
 /* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data map.attach:a .len: 8.." 1 "omplower" } } */
 /* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data map.detach:a .len: 8.." 1 "omplower" } } */
 /* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data map.to:a .len: 8.." 1 "omplower" } } */
-/* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data map.detach:s.e .len: 8.." 1 "omplower" } } */
-/* { dg-final { scan-tree-dump-times "pragma omp target oacc_data map.struct:s .len: 1.. map.attach:s.e .len: 8.." 1 "omplower" } } */
+/* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data map.force_present:s .len: 32.. map.detach:s.e .len: 8.." 1 "omplower" } } */
+/* { dg-final { scan-tree-dump-times "pragma omp target oacc_data map.force_present:s .len: 32.. map.attach:s.e .len: 8.." 1 "omplower" } } */
 /* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data map.release:a .len: 8.." 1 "omplower" } } */
 /* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data finalize map.force_detach:a .len: 8.." 1 "omplower" } } */
-/* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data finalize map.force_detach:s.a .len: 8.." 1 "omplower" } } */
+/* { dg-final { scan-tree-dump-times "pragma omp target oacc_enter_exit_data finalize map.force_present:s .len: 32.. map.force_detach:s.a .len: 8.." 1 "omplower" } } */
diff --git a/gcc/testsuite/gfortran.dg/goacc/data-clauses.f95 b/gcc/testsuite/gfortran.dg/goacc/data-clauses.f95
index b94214e..1a4a671 100644
--- a/gcc/testsuite/gfortran.dg/goacc/data-clauses.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/data-clauses.f95
@@ -39,9 +39,9 @@ contains
   !$acc end data
 
 
-  !$acc parallel copy (tip) ! { dg-error "POINTER" }
+  !$acc parallel copy (tip)
   !$acc end parallel
-  !$acc parallel copy (tia) ! { dg-error "ALLOCATABLE" }
+  !$acc parallel copy (tia)
   !$acc end parallel
   !$acc parallel deviceptr (i) copy (i) ! { dg-error "multiple clauses" }
   !$acc end parallel
@@ -54,9 +54,9 @@ contains
   !$acc end data
 
 
-  !$acc parallel copyin (tip) ! { dg-error "POINTER" }
+  !$acc parallel copyin (tip)
   !$acc end parallel
-  !$acc parallel copyin (tia) ! { dg-error "ALLOCATABLE" }
+  !$acc parallel copyin (tia)
   !$acc end parallel
   !$acc parallel deviceptr (i) copyin (i) ! { dg-error "multiple clauses" }
   !$acc end parallel
@@ -71,9 +71,9 @@ contains
   !$acc end data
 
 
-  !$acc parallel copyout (tip) ! { dg-error "POINTER" }
+  !$acc parallel copyout (tip)
   !$acc end parallel
-  !$acc parallel copyout (tia) ! { dg-error "ALLOCATABLE" }
+  !$acc parallel copyout (tia)
   !$acc end parallel
   !$acc parallel deviceptr (i) copyout (i) ! { dg-error "multiple clauses" }
   !$acc end parallel
@@ -90,9 +90,9 @@ contains
   !$acc end data
 
 
-  !$acc parallel create (tip) ! { dg-error "POINTER" }
+  !$acc parallel create (tip)
   !$acc end parallel
-  !$acc parallel create (tia) ! { dg-error "ALLOCATABLE" }
+  !$acc parallel create (tia)
   !$acc end parallel
   !$acc parallel deviceptr (i) create (i) ! { dg-error "multiple clauses" }
   !$acc end parallel
@@ -111,9 +111,9 @@ contains
   !$acc end data
 
 
-  !$acc parallel present (tip) ! { dg-error "POINTER" }
+  !$acc parallel present (tip)
   !$acc end parallel
-  !$acc parallel present (tia) ! { dg-error "ALLOCATABLE" }
+  !$acc parallel present (tia)
   !$acc end parallel
   !$acc parallel deviceptr (i) present (i) ! { dg-error "multiple clauses" }
   !$acc end parallel
@@ -144,9 +144,9 @@ contains
   !$acc end parallel
 
 
-  !$acc parallel present_or_copy (tip) ! { dg-error "POINTER" }
+  !$acc parallel present_or_copy (tip)
   !$acc end parallel
-  !$acc parallel present_or_copy (tia) ! { dg-error "ALLOCATABLE" }
+  !$acc parallel present_or_copy (tia)
   !$acc end parallel
   !$acc parallel deviceptr (i) present_or_copy (i) ! { dg-error "multiple clauses" }
   !$acc end parallel
@@ -169,9 +169,9 @@ contains
   !$acc end data
 
 
-  !$acc parallel present_or_copyin (tip) ! { dg-error "POINTER" }
+  !$acc parallel present_or_copyin (tip)
   !$acc end parallel
-  !$acc parallel present_or_copyin (tia) ! { dg-error "ALLOCATABLE" }
+  !$acc parallel present_or_copyin (tia)
   !$acc end parallel
   !$acc parallel deviceptr (i) present_or_copyin (i) ! { dg-error "multiple clauses" }
   !$acc end parallel
@@ -196,9 +196,9 @@ contains
   !$acc end data
 
 
-  !$acc parallel present_or_copyout (tip) ! { dg-error "POINTER" }
+  !$acc parallel present_or_copyout (tip)
   !$acc end parallel
-  !$acc parallel present_or_copyout (tia) ! { dg-error "ALLOCATABLE" }
+  !$acc parallel present_or_copyout (tia)
   !$acc end parallel
   !$acc parallel deviceptr (i) present_or_copyout (i) ! { dg-error "multiple clauses" }
   !$acc end parallel
@@ -225,9 +225,9 @@ contains
   !$acc end data
 
 
-  !$acc parallel present_or_create (tip) ! { dg-error "POINTER" }
+  !$acc parallel present_or_create (tip)
   !$acc end parallel
-  !$acc parallel present_or_create (tia) ! { dg-error "ALLOCATABLE" }
+  !$acc parallel present_or_create (tia)
   !$acc end parallel
   !$acc parallel deviceptr (i) present_or_create (i) ! { dg-error "multiple clauses" }
   !$acc end parallel
@@ -256,4 +256,4 @@ contains
   !$acc end data
 
   end subroutine foo
-end module test
\ No newline at end of file
+end module test
diff --git a/gcc/testsuite/gfortran.dg/goacc/derived-types.f90 b/gcc/testsuite/gfortran.dg/goacc/derived-types.f90
index 11d055a..5fb2981 100644
--- a/gcc/testsuite/gfortran.dg/goacc/derived-types.f90
+++ b/gcc/testsuite/gfortran.dg/goacc/derived-types.f90
@@ -33,48 +33,45 @@ program derived_acc
   !$acc exit data copyout(var)
   !$acc exit data copyout(var%a)
 
-  !$acc data copy(var%a) ! { dg-error "Syntax error in OpenMP" }
-  !$acc end data ! { dg-error "Unexpected ..ACC END DATA" }
-  
   !$acc data copy(var)
   !$acc end data
 
-  !$acc data copyout(var%a) ! { dg-error "Syntax error in OpenMP" }
-  !$acc end data ! { dg-error "Unexpected ..ACC END" }
+  !$acc data copyout(var%a)
+  !$acc end data
 
   !$acc parallel loop pcopyout(var)
   do i = 1, 10
   end do  
   !$acc end parallel loop
 
-  !$acc parallel loop copyout(var%a) ! { dg-error "Syntax error in OpenMP" }
+  !$acc parallel loop copyout(var%a)
   do i = 1, 10
   end do
-  !$acc end parallel loop ! { dg-error "Unexpected ..ACC END" }
+  !$acc end parallel loop
 
   !$acc parallel pcopy(var)
   !$acc end parallel
 
-  !$acc parallel pcopy(var%a) ! { dg-error "Syntax error in OpenMP" }
+  !$acc parallel pcopy(var%a)
   do i = 1, 10
   end do
-  !$acc end parallel ! { dg-error "Unexpected ..ACC END" }
+  !$acc end parallel
   
   !$acc kernels pcopyin(var)
   !$acc end kernels
 
-  !$acc kernels pcopy(var%a) ! { dg-error "Syntax error in OpenMP" }
+  !$acc kernels pcopy(var%a)
   do i = 1, 10
   end do
-  !$acc end kernels ! { dg-error "Unexpected ..ACC END" }
+  !$acc end kernels
 
   !$acc kernels loop pcopyin(var)
   do i = 1, 10
   end do
   !$acc end kernels loop
 
-  !$acc kernels loop pcopy(var%a) ! { dg-error "Syntax error in OpenMP" }
+  !$acc kernels loop pcopy(var%a)
   do i = 1, 10
   end do
-  !$acc end kernels loop ! { dg-error "Unexpected ..ACC END" }
+  !$acc end kernels loop
 end program derived_acc
diff --git a/gcc/testsuite/gfortran.dg/goacc/enter-exit-data.f95 b/gcc/testsuite/gfortran.dg/goacc/enter-exit-data.f95
index 805459c..b616b39 100644
--- a/gcc/testsuite/gfortran.dg/goacc/enter-exit-data.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/enter-exit-data.f95
@@ -44,14 +44,14 @@ contains
   !$acc enter data wait (i, 1) 
   !$acc enter data wait (a) ! { dg-error "INTEGER" }
   !$acc enter data wait (b(5:6)) ! { dg-error "INTEGER" }
-  !$acc enter data copyin (tip) ! { dg-error "POINTER" }
-  !$acc enter data copyin (tia) ! { dg-error "ALLOCATABLE" }
-  !$acc enter data create (tip) ! { dg-error "POINTER" }
-  !$acc enter data create (tia) ! { dg-error "ALLOCATABLE" }
-  !$acc enter data present_or_copyin (tip) ! { dg-error "POINTER" }
-  !$acc enter data present_or_copyin (tia) ! { dg-error "ALLOCATABLE" }
-  !$acc enter data present_or_create (tip) ! { dg-error "POINTER" }
-  !$acc enter data present_or_create (tia) ! { dg-error "ALLOCATABLE" }
+  !$acc enter data copyin (tip)
+  !$acc enter data copyin (tia)
+  !$acc enter data create (tip)
+  !$acc enter data create (tia)
+  !$acc enter data present_or_copyin (tip)
+  !$acc enter data present_or_copyin (tia)
+  !$acc enter data present_or_create (tip)
+  !$acc enter data present_or_create (tia)
   !$acc enter data copyin (i) create (i) ! { dg-error "multiple clauses" }
   !$acc enter data copyin (i) present_or_copyin (i) ! { dg-error "multiple clauses" }
   !$acc enter data create (i) present_or_copyin (i) ! { dg-error "multiple clauses" }
@@ -79,10 +79,10 @@ contains
   !$acc exit data wait (i, 1) 
   !$acc exit data wait (a) ! { dg-error "INTEGER" }
   !$acc exit data wait (b(5:6)) ! { dg-error "INTEGER" }
-  !$acc exit data copyout (tip) ! { dg-error "POINTER" }
-  !$acc exit data copyout (tia) ! { dg-error "ALLOCATABLE" }
-  !$acc exit data delete (tip) ! { dg-error "POINTER" }
-  !$acc exit data delete (tia) ! { dg-error "ALLOCATABLE" }
+  !$acc exit data copyout (tip)
+  !$acc exit data copyout (tia)
+  !$acc exit data delete (tip)
+  !$acc exit data delete (tia)
   !$acc exit data copyout (i) delete (i) ! { dg-error "multiple clauses" }
   !$acc exit data finalize
   !$acc exit data finalize copyout (i)
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index acf7f8f..17fe0d3 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -806,6 +806,8 @@ struct target_var_desc {
   bool copy_from;
   /* True if data always should be copied from device to host at the end.  */
   bool always_copy_from;
+  /* True if variable should be detached at end of region.  */
+  bool do_detach;
   /* Relative offset against key host_start.  */
   uintptr_t offset;
   /* Actual length.  */
@@ -860,6 +862,8 @@ struct splay_tree_key_s {
   uintptr_t refcount;
   /* Dynamic reference count.  */
   uintptr_t dynamic_refcount;
+  /* For a block with attached pointers, the attachment counters for each.  */
+  unsigned short *attach_count;
   /* Pointer to the original mapping of "omp declare target link" object.  */
   splay_tree_key link_key;
 };
@@ -1003,6 +1007,8 @@ enum gomp_map_vars_kind
   GOMP_MAP_VARS_ENTER_DATA
 };
 
+struct gomp_coalesce_buf;
+
 extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *, int);
 extern void gomp_acc_remove_pointer (void *, size_t, bool, int, int, int);
 extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *,
@@ -1013,8 +1019,17 @@ extern void gomp_copy_host2dev (struct gomp_device_descr *,
 				void *, const void *, size_t,
 				struct gomp_coalesce_buf *);
 extern void gomp_copy_dev2host (struct gomp_device_descr *,
-				struct goacc_asyncqueue *,
-				void *, const void *, size_t);
+				struct goacc_asyncqueue *, void *, const void *,
+				size_t);
+extern uintptr_t gomp_map_val (struct target_mem_desc *, void **, size_t);
+extern void gomp_attach_pointer (struct gomp_device_descr *,
+				 struct goacc_asyncqueue *, splay_tree,
+				 splay_tree_key, uintptr_t, size_t,
+				 struct gomp_coalesce_buf *);
+extern void gomp_detach_pointer (struct gomp_device_descr *,
+				 struct goacc_asyncqueue *, splay_tree_key,
+				 uintptr_t, bool, struct gomp_coalesce_buf *);
+
 extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
 					      size_t, void **, void **,
 					      size_t *, void *, bool,
@@ -1025,9 +1040,9 @@ extern struct target_mem_desc *gomp_map_vars_async (struct gomp_device_descr *,
 						    size_t *, void *, bool,
 						    enum gomp_map_vars_kind);
 extern void gomp_unmap_tgt (struct target_mem_desc *);
-extern void gomp_unmap_vars (struct target_mem_desc *, bool);
+extern void gomp_unmap_vars (struct target_mem_desc *, bool, bool);
 extern void gomp_unmap_vars_async (struct target_mem_desc *, bool,
-				   struct goacc_asyncqueue *);
+				   struct goacc_asyncqueue *, bool);
 extern void gomp_init_device (struct gomp_device_descr *);
 extern bool gomp_fini_device (struct gomp_device_descr *);
 extern void gomp_unload_device (struct gomp_device_descr *);
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 595b988..cc1ce2a 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -440,6 +440,16 @@ OACC_2.5 {
 	acc_update_self_async_array_h_;
 } OACC_2.0.1;
 
+OACC_2.6 {
+  global:
+	acc_attach;
+	acc_attach_async;
+	acc_detach;
+	acc_detach_async;
+	acc_detach_finalize;
+	acc_detach_finalize_async;
+} OACC_2.5;
+
 GOACC_2.0 {
   global:
 	GOACC_data_end;
diff --git a/libgomp/oacc-async.c b/libgomp/oacc-async.c
index bb00279..6c12c82 100644
--- a/libgomp/oacc-async.c
+++ b/libgomp/oacc-async.c
@@ -373,14 +373,14 @@ goacc_async_unmap_tgt (void *ptr)
 
 attribute_hidden void
 goacc_async_copyout_unmap_vars (struct target_mem_desc *tgt,
-				struct goacc_asyncqueue *aq)
+				struct goacc_asyncqueue *aq, bool finalize)
 {
   struct gomp_device_descr *devicep = tgt->device_descr;
 
   /* Increment reference to delay freeing of device memory until callback
      has triggered.  */
   tgt->refcount++;
-  gomp_unmap_vars_async (tgt, true, aq);
+  gomp_unmap_vars_async (tgt, true, aq, finalize);
   devicep->openacc.async.queue_callback_func (aq, goacc_async_unmap_tgt,
 					      (void *) tgt);
 }
diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index 48c9646..e1938c5 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -391,7 +391,7 @@ acc_shutdown_1 (acc_device_t d)
 	    {
 	      struct target_mem_desc *tgt = walk->dev->mem_map.root->key.tgt;
 
-	      gomp_unmap_vars (tgt, false);
+	      gomp_unmap_vars (tgt, false, false);
 	    }
 
 	  walk->dev = NULL;
diff --git a/libgomp/oacc-int.h b/libgomp/oacc-int.h
index 1f6c62c..878f0f4 100644
--- a/libgomp/oacc-int.h
+++ b/libgomp/oacc-int.h
@@ -112,7 +112,7 @@ void goacc_host_init (void);
 void goacc_init_asyncqueues (struct gomp_device_descr *);
 bool goacc_fini_asyncqueues (struct gomp_device_descr *);
 void goacc_async_copyout_unmap_vars (struct target_mem_desc *,
-				     struct goacc_asyncqueue *);
+				     struct goacc_asyncqueue *, bool);
 void goacc_async_free (struct gomp_device_descr *,
 		       struct goacc_asyncqueue *, void *);
 struct goacc_asyncqueue *get_goacc_asyncqueue (int);
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index e5ee956..76ba914 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -518,7 +518,7 @@ acc_unmap_data (void *h)
 
   gomp_mutex_unlock (&acc_dev->lock);
 
-  gomp_unmap_vars (t, true);
+  gomp_unmap_vars (t, true, false);
 
   if (profiling_setup_p)
     {
@@ -612,6 +612,7 @@ present_create_copy (unsigned f, void *h, size_t s, int async)
 				 &kinds, true, GOMP_MAP_VARS_OPENACC);
       /* Initialize dynamic refcount.  */
       tgt->list[0].key->dynamic_refcount = 1;
+      tgt->list[0].key->attach_count = NULL;
 
       gomp_mutex_lock (&acc_dev->lock);
 
@@ -750,6 +751,7 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
     {
       n->refcount = 0;
       n->dynamic_refcount = 0;
+      n->attach_count = NULL;
     }
   if (n->refcount < n->dynamic_refcount)
     {
@@ -997,6 +999,7 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
 
   /* Initialize dynamic refcount.  */
   tgt->list[0].key->dynamic_refcount = 1;
+  tgt->list[0].key->attach_count = NULL;
 
   gomp_mutex_lock (&acc_dev->lock);
   tgt->prev = acc_dev->openacc.data_environ;
@@ -1084,11 +1087,11 @@ gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
 
       /* If running synchronously, unmap immediately.  */
       if (async < acc_async_noval)
-	gomp_unmap_vars (t, true);
+	gomp_unmap_vars (t, true, finalize);
       else
 	{
 	  goacc_aq aq = get_goacc_asyncqueue (async);        
-	  goacc_async_copyout_unmap_vars (t, aq);
+	  goacc_async_copyout_unmap_vars (t, aq, finalize);
 	}
     }
 
@@ -1096,3 +1099,80 @@ gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
 
   gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
 }
+
+
+void
+acc_attach_async (void **hostaddr, int async)
+{
+  struct goacc_thread *thr = goacc_thread ();
+  struct gomp_device_descr *acc_dev = thr->dev;
+  goacc_aq aq = get_goacc_asyncqueue (async);
+
+  struct splay_tree_key_s cur_node;
+  splay_tree_key n;
+
+  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    return;
+
+  cur_node.host_start = (uintptr_t) hostaddr;
+  cur_node.host_end = cur_node.host_start + sizeof (void *);
+  n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+
+  if (n == NULL)
+    gomp_fatal ("struct not mapped for acc_attach");
+
+  gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
+		       0, NULL);
+}
+
+void
+acc_attach (void **hostaddr)
+{
+  acc_attach_async (hostaddr, acc_async_sync);
+}
+
+static void
+goacc_detach_internal (void **hostaddr, int async, bool finalize)
+{
+  struct goacc_thread *thr = goacc_thread ();
+  struct gomp_device_descr *acc_dev = thr->dev;
+  struct splay_tree_key_s cur_node;
+  splay_tree_key n;
+  struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async);
+
+  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    return;
+
+  cur_node.host_start = (uintptr_t) hostaddr;
+  cur_node.host_end = cur_node.host_start + sizeof (void *);
+  n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+
+  if (n == NULL)
+    gomp_fatal ("struct not mapped for acc_detach");
+
+  gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL);
+}
+
+void
+acc_detach (void **hostaddr)
+{
+  goacc_detach_internal (hostaddr, acc_async_sync, false);
+}
+
+void
+acc_detach_async (void **hostaddr, int async)
+{
+  goacc_detach_internal (hostaddr, async, false);
+}
+
+void
+acc_detach_finalize (void **hostaddr)
+{
+  goacc_detach_internal (hostaddr, acc_async_sync, true);
+}
+
+void
+acc_detach_finalize_async (void **hostaddr, int async)
+{
+  goacc_detach_internal (hostaddr, async, true);
+}
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 15b1462..f6c9114 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -50,12 +50,29 @@ find_pointer (int pos, size_t mapnum, unsigned short *kinds)
   if (pos + 1 >= mapnum)
     return 0;
 
-  unsigned char kind = kinds[pos+1] & 0xff;
+  unsigned char kind0 = kinds[pos] & 0xff;
 
-  if (kind == GOMP_MAP_TO_PSET)
-    return 3;
-  else if (kind == GOMP_MAP_POINTER)
-    return 2;
+  switch (kind0)
+    {
+    case GOMP_MAP_TO:
+    case GOMP_MAP_FORCE_TO:
+    case GOMP_MAP_FROM:
+    case GOMP_MAP_FORCE_FROM:
+    case GOMP_MAP_ALLOC:
+    case GOMP_MAP_RELEASE:
+      {
+	unsigned char kind1 = kinds[pos + 1] & 0xff;
+	if (kind1 == GOMP_MAP_POINTER
+	    || kind1 == GOMP_MAP_ALWAYS_POINTER
+	    || kind1 == GOMP_MAP_ATTACH
+	    || kind1 == GOMP_MAP_DETACH)
+	  return 2;
+	else if (kind1 == GOMP_MAP_TO_PSET)
+	  return 3;
+      }
+    default:
+      /* empty.  */;
+    }
 
   return 0;
 }
@@ -355,14 +372,7 @@ GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *),
 
   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
   for (i = 0; i < mapnum; i++)
-    {
-      if (tgt->list[i].key != NULL)
-	devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
-				+ tgt->list[i].key->tgt_offset
-				+ tgt->list[i].offset);
-      else
-	devaddrs[i] = NULL;
-    }
+    devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i);
 
   if (aq == NULL)
     {
@@ -382,7 +392,7 @@ GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *),
 				    &api_info);
 	}
       /* If running synchronously, unmap immediately.  */
-      gomp_unmap_vars (tgt, true);
+      gomp_unmap_vars (tgt, true, false);
       if (profiling_dispatch_p)
 	{
 	  prof_info.event_type = acc_ev_exit_data_end;
@@ -400,7 +410,7 @@ GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *),
       else
 	acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs,
 					  devaddrs, dims, tgt, aq);
-      goacc_async_copyout_unmap_vars (tgt, aq);
+      goacc_async_copyout_unmap_vars (tgt, aq, false);
     }
 
  out:
@@ -637,7 +647,7 @@ GOACC_data_end (void)
 
   gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
   thr->mapped_data = tgt->prev;
-  gomp_unmap_vars (tgt, true);
+  gomp_unmap_vars (tgt, true, false);
   gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
 
   if (profiling_dispatch_p)
@@ -668,6 +678,10 @@ GOACC_enter_exit_data (int device, size_t mapnum,
   if (mapnum > 0)
     {
       unsigned char kind = kinds[0] & 0xff;
+
+      if (kind == GOMP_MAP_STRUCT || kind == GOMP_MAP_FORCE_PRESENT)
+        kind = kinds[1] & 0xff;
+
       if (kind == GOMP_MAP_DELETE
 	  || kind == GOMP_MAP_FORCE_FROM)
 	finalize = true;
@@ -678,11 +692,14 @@ GOACC_enter_exit_data (int device, size_t mapnum,
     {
       unsigned char kind = kinds[i] & 0xff;
 
-      if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
+      if (kind == GOMP_MAP_POINTER
+	  || kind == GOMP_MAP_TO_PSET
+	  || kind == GOMP_MAP_STRUCT
+	  || kind == GOMP_MAP_FORCE_PRESENT)
 	continue;
 
       if (kind == GOMP_MAP_FORCE_ALLOC
-	  || kind == GOMP_MAP_FORCE_PRESENT
+	  || kind == GOMP_MAP_ATTACH
 	  || kind == GOMP_MAP_FORCE_TO
 	  || kind == GOMP_MAP_TO
 	  || kind == GOMP_MAP_ALLOC
@@ -694,6 +711,8 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 
       if (kind == GOMP_MAP_RELEASE
 	  || kind == GOMP_MAP_DELETE
+	  || kind == GOMP_MAP_DETACH
+	  || kind == GOMP_MAP_FORCE_DETACH
 	  || kind == GOMP_MAP_FROM
 	  || kind == GOMP_MAP_FORCE_FROM
 	  || kind == GOMP_MAP_DECLARE_DEALLOCATE)
@@ -809,6 +828,9 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 		case GOMP_MAP_ALLOC:
 		  acc_present_or_create (hostaddrs[i], sizes[i]);
 		  break;
+		case GOMP_MAP_ATTACH:
+		case GOMP_MAP_FORCE_PRESENT:
+		  break;
 		case GOMP_MAP_FORCE_ALLOC:
 		  acc_create (hostaddrs[i], sizes[i]);
 		  break;
@@ -818,6 +840,27 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 		case GOMP_MAP_FORCE_TO:
 		  acc_copyin (hostaddrs[i], sizes[i]);
 		  break;
+		case GOMP_MAP_STRUCT:
+		  {
+		    int elems = sizes[i];
+		    struct splay_tree_key_s k;
+		    splay_tree_key str;
+		    k.host_start = (uintptr_t) hostaddrs[i];
+		    k.host_end = k.host_start + 1;
+		    gomp_mutex_lock (&acc_dev->lock);
+		    str = splay_tree_lookup (&acc_dev->mem_map, &k);
+		    gomp_mutex_unlock (&acc_dev->lock);
+		    /* We increment the dynamic reference count for the struct
+		       itself by the number of struct elements that we
+		       mapped.  */
+		    if (str->refcount != REFCOUNT_INFINITY)
+		      {
+		        str->refcount += elems;
+			str->dynamic_refcount += elems;
+		      }
+		    i += elems;
+		  }
+		  break;
 		default:
 		  gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
 			      kind);
@@ -839,16 +882,57 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 	      i += pointer - 1;
 	    }
 	}
+
+      /* This loop only handles explicit "attach" clauses that are not an
+	 implicit part of a copy{,in,out}, etc. mapping.  */
+      for (i = 0; i < mapnum; i++)
+        {
+	  unsigned char kind = kinds[i] & 0xff;
+
+	  /* Scan for pointers and PSETs.  */
+	  int pointer = find_pointer (i, mapnum, kinds);
+
+	  if (!pointer)
+	    {
+	      if (kind == GOMP_MAP_ATTACH)
+		acc_attach (hostaddrs[i]);
+	      else if (kind == GOMP_MAP_STRUCT)
+	        i += sizes[i];
+	    }
+	  else
+	    i += pointer - 1;
+	}
     }
   else
-    for (i = 0; i < mapnum; ++i)
-      {
-	unsigned char kind = kinds[i] & 0xff;
+    {
+      /* This loop only handles explicit "detach" clauses that are not an
+	 implicit part of a copy{,in,out}, etc. mapping.  */
+      for (i = 0; i < mapnum; i++)
+        {
+	  unsigned char kind = kinds[i] & 0xff;
 
-	int pointer = find_pointer (i, mapnum, kinds);
+	  int pointer = find_pointer (i, mapnum, kinds);
 
-	if (!pointer)
-	  {
+	  if (!pointer)
+	    {
+	      if (kind == GOMP_MAP_DETACH)
+		acc_detach (hostaddrs[i]);
+	      else if (kind == GOMP_MAP_FORCE_DETACH)
+		acc_detach_finalize (hostaddrs[i]);
+	      else if (kind == GOMP_MAP_STRUCT)
+	        i += sizes[i];
+	    }
+	  else
+	    i += pointer - 1;
+	}
+
+      for (i = 0; i < mapnum; ++i)
+	{
+	  unsigned char kind = kinds[i] & 0xff;
+
+	  int pointer = find_pointer (i, mapnum, kinds);
+
+	  if (!pointer)
 	    switch (kind)
 	      {
 	      case GOMP_MAP_RELEASE:
@@ -861,6 +945,10 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 		      acc_delete_async (hostaddrs[i], sizes[i], async);
 		  }
 		break;
+	      case GOMP_MAP_DETACH:
+	      case GOMP_MAP_FORCE_DETACH:
+	      case GOMP_MAP_FORCE_PRESENT:
+		break;
 	      case GOMP_MAP_DECLARE_DEALLOCATE:
 	      case GOMP_MAP_FROM:
 	      case GOMP_MAP_FORCE_FROM:
@@ -869,28 +957,48 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 		else
 		  acc_copyout_async (hostaddrs[i], sizes[i], async);
 		break;
+	      case GOMP_MAP_STRUCT:
+		{
+		  int elems = sizes[i];
+		  struct splay_tree_key_s k;
+		  splay_tree_key str;
+		  k.host_start = (uintptr_t) hostaddrs[i];
+		  k.host_end = k.host_start + 1;
+		  gomp_mutex_lock (&acc_dev->lock);
+		  str = splay_tree_lookup (&acc_dev->mem_map, &k);
+		  gomp_mutex_unlock (&acc_dev->lock);
+		  /* Decrement dynamic reference count for the struct by the
+		     number of elements that we are unmapping.  */
+		  if (str->dynamic_refcount >= elems)
+		    {
+		      str->dynamic_refcount -= elems;
+		      str->refcount -= elems;
+		    }
+		  i += elems;
+		}
+		break;
 	      default:
 		gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
 			    kind);
 		break;
 	      }
-	  }
-	else
-	  {
-	    if (kind == GOMP_MAP_DECLARE_DEALLOCATE)
-	      gomp_acc_declare_allocate (false, pointer, &hostaddrs[i],
-					 &sizes[i], &kinds[i]);
-	    else
-	      {
-		bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
-				 || kind == GOMP_MAP_FROM);
-		gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
-					 finalize, pointer);
-		/* See the above comment.  */
-	      }
-	    i += pointer - 1;
-	  }
-      }
+	  else
+	    {
+	      if (kind == GOMP_MAP_DECLARE_DEALLOCATE)
+		gomp_acc_declare_allocate (false, pointer, &hostaddrs[i],
+					   &sizes[i], &kinds[i]);
+	      else
+		{
+		  bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
+				   || kind == GOMP_MAP_FROM);
+		  gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom,
+					   async, finalize, pointer);
+		  /* See the above comment.  */
+		}
+	      i += pointer - 1;
+	    }
+	}
+    }
 
  out:
   if (profiling_dispatch_p)
diff --git a/libgomp/openacc.h b/libgomp/openacc.h
index 261636c..41dd514 100644
--- a/libgomp/openacc.h
+++ b/libgomp/openacc.h
@@ -113,6 +113,10 @@ void *acc_hostptr (void *) __GOACC_NOTHROW;
 int acc_is_present (void *, size_t) __GOACC_NOTHROW;
 void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW;
 void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW;
+void acc_attach (void **) __GOACC_NOTHROW;
+void acc_attach_async (void **, int) __GOACC_NOTHROW;
+void acc_detach (void **) __GOACC_NOTHROW;
+void acc_detach_async (void **, int) __GOACC_NOTHROW;
 
 /* Async functions, specified in OpenACC 2.5.  */
 void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW;
@@ -129,6 +133,8 @@ void acc_copyout_finalize (void *, size_t) __GOACC_NOTHROW;
 void acc_copyout_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
 void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW;
 void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
+void acc_detach_finalize (void **) __GOACC_NOTHROW;
+void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW;
 
 /* CUDA-specific routines.  */
 void *acc_get_current_cuda_device (void) __GOACC_NOTHROW;
diff --git a/libgomp/target.c b/libgomp/target.c
index 7220ac6..d9d42eb 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -39,6 +39,7 @@
 #include <string.h>
 #include <assert.h>
 #include <errno.h>
+#include <limits.h>
 
 #ifdef PLUGIN_SUPPORT
 #include <dlfcn.h>
@@ -373,6 +374,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep,
   tgt_var->key = oldn;
   tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
   tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
+  tgt_var->do_detach = false;
   tgt_var->offset = newn->host_start - oldn->host_start;
   tgt_var->length = newn->host_end - newn->host_start;
 
@@ -539,7 +541,128 @@ gomp_map_fields_existing (struct target_mem_desc *tgt,
 	      (void *) cur_node.host_end);
 }
 
-static inline uintptr_t
+void
+gomp_attach_pointer (struct gomp_device_descr *devicep,
+		     struct goacc_asyncqueue *aq, splay_tree mem_map,
+		     splay_tree_key n, uintptr_t attach_to, size_t bias,
+		     struct gomp_coalesce_buf *cbufp)
+{
+  struct splay_tree_key_s s;
+  size_t size, idx;
+
+  if (n == NULL)
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("enclosing struct not mapped for attach");
+    }
+
+  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
+  /* We might have a pointer in a packed struct: however we cannot have more
+     than one such pointer in each pointer-sized portion of the struct, so
+     this is safe.  */
+  idx = (attach_to - n->host_start) / sizeof (void *);
+
+  if (!n->attach_count)
+    n->attach_count = gomp_malloc_cleared (sizeof (*n->attach_count) * size);
+
+  if (n->attach_count[idx] < USHRT_MAX)
+    n->attach_count[idx]++;
+  else
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("attach count overflow");
+    }
+
+  if (n->attach_count[idx] == 1)
+    {
+      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
+			 - n->host_start;
+      uintptr_t target = (uintptr_t) *(void **) attach_to;
+      splay_tree_key tn;
+      uintptr_t data;
+
+      if ((void *) target == NULL)
+	{
+	  gomp_mutex_unlock (&devicep->lock);
+	  gomp_fatal ("attempt to attach null pointer");
+	}
+
+      s.host_start = target + bias;
+      s.host_end = s.host_start + 1;
+      tn = splay_tree_lookup (mem_map, &s);
+
+      if (!tn)
+	{
+	  gomp_mutex_unlock (&devicep->lock);
+	  gomp_fatal ("pointer target not mapped for attach");
+	}
+
+      data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
+
+      gomp_debug (1,
+		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
+		  __FUNCTION__, (void *) attach_to, (void *) devptr,
+		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
+
+      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
+			  sizeof (void *), cbufp);
+    }
+  else
+    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
+		(void *) attach_to, n->attach_count[idx]);
+}
+
+void
+gomp_detach_pointer (struct gomp_device_descr *devicep,
+		     struct goacc_asyncqueue *aq, splay_tree_key n,
+		     uintptr_t detach_from, bool finalize,
+		     struct gomp_coalesce_buf *cbufp)
+{
+  size_t idx;
+
+  if (n == NULL)
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("enclosing struct not mapped for detach");
+    }
+
+  idx = (detach_from - n->host_start) / sizeof (void *);
+
+  if (!n->attach_count)
+    gomp_fatal ("no attachment counters for struct");
+
+  if (finalize)
+    n->attach_count[idx] = 1;
+
+  if (n->attach_count[idx] == 0)
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("attach count underflow");
+    }
+  else
+    n->attach_count[idx]--;
+
+  if (n->attach_count[idx] == 0)
+    {
+      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
+			 - n->host_start;
+      uintptr_t target = (uintptr_t) *(void **) detach_from;
+
+      gomp_debug (1,
+		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
+		  __FUNCTION__, (void *) detach_from, (void *) devptr,
+		  (void *) (n->tgt->tgt_start + n->tgt_offset),
+		  (void *) target);
+
+      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
+			  sizeof (void *), cbufp);
+    }
+  else
+    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
+		(void *) detach_from, n->attach_count[idx]);
+}
+
+uintptr_t
 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
 {
   if (tgt->list[i].key != NULL)
@@ -883,7 +1006,12 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
 	  da->map_index = i;
 	  continue;
 	}
-
+      else if ((kind & typemask) == GOMP_MAP_ATTACH)
+	{
+	  tgt->list[i].key = NULL;
+	  has_firstprivate = true;
+	  continue;
+	}
       cur_node.host_start = (uintptr_t) hostaddrs[i];
       if (!GOMP_MAP_POINTER_P (kind & typemask))
 	cur_node.host_end = cur_node.host_start + sizes[i];
@@ -1141,6 +1269,30 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
 		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
 				      + cur_node.host_start - n->host_start;
 		continue;
+	      case GOMP_MAP_ATTACH:
+		{
+		  cur_node.host_start = (uintptr_t) hostaddrs[i];
+		  cur_node.host_end = cur_node.host_start + sizeof (void *);
+		  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
+		  if (n != NULL)
+		    {
+		      tgt->list[i].key = n;
+		      tgt->list[i].offset = cur_node.host_start - n->host_start;
+		      tgt->list[i].length = n->host_end - n->host_start;
+		      tgt->list[i].copy_from = false;
+		      tgt->list[i].always_copy_from = false;
+		      tgt->list[i].do_detach = true;
+		    }
+		  else
+		    {
+		      gomp_mutex_unlock (&devicep->lock);
+		      gomp_fatal ("outer struct not mapped for attach");
+		    }
+		  gomp_attach_pointer (devicep, aq, mem_map, n,
+				       (uintptr_t) hostaddrs[i], sizes[i],
+				       cbufp);
+		  continue;
+		}
 	      default:
 		break;
 	      }
@@ -1194,10 +1346,12 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
 		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
 		tgt->list[i].always_copy_from
 		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
+		tgt->list[i].do_detach = false;
 		tgt->list[i].offset = 0;
 		tgt->list[i].length = k->host_end - k->host_start;
 		k->refcount = 1;
 		k->dynamic_refcount = 0;
+		k->attach_count = NULL;
 		tgt->refcount++;
 		array->left = NULL;
 		array->right = NULL;
@@ -1482,6 +1636,8 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
       is_tgt_unmapped = true;
       gomp_unmap_tgt (k->tgt);
     }
+  if (k->attach_count)
+    free (k->attach_count);
   return is_tgt_unmapped;
 }
 
@@ -1490,14 +1646,14 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
    has been done already.  */
 
 attribute_hidden void
-gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
+gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom, bool finalize)
 {
-  gomp_unmap_vars_async (tgt, do_copyfrom, NULL);
+  gomp_unmap_vars_async (tgt, do_copyfrom, NULL, finalize);
 }
 
 attribute_hidden void
 gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
-		       struct goacc_asyncqueue *aq)
+		       struct goacc_asyncqueue *aq, bool finalize)
 {
   struct gomp_device_descr *devicep = tgt->device_descr;
 
@@ -1517,10 +1673,23 @@ gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
     }
 
   size_t i;
+
+  /* We must perform detachments before any copies back to the host.  */
   for (i = 0; i < tgt->list_count; i++)
     {
       splay_tree_key k = tgt->list[i].key;
-      if (k == NULL)
+
+      if (k != NULL && tgt->list[i].do_detach)
+	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
+					     + tgt->list[i].offset, finalize,
+			     NULL);
+    }
+
+  for (i = 0; i < tgt->list_count; i++)
+    {
+      splay_tree_key k = tgt->list[i].key;
+
+      if (k == NULL || tgt->list[i].do_detach)
 	continue;
 
       bool do_unmap = false;
@@ -2139,7 +2308,7 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
 		     GOMP_MAP_VARS_TARGET);
   devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
 		     NULL);
-  gomp_unmap_vars (tgt_vars, true);
+  gomp_unmap_vars (tgt_vars, true, false);
 }
 
 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
@@ -2283,7 +2452,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
 		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
 		     args);
   if (tgt_vars)
-    gomp_unmap_vars (tgt_vars, true);
+    gomp_unmap_vars (tgt_vars, true, false);
 }
 
 /* Host fallback for GOMP_target_data{,_ext} routines.  */
@@ -2352,7 +2521,7 @@ GOMP_target_end_data (void)
     {
       struct target_mem_desc *tgt = icv->target_data;
       icv->target_data = tgt->prev;
-      gomp_unmap_vars (tgt, true);
+      gomp_unmap_vars (tgt, true, false);
     }
 }
 
@@ -2587,7 +2756,7 @@ gomp_target_task_fn (void *data)
       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
 	{
 	  if (ttask->tgt)
-	    gomp_unmap_vars (ttask->tgt, true);
+	    gomp_unmap_vars (ttask->tgt, true, false);
 	  return false;
 	}
 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-1.c
new file mode 100644
index 0000000..d8d7067
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-1.c
@@ -0,0 +1,24 @@
+#include <stdlib.h>
+#include <assert.h>
+
+struct dc
+{
+  int a;
+  int *b;
+};
+
+int
+main ()
+{
+  int n = 100, i;
+  struct dc v = { .a = 3, .b = (int *) malloc (sizeof (int) * n) };
+
+#pragma acc parallel loop copy(v.a, v.b[:n])
+  for (i = 0; i < n; i++)
+    v.b[i] = v.a;
+
+  for (i = 0; i < 10; i++)
+    assert (v.b[i] == v.a);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-2.c
new file mode 100644
index 0000000..7e26e9a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-2.c
@@ -0,0 +1,29 @@
+#include <assert.h>
+#include <stdlib.h>
+
+int
+main(int argc, char* argv[])
+{
+  struct foo {
+    int *a, *b, c, d, *e;
+  } s;
+
+  s.a = (int *) malloc (16 * sizeof (int));
+  s.b = (int *) malloc (16 * sizeof (int));
+  s.e = (int *) malloc (16 * sizeof (int));
+
+  #pragma acc data copy(s)
+  {
+    #pragma acc data copy(s.a[0:10])
+    {
+      #pragma acc parallel loop attach(s.a)
+      for (int i = 0; i < 10; i++)
+	s.a[i] = i;
+    }
+  }
+
+  for (int i = 0; i < 10; i++)
+    assert (s.a[i] == i);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c
new file mode 100644
index 0000000..cec764b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c
@@ -0,0 +1,34 @@
+#include <assert.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main ()
+{
+  int n = 100, i;
+  int *a = (int *) malloc (sizeof (int) * n);
+  int *b;
+
+  for (i = 0; i < n; i++)
+    a[i] = i+1;
+
+#pragma acc enter data copyin(a[:n]) create(b)
+
+  b = a;
+  acc_attach ((void **)&b);
+
+#pragma acc parallel loop present (b[:n])
+  for (i = 0; i < n; i++)
+    b[i] = i+1;
+
+  acc_detach ((void **)&b);
+
+#pragma acc exit data copyout(a[:n], b)
+
+  for (i = 0; i < 10; i++)
+    assert (a[i] == b[i]);
+
+  free (a);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-4.c
new file mode 100644
index 0000000..8874ca0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-4.c
@@ -0,0 +1,87 @@
+#include <assert.h>
+#include <stdlib.h>
+
+#define LIST_LENGTH 10
+
+struct node
+{
+  struct node *next;
+  int val;
+};
+
+int
+sum_nodes (struct node *head)
+{
+  int i = 0, sum = 0;
+
+#pragma acc parallel reduction(+:sum) present(head[:1])
+  {
+    for (; head != NULL; head = head->next)
+      sum += head->val;
+  }
+
+  return sum;
+}
+
+void
+insert (struct node *head, int val)
+{
+  struct node *n = (struct node *) malloc (sizeof (struct node));
+
+  if (head->next)
+    {
+#pragma acc exit data detach(head->next)
+    }
+
+  n->val = val;
+  n->next = head->next;
+  head->next = n;
+
+#pragma acc enter data copyin(n[:1])
+#pragma acc enter data attach(head->next)
+  if (n->next)
+    {
+#pragma acc enter data attach(n->next)
+    }
+}
+
+void
+destroy (struct node *head)
+{
+  while (head->next != NULL)
+    {
+#pragma acc exit data detach(head->next)
+      struct node * n = head->next;
+      head->next = n->next;
+      if (n->next)
+	{
+#pragma acc exit data detach(n->next)
+	}
+#pragma acc exit data delete (n[:1])
+      if (head->next)
+	{
+#pragma acc enter data attach(head->next)
+	}
+      free (n);
+    }
+}
+
+int
+main ()
+{
+  struct node list = { .next = NULL, .val = 0 };
+  int i;
+
+#pragma acc enter data copyin(list)
+
+  for (i = 0; i < LIST_LENGTH; i++)
+    insert (&list, i + 1);
+
+  assert (sum_nodes (&list) == (LIST_LENGTH * LIST_LENGTH + LIST_LENGTH) / 2);
+
+  destroy (&list);
+
+#pragma acc exit data delete(list)
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c
new file mode 100644
index 0000000..89cafbb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c
@@ -0,0 +1,81 @@
+#include <assert.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+struct node
+{
+  struct node *next;
+  int val;
+};
+
+int
+sum_nodes (struct node *head)
+{
+  int i = 0, sum = 0;
+
+#pragma acc parallel reduction(+:sum) present(head[:1])
+  {
+    for (; head != NULL; head = head->next)
+      sum += head->val;
+  }
+
+  return sum;
+}
+
+void
+insert (struct node *head, int val)
+{
+  struct node *n = (struct node *) malloc (sizeof (struct node));
+
+  if (head->next)
+    acc_detach ((void **) &head->next);
+
+  n->val = val;
+  n->next = head->next;
+  head->next = n;
+
+  acc_copyin (n, sizeof (struct node));
+  acc_attach((void **) &head->next);
+
+  if (n->next)
+    acc_attach ((void **) &n->next);
+}
+
+void
+destroy (struct node *head)
+{
+  while (head->next != NULL)
+    {
+      acc_detach ((void **) &head->next);
+      struct node * n = head->next;
+      head->next = n->next;
+      if (n->next)
+	acc_detach ((void **) &n->next);
+
+      acc_delete (n, sizeof (struct node));
+      if (head->next)
+	acc_attach((void **) &head->next);
+
+      free (n);
+    }
+}
+
+int
+main ()
+{
+  struct node list = { .next = NULL, .val = 0 };
+  int i;
+
+  acc_copyin (&list, sizeof (struct node));
+
+  for (i = 0; i < 10; i++)
+    insert (&list, 2);
+
+  assert (sum_nodes (&list) == 10 * 2);
+
+  destroy (&list);
+
+  acc_delete (&list, sizeof (struct node));
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-1.f90
new file mode 100644
index 0000000..c4cea11
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-1.f90
@@ -0,0 +1,35 @@
+! { dg-do run }
+
+! Test of attach/detach with "acc data".
+
+program dtype
+  implicit none
+  integer, parameter :: n = 512
+  type mytype
+    integer, allocatable :: a(:)
+  end type mytype
+  integer i
+
+  type(mytype) :: var
+
+  allocate(var%a(1:n))
+
+!$acc data copy(var)
+!$acc data copy(var%a)
+
+!$acc parallel loop
+  do i = 1,n
+    var%a(i) = i
+  end do
+!$acc end parallel loop
+
+!$acc end data
+!$acc end data
+
+  do i = 1,n
+    if (i .ne. var%a(i)) stop 1
+  end do
+
+  deallocate(var%a)
+
+end program dtype
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-2.f90
new file mode 100644
index 0000000..3593661
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-2.f90
@@ -0,0 +1,33 @@
+! { dg-do run }
+
+! Test of attach/detach with "acc data", two clauses at once.
+
+program dtype
+  implicit none
+  integer, parameter :: n = 512
+  type mytype
+    integer, allocatable :: a(:)
+  end type mytype
+  integer i
+
+  type(mytype) :: var
+
+  allocate(var%a(1:n))
+
+!$acc data copy(var) copy(var%a)
+
+!$acc parallel loop
+  do i = 1,n
+    var%a(i) = i
+  end do
+!$acc end parallel loop
+
+!$acc end data
+
+  do i = 1,n
+    if (i .ne. var%a(i)) stop 1
+  end do
+
+  deallocate(var%a)
+
+end program dtype
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f90
new file mode 100644
index 0000000..667d944
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f90
@@ -0,0 +1,34 @@
+! { dg-do run }
+
+! Test of attach/detach with "acc parallel".
+
+program dtype
+  implicit none
+  integer, parameter :: n = 512
+  type mytype
+    integer, allocatable :: a(:)
+    integer, allocatable :: b(:)
+  end type mytype
+  integer i
+
+  type(mytype) :: var
+
+  allocate(var%a(1:n))
+  allocate(var%b(1:n))
+
+!$acc parallel loop copy(var) copy(var%a(1:n)) copy(var%b(1:n))
+  do i = 1,n
+    var%a(i) = i
+    var%b(i) = i
+  end do
+!$acc end parallel loop
+
+  do i = 1,n
+    if (i .ne. var%a(i)) stop 1
+    if (i .ne. var%b(i)) stop 2
+  end do
+
+  deallocate(var%a)
+  deallocate(var%b)
+
+end program dtype
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-4.f90
new file mode 100644
index 0000000..6949e12
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-4.f90
@@ -0,0 +1,49 @@
+! { dg-do run }
+
+! Test of attach/detach with "acc enter/exit data".
+
+program dtype
+  implicit none
+  integer, parameter :: n = 512
+  type mytype
+    integer, allocatable :: a(:)
+    integer, allocatable :: b(:)
+  end type mytype
+  integer, allocatable :: r(:)
+  integer i
+
+  type(mytype) :: var
+
+  allocate(var%a(1:n))
+  allocate(var%b(1:n))
+  allocate(r(1:n))
+
+!$acc enter data copyin(var)
+
+!$acc enter data copyin(var%a, var%b, r)
+
+!$acc parallel loop
+  do i = 1,n
+    var%a(i) = i
+    var%b(i) = i * 2
+    r(i) = i * 3
+  end do
+!$acc end parallel loop
+
+!$acc exit data copyout(var%a)
+!$acc exit data copyout(var%b)
+!$acc exit data copyout(r)
+
+  do i = 1,n
+    if (i .ne. var%a(i)) stop 1
+    if (i * 2 .ne. var%b(i)) stop 2
+    if (i * 3 .ne. r(i)) stop 3
+  end do
+
+!$acc exit data delete(var)
+
+  deallocate(var%a)
+  deallocate(var%b)
+  deallocate(r)
+
+end program dtype
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-5.f90
new file mode 100644
index 0000000..6843cf1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-5.f90
@@ -0,0 +1,57 @@
+! { dg-do run }
+
+! Test of attach/detach, "enter data" inside "data", and subarray.
+
+program dtype
+  implicit none
+  integer, parameter :: n = 512
+  type mytype
+    integer, allocatable :: a(:)
+    integer, allocatable :: b(:)
+  end type mytype
+  integer i
+
+  type(mytype) :: var
+
+  allocate(var%a(1:n))
+  allocate(var%b(1:n))
+
+!$acc data copy(var)
+
+  do i = 1, n
+    var%a(i) = 0
+    var%b(i) = 0
+  end do
+
+!$acc enter data copyin(var%a(5:n - 5), var%b(5:n - 5))
+
+!$acc parallel loop
+  do i = 5,n - 5
+    var%a(i) = i
+    var%b(i) = i * 2
+  end do
+!$acc end parallel loop
+
+!$acc exit data copyout(var%a(5:n - 5), var%b(5:n - 5))
+
+!$acc end data
+
+  do i = 1,4
+    if (var%a(i) .ne. 0) stop 1
+    if (var%b(i) .ne. 0) stop 2
+  end do
+
+  do i = 5,n - 5
+    if (i .ne. var%a(i)) stop 3
+    if (i * 2 .ne. var%b(i)) stop 4
+  end do
+
+  do i = n - 4,n
+    if (var%a(i) .ne. 0) stop 5
+    if (var%b(i) .ne. 0) stop 6
+  end do
+
+  deallocate(var%a)
+  deallocate(var%b)
+
+end program dtype
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90
new file mode 100644
index 0000000..12910d0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90
@@ -0,0 +1,61 @@
+! { dg-do run }
+
+! Test of attachment counters and finalize.
+
+program dtype
+  implicit none
+  integer, parameter :: n = 512
+  type mytype
+    integer, allocatable :: a(:)
+    integer, allocatable :: b(:)
+  end type mytype
+  integer i
+
+  type(mytype) :: var
+
+  allocate(var%a(1:n))
+  allocate(var%b(1:n))
+
+!$acc data copy(var)
+
+  do i = 1, n
+    var%a(i) = 0
+    var%b(i) = 0
+  end do
+
+!$acc enter data copyin(var%a(5:n - 5), var%b(5:n - 5))
+
+  do i = 1,20
+    !$acc enter data attach(var%a)
+  end do
+
+!$acc parallel loop
+  do i = 5,n - 5
+    var%a(i) = i
+    var%b(i) = i * 2
+  end do
+!$acc end parallel loop
+
+!$acc exit data copyout(var%a(5:n - 5), var%b(5:n - 5)) finalize
+
+!$acc end data
+
+  do i = 1,4
+    if (var%a(i) .ne. 0) stop 1
+    if (var%b(i) .ne. 0) stop 2
+  end do
+
+  do i = 5,n - 5
+    if (i .ne. var%a(i)) stop 3
+    if (i * 2 .ne. var%b(i)) stop 4
+  end do
+
+  do i = n - 4,n
+    if (var%a(i) .ne. 0) stop 5
+    if (var%b(i) .ne. 0) stop 6
+  end do
+
+  deallocate(var%a)
+  deallocate(var%b)
+
+end program dtype
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-7.f90
new file mode 100644
index 0000000..ab44f0a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-7.f90
@@ -0,0 +1,89 @@
+! { dg-do run }
+
+! Test of attach/detach with scalar elements and nested derived types.
+
+program dtype
+  implicit none
+  integer, parameter :: n = 512
+  type subtype
+    integer :: g, h
+    integer, allocatable :: q(:)
+  end type subtype
+  type mytype
+    integer, allocatable :: a(:)
+    integer, allocatable :: c, d
+    integer, allocatable :: b(:)
+    integer :: f
+    type(subtype) :: s
+  end type mytype
+  integer i
+
+  type(mytype) :: var
+
+  allocate(var%a(1:n))
+  allocate(var%b(1:n))
+  allocate(var%c)
+  allocate(var%d)
+  allocate(var%s%q(1:n))
+
+  var%c = 16
+  var%d = 20
+  var%f = 7
+  var%s%g = 21
+  var%s%h = 38
+
+!$acc enter data copyin(var)
+
+  do i = 1, n
+    var%a(i) = 0
+    var%b(i) = 0
+    var%s%q(i) = 0
+  end do
+
+!$acc data copy(var%a(5:n - 5), var%b(5:n - 5), var%c, var%d) &
+!$acc & copy(var%s%q)
+
+!$acc parallel loop default(none) present(var)
+  do i = 5,n - 5
+    var%a(i) = i
+    var%b(i) = i * 2
+    var%s%q(i) = i * 3
+    var%s%g = 100
+    var%s%h = 101
+  end do
+!$acc end parallel loop
+
+!$acc end data
+
+!$acc exit data copyout(var)
+
+  do i = 1,4
+    if (var%a(i) .ne. 0) stop 1
+    if (var%b(i) .ne. 0) stop 2
+    if (var%s%q(i) .ne. 0) stop 3
+  end do
+
+  do i = 5,n - 5
+    if (i .ne. var%a(i)) stop 4
+    if (i * 2 .ne. var%b(i)) stop 5
+    if (i * 3 .ne. var%s%q(i)) stop 6
+  end do
+
+  do i = n - 4,n
+    if (var%a(i) .ne. 0) stop 7
+    if (var%b(i) .ne. 0) stop 8
+    if (var%s%q(i) .ne. 0) stop 9
+  end do
+
+  if (var%c .ne. 16) stop 10
+  if (var%d .ne. 20) stop 11
+  if (var%s%g .ne. 100 .or. var%s%h .ne. 101) stop 12
+  if (var%f .ne. 7) stop 13
+
+  deallocate(var%a)
+  deallocate(var%b)
+  deallocate(var%c)
+  deallocate(var%d)
+  deallocate(var%s%q)
+
+end program dtype
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-8.f90 b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-8.f90
new file mode 100644
index 0000000..d142763
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/deep-copy-8.f90
@@ -0,0 +1,41 @@
+! { dg-do run }
+
+! Test of explicit attach/detach clauses and attachment counters. There are no
+! acc_attach/acc_detach API routines in Fortran.
+
+program dtype
+  use openacc
+  implicit none
+  integer, parameter :: n = 512
+  type mytype
+    integer, allocatable :: a(:)
+  end type mytype
+  integer i
+
+  type(mytype) :: var
+
+  allocate(var%a(1:n))
+
+  call acc_copyin(var)
+  call acc_copyin(var%a)
+
+  !$acc enter data attach(var%a)
+
+!$acc parallel loop attach(var%a)
+  do i = 1,n
+    var%a(i) = i
+  end do
+!$acc end parallel loop
+
+  !$acc exit data detach(var%a)
+
+  call acc_copyout(var%a)
+  call acc_copyout(var)
+
+  do i = 1,n
+    if (i .ne. var%a(i)) stop 1
+  end do
+
+  deallocate(var%a)
+
+end program dtype
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/derived-type-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/derived-type-1.f90
index 1ec4784..eb7812d 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/derived-type-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/derived-type-1.f90
@@ -20,9 +20,9 @@
   !$acc end data
 
   do i = 1, n
-     if (d(i)%a /= i) call abort
-     if (d(i)%b /= i-1) call abort
-     if (d(i)%c /= i+1) call abort
+     if (d(i)%a /= i) stop 1
+     if (d(i)%b /= i-1) stop 2
+     if (d(i)%c /= i+1) stop 3
   end do
 end program
 
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/update-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/update-2.f90
index a37d526..c3c8a07 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/update-2.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/update-2.f90
@@ -37,7 +37,7 @@ program derived_acc
 
   !$acc update host(var%a)
 
-  if (var%a /= var%b) call abort
+  if (var%a /= var%b) stop 1
 
   var%b = 100
 
@@ -51,7 +51,7 @@ program derived_acc
 
   !$acc update host(var%a)
 
-  if (var%a /= var%b) call abort
+  if (var%a /= var%b) stop 2
 
   !$acc parallel loop present (var)
   do i = 1, n
@@ -64,7 +64,7 @@ program derived_acc
   var%a = -1
 
   do i = 1, n
-     if (var%c(i) /= i) call abort
+     if (var%c(i) /= i) stop 3
      var%c(i) = var%a
   end do
 
@@ -78,7 +78,7 @@ program derived_acc
      if (var%c(i) /= var%a) res = res + 1
   end do
 
-  if (res /= 0) call abort
+  if (res /= 0) stop 4
 
   var%c(:) = 0
 
@@ -93,8 +93,8 @@ program derived_acc
   !$acc update host(var%c(5))
 
   do i = 1, n
-     if (i /= 5 .and. var%c(i) /= 0) call abort
-     if (i == 5 .and. var%c(i) /= 1) call abort
+     if (i /= 5 .and. var%c(i) /= 0) stop 5
+     if (i == 5 .and. var%c(i) /= 1) stop 6
   end do
 
   !$acc parallel loop present(var)
@@ -106,7 +106,7 @@ program derived_acc
   !$acc update host(var%in%d)
 
   do i = 1, n
-     if (var%in%d(i) /= var%a) call abort
+     if (var%in%d(i) /= var%a) stop 7
   end do
 
   var%c(:) = 0
@@ -124,8 +124,8 @@ program derived_acc
   !$acc update host(var%c(n/2:n))
 
   do i = 1,n
-     if (i < n/2 .and. var%c(i) /= -1) call abort
-     if (i >= n/2 .and. var%c(i) /= i) call abort
+     if (i < n/2 .and. var%c(i) /= -1) stop 8
+     if (i >= n/2 .and. var%c(i) /= i) stop 9
   end do
 
   var%in%d(:) = 0
@@ -140,8 +140,8 @@ program derived_acc
   !$acc update host(var%in%d(5))
 
   do i = 1, n
-     if (i /= 5 .and. var%in%d(i) /= 0) call abort
-     if (i == 5 .and. var%in%d(i) /= 1) call abort
+     if (i /= 5 .and. var%in%d(i) /= 0) stop 10
+     if (i == 5 .and. var%in%d(i) /= 1) stop 11
   end do
 
   !$acc exit data delete(var)
@@ -173,7 +173,7 @@ subroutine derived_acc_subroutine(var)
 
   !$acc update host(var%a)
 
-  if (var%a /= var%b) call abort
+  if (var%a /= var%b) stop 12
 
   var%b = 100
 
@@ -187,7 +187,7 @@ subroutine derived_acc_subroutine(var)
 
   !$acc update host(var%a)
 
-  if (var%a /= var%b) call abort
+  if (var%a /= var%b) stop 13
 
   !$acc parallel loop present (var)
   do i = 1, n
@@ -200,7 +200,7 @@ subroutine derived_acc_subroutine(var)
   var%a = -1
 
   do i = 1, n
-     if (var%c(i) /= i) call abort
+     if (var%c(i) /= i) stop 14
      var%c(i) = var%a
   end do
 
@@ -214,7 +214,7 @@ subroutine derived_acc_subroutine(var)
      if (var%c(i) /= var%a) res = res + 1
   end do
 
-  if (res /= 0) call abort
+  if (res /= 0) stop 15
 
   var%c(:) = 0
 
@@ -229,8 +229,8 @@ subroutine derived_acc_subroutine(var)
   !$acc update host(var%c(5))
 
   do i = 1, n
-     if (i /= 5 .and. var%c(i) /= 0) call abort
-     if (i == 5 .and. var%c(i) /= 1) call abort
+     if (i /= 5 .and. var%c(i) /= 0) stop 16
+     if (i == 5 .and. var%c(i) /= 1) stop 17
   end do
 
   !$acc parallel loop present(var)
@@ -242,7 +242,7 @@ subroutine derived_acc_subroutine(var)
   !$acc update host(var%in%d)
 
   do i = 1, n
-     if (var%in%d(i) /= var%a) call abort
+     if (var%in%d(i) /= var%a) stop 18
   end do
 
   var%c(:) = 0
@@ -260,8 +260,8 @@ subroutine derived_acc_subroutine(var)
   !$acc update host(var%c(n/2:n))
 
   do i = 1,n
-     if (i < n/2 .and. var%c(i) /= -1) call abort
-     if (i >= n/2 .and. var%c(i) /= i) call abort
+     if (i < n/2 .and. var%c(i) /= -1) stop 19
+     if (i >= n/2 .and. var%c(i) /= i) stop 20
   end do
 
   var%in%d(:) = 0
@@ -276,8 +276,8 @@ subroutine derived_acc_subroutine(var)
   !$acc update host(var%in%d(5))
 
   do i = 1, n
-     if (i /= 5 .and. var%in%d(i) /= 0) call abort
-     if (i == 5 .and. var%in%d(i) /= 1) call abort
+     if (i /= 5 .and. var%in%d(i) /= 0) stop 21
+     if (i == 5 .and. var%in%d(i) /= 1) stop 22
   end do
 
   !$acc exit data delete(var)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 0/6] [og8] OpenACC attach/detach
@ 2018-11-20 21:55 Julian Brown
  2018-11-20 21:55 ` [PATCH 1/6] [og8] Host-to-device transfer coalescing & magic offset value self-documentation Julian Brown
                   ` (5 more replies)
  0 siblings, 6 replies; 8+ messages in thread
From: Julian Brown @ 2018-11-20 21:55 UTC (permalink / raw)
  To: gcc-patches; +Cc: cltang, Catherine_Moore, jakub

[-- Attachment #1: Type: text/plain, Size: 5063 bytes --]


This patch series is a backport of the OpenACC attach/detach support to
the openacc-gcc-8-branch branch. It was previously posted upstream here:

https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00823.html

This version of the series has been adjusted to account for features on
the branch that are not yet upstream. It also contains improvements to
the reference counting behaviour, partially verified using self-checking
code (not quite complete, and not yet submitted).

Tested (as a series) with offloading to nvptx. I will apply to the
openacc-gcc-8-branch shortly.

Julian Brown (6):
  [og8] Host-to-device transfer coalescing & magic offset value
    self-documentation
  [og8] Factor out duplicate code in gimplify_scan_omp_clauses
  [og8] OpenACC 2.6 manual deep copy support (attach/detach)
  [og8] Interaction of dynamic/multidimensional arrays with
    attach/detach.
  [og8] Backport parts of upstream declare-allocate patch
  [og8] OpenACC refcounting refresh

 gcc/c/c-parser.c                                   |   15 +-
 gcc/c/c-typeck.c                                   |    4 +
 gcc/cp/parser.c                                    |   16 +-
 gcc/cp/semantics.c                                 |    6 +-
 gcc/fortran/gfortran.h                             |    2 +
 gcc/fortran/openmp.c                               |  126 ++++--
 gcc/fortran/trans-openmp.c                         |  163 +++-----
 gcc/gimplify.c                                     |  414 ++++++++++--------
 gcc/omp-low.c                                      |   13 +-
 .../c-c++-common/goacc/deep-copy-multidim.c        |   32 ++
 gcc/testsuite/c-c++-common/goacc/mdc-1.c           |   10 +-
 gcc/testsuite/gfortran.dg/goacc/data-clauses.f95   |   38 +-
 gcc/testsuite/gfortran.dg/goacc/derived-types.f90  |   23 +-
 .../gfortran.dg/goacc/enter-exit-data.f95          |   24 +-
 .../gfortran.dg/goacc/kernels-alias-3.f95          |    4 +-
 libgomp/libgomp.h                                  |   30 ++-
 libgomp/libgomp.map                                |   10 +
 libgomp/oacc-mem.c                                 |  459 ++++++++++++--------
 libgomp/oacc-parallel.c                            |  212 ++++++++--
 libgomp/openacc.h                                  |    6 +
 libgomp/target.c                                   |  291 +++++++++++--
 .../libgomp.oacc-c-c++-common/context-2.c          |    6 +-
 .../libgomp.oacc-c-c++-common/context-4.c          |    6 +-
 .../libgomp.oacc-c-c++-common/deep-copy-1.c        |   24 +
 .../libgomp.oacc-c-c++-common/deep-copy-2.c        |   29 ++
 .../libgomp.oacc-c-c++-common/deep-copy-3.c        |   34 ++
 .../libgomp.oacc-c-c++-common/deep-copy-4.c        |   87 ++++
 .../libgomp.oacc-c-c++-common/deep-copy-5.c        |   81 ++++
 .../libgomp.oacc-c-c++-common/deep-copy-6.c        |   59 +++
 .../libgomp.oacc-c-c++-common/deep-copy-7.c        |   42 ++
 .../libgomp.oacc-c-c++-common/deep-copy-8.c        |   53 +++
 libgomp/testsuite/libgomp.oacc-fortran/data-2.f90  |   20 +-
 .../testsuite/libgomp.oacc-fortran/deep-copy-1.f90 |   35 ++
 .../testsuite/libgomp.oacc-fortran/deep-copy-2.f90 |   33 ++
 .../testsuite/libgomp.oacc-fortran/deep-copy-3.f90 |   34 ++
 .../testsuite/libgomp.oacc-fortran/deep-copy-4.f90 |   49 ++
 .../testsuite/libgomp.oacc-fortran/deep-copy-5.f90 |   57 +++
 .../testsuite/libgomp.oacc-fortran/deep-copy-6.f90 |   61 +++
 .../testsuite/libgomp.oacc-fortran/deep-copy-7.f90 |   89 ++++
 .../testsuite/libgomp.oacc-fortran/deep-copy-8.f90 |   41 ++
 .../libgomp.oacc-fortran/derived-type-1.f90        |    6 +-
 .../libgomp.oacc-fortran/non-scalar-data.f90       |    6 +-
 .../testsuite/libgomp.oacc-fortran/update-2.f90    |   44 +-
 43 files changed, 2079 insertions(+), 715 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/goacc/deep-copy-multidim.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-1.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-2.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-4.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-1.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-2.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-3.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-4.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-5.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-6.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-7.f90
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/deep-copy-8.f90


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 5/6] [og8] Backport parts of upstream declare-allocate patch
  2018-11-20 21:55 [PATCH 0/6] [og8] OpenACC attach/detach Julian Brown
                   ` (3 preceding siblings ...)
  2018-11-20 21:55 ` [PATCH 4/6] [og8] Interaction of dynamic/multidimensional arrays with attach/detach Julian Brown
@ 2018-11-20 21:56 ` Julian Brown
  2018-11-20 21:57 ` [PATCH 6/6] [og8] OpenACC refcounting refresh Julian Brown
  5 siblings, 0 replies; 8+ messages in thread
From: Julian Brown @ 2018-11-20 21:56 UTC (permalink / raw)
  To: gcc-patches; +Cc: cltang, Catherine_Moore, jakub

[-- Attachment #1: Type: text/plain, Size: 1431 bytes --]


This patch adjusts mappings used for some special cases in Fortran
(e.g. allocatable scalars) on og8 to match code that is already upstream,
or that has been submitted but not yet reviewed. Parts taken from
https://gcc.gnu.org/ml/gcc-patches/2018-09/msg01205.html and parts
reverted from https://gcc.gnu.org/ml/gcc-patches/2017-01/msg02188.html.

	gcc/fortran/
	* trans-openmp.c (gfc_omp_finish_clause): Don't use
	GOMP_MAP_FIRSTPRIVATE_POINTER.
	(gfc_trans_omp_clauses_1): Adjust handling of allocatable scalars.

	gcc/
	* gimplify.c (demote_firstprivate_pointer): Remove.
	(gimplify_scan_omp_clauses): Remove special handling for OpenACC. Don't
	call demote_firstprivate_pointer.
	(gimplify_adjust_omp_clauses): Adjust promotion of reduction clauses.
	* omp-low.c (lower_omp_target): Remove special handling for Fortran.

	gcc/testsuite/
	* gfortran.dg/goacc/kernels-alias-3.f95: Revert comment changes and
	XFAIL.

	libgomp/
	* testsuite/libgomp.oacc-fortran/non-scalar-data.f90: Remove XFAIL for
	-O2 and -O3 and explanatory comment.
---
 gcc/fortran/trans-openmp.c                         |   22 ++++-----
 gcc/gimplify.c                                     |   49 ++-----------------
 gcc/omp-low.c                                      |    3 +-
 .../gfortran.dg/goacc/kernels-alias-3.f95          |    4 +-
 .../libgomp.oacc-fortran/non-scalar-data.f90       |    6 +--
 5 files changed, 20 insertions(+), 64 deletions(-)


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0005-og8-Backport-parts-of-upstream-declare-allocate-patc.patch --]
[-- Type: text/x-patch; name="0005-og8-Backport-parts-of-upstream-declare-allocate-patc.patch", Size: 7050 bytes --]

diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index 98f40d1..71a3ebb 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -1084,7 +1084,7 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p)
 	return;
       tree orig_decl = decl;
       c4 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP);
-      OMP_CLAUSE_SET_MAP_KIND (c4, GOMP_MAP_FIRSTPRIVATE_POINTER);
+      OMP_CLAUSE_SET_MAP_KIND (c4, GOMP_MAP_POINTER);
       OMP_CLAUSE_DECL (c4) = decl;
       OMP_CLAUSE_SIZE (c4) = size_int (0);
       decl = build_fold_indirect_ref (decl);
@@ -1100,10 +1100,7 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p)
 	  OMP_CLAUSE_SIZE (c3) = size_int (0);
 	  decl = build_fold_indirect_ref (decl);
 	  OMP_CLAUSE_DECL (c) = decl;
-	  OMP_CLAUSE_SET_MAP_KIND (c4, GOMP_MAP_POINTER);
 	}
-      if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl)))
-	OMP_CLAUSE_SET_MAP_KIND (c4, GOMP_MAP_POINTER);
     }
   if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl)))
     {
@@ -2168,11 +2165,15 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses,
 					(TREE_TYPE (TREE_TYPE (field)))))
 		    {
 		      tree orig_decl = decl;
-		      enum gomp_map_kind gmk = GOMP_MAP_FIRSTPRIVATE_POINTER;
-		      if (GFC_DECL_GET_SCALAR_ALLOCATABLE (decl)
-			  && (n->sym->attr.oacc_declare_create)
-			  && clauses->update_allocatable)
-			gmk = ptr_map_kind;
+		      enum gomp_map_kind gmk = GOMP_MAP_POINTER;
+		      if (GFC_DECL_GET_SCALAR_ALLOCATABLE (field)
+			  && n->sym->attr.oacc_declare_create)
+			{
+			  if (clauses->update_allocatable)
+			    gmk = GOMP_MAP_ALWAYS_POINTER;
+			  else
+			    gmk = GOMP_MAP_FIRSTPRIVATE_POINTER;
+			}
 		      node4 = build_omp_clause (input_location,
 						OMP_CLAUSE_MAP);
 		      OMP_CLAUSE_SET_MAP_KIND (node4, gmk);
@@ -2189,10 +2190,7 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses,
 			  OMP_CLAUSE_DECL (node3) = decl;
 			  OMP_CLAUSE_SIZE (node3) = size_int (0);
 			  decl = build_fold_indirect_ref (decl);
-			  OMP_CLAUSE_SET_MAP_KIND (node4, GOMP_MAP_POINTER);
 			}
-		      if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl)))
-			OMP_CLAUSE_SET_MAP_KIND (node4, GOMP_MAP_POINTER);
 		    }
 		  if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl))
 		      && n->u.map_op != OMP_MAP_ATTACH
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 40bf586..7f55cfd 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -7634,37 +7634,6 @@ find_decl_expr (tree *tp, int *walk_subtrees, void *data)
   return NULL_TREE;
 }
 
-static void
-demote_firstprivate_pointer (tree decl, gimplify_omp_ctx *ctx)
-{
-  if (!lang_GNU_Fortran ())
-    return;
-
-  while (ctx)
-    {
-      if (ctx->region_type == ORT_ACC_PARALLEL
-	  || ctx->region_type == ORT_ACC_KERNELS)
-	break;
-      ctx = ctx->outer_context;
-    }
-
-  if (ctx == NULL)
-    return;
-
-  tree clauses = ctx->clauses;
-
-  for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
-    {
-      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
-	  && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
-	  && OMP_CLAUSE_DECL (c) == decl)
-	{
-	  OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_POINTER);
-	  return;
-	}
-    }
-}
-
 /* Insert a GOMP_MAP_ALLOC or GOMP_MAP_RELEASE node following a
    GOMP_MAP_STRUCT mapping.  C is an always_pointer mapping.  STRUCT_NODE is
    the struct node to insert the new mapping after (when the struct node is
@@ -7843,7 +7812,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
   outer_ctx = ctx->outer_context;
   if (code == OMP_TARGET)
     {
-      if (!lang_GNU_Fortran () || (region_type & ORT_ACC))
+      if (!lang_GNU_Fortran ())
 	ctx->target_map_pointers_as_0len_arrays = true;
       ctx->target_map_scalars_firstprivate = true;
     }
@@ -7971,7 +7940,6 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 	  if (!(region_type & ORT_ACC))
 	    check_non_private = "reduction";
 	  decl = OMP_CLAUSE_DECL (c);
-	  demote_firstprivate_pointer (decl, ctx->outer_context);
 	  if (TREE_CODE (decl) == MEM_REF)
 	    {
 	      tree type = TREE_TYPE (decl);
@@ -9491,16 +9459,11 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
 		      && kind != GOMP_MAP_FORCE_PRESENT
 		      && kind != GOMP_MAP_POINTER)
 		    {
-		      if (lang_hooks.decls.omp_privatize_by_reference (decl))
-			OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_POINTER);
-		      else
-			{
-			  warning_at (OMP_CLAUSE_LOCATION (c), 0,
-				      "incompatible data clause with reduction "
-				      "on %qE; promoting to present_or_copy",
-				      DECL_NAME (t));
-			  OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_TOFROM);
-			}
+		      warning_at (OMP_CLAUSE_LOCATION (c), 0,
+				  "incompatible data clause with reduction "
+				  "on %qE; promoting to present_or_copy",
+				  DECL_NAME (t));
+		      OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_TOFROM);
 		    }
 		}
 	    }
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 1726451..a5fc2b1 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -9112,8 +9112,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 		  }
 		else
 		  is_ref = omp_is_reference (var);
-		if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE
-		    || (lang_GNU_Fortran () && TREE_CODE (var) == PARM_DECL))
+		if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE)
 		  is_ref = false;
 		bool ref_to_array = false;
 		if (is_ref)
diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-alias-3.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-alias-3.f95
index 09f0264..36b06d3 100644
--- a/gcc/testsuite/gfortran.dg/goacc/kernels-alias-3.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/kernels-alias-3.f95
@@ -17,6 +17,4 @@ end program main
 
 ! Only the omp_data_i related loads should be annotated with cliques.
 ! { dg-final { scan-tree-dump-times "clique 1 base 1" 2 "ealias" } }
-! The following FAILs since/needs to be updated for the "Partially enable
-! GOMP_MAP_FIRSTPRIVATE_POINTER in gfortran" changes.
-! { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 3 "ealias" { xfail *-*-* } } }
+! { dg-final { scan-tree-dump-times "(?n)clique 1 base 0" 3 "ealias" } }
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/non-scalar-data.f90 b/libgomp/testsuite/libgomp.oacc-fortran/non-scalar-data.f90
index 7562571..99bd692 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/non-scalar-data.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/non-scalar-data.f90
@@ -3,11 +3,9 @@
 ! present.
 
 ! { dg-do run }
-! TODO, for "-Os" see <https://gcc.gnu.org/PR80995>, and for the others, this
-! regressed with the "Partially enable GOMP_MAP_FIRSTPRIVATE_POINTER in
-! gfortran" changes.
+! TODO, <https://gcc.gnu.org/PR80995>.
 ! warning: OpenACC kernels construct will be executed sequentially; will by default avoid offloading to prevent data copy penalty
-! { dg-xfail-if "TODO" { openacc_nvidia_accel_selected } { "-Os" "-O2" "-O3" } { "" } }
+! { dg-xfail-if "TODO" { openacc_nvidia_accel_selected } { "-Os" } { "" } }
 
 program main
   implicit none

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 6/6] [og8] OpenACC refcounting refresh
  2018-11-20 21:55 [PATCH 0/6] [og8] OpenACC attach/detach Julian Brown
                   ` (4 preceding siblings ...)
  2018-11-20 21:56 ` [PATCH 5/6] [og8] Backport parts of upstream declare-allocate patch Julian Brown
@ 2018-11-20 21:57 ` Julian Brown
  5 siblings, 0 replies; 8+ messages in thread
From: Julian Brown @ 2018-11-20 21:57 UTC (permalink / raw)
  To: gcc-patches; +Cc: cltang, Catherine_Moore, jakub

[-- Attachment #1: Type: text/plain, Size: 3827 bytes --]


This patch represents a mild overhaul of reference counting for OpenACC
in libgomp.  It's been partly automatically checked (using code not yet
quite finished nor submitted upstream), but it's already more precise
than the pre-patch implementation (as demonstrated by adjustments to
previously-erroneous tests, included).

I have a few more changes planned, but those are still tbd.

	libgomp/
	* libgomp.h (gomp_device_descr): Add GOMP_MAP_VARS_OPENACC_ENTER_DATA.
	(gomp_acc_remove_pointer): Update prototype.
	(gomp_acc_data_env_remove_tgt): Add prototype.
	(gomp_unmap_vars, gomp_map_vars_async): Update prototype.
	* oacc-int.h (goacc_async_copyout_unmap_vars): Update prototype.
	* oacc-async.c (goacc_async_copyout_unmap_vars): Remove finalize
	parameter.
	* oacc-init.c (acc_shutdown_1): Remove finalize argument to
	gomp_unmap_vars call.
	* oacc-mem.c (lookup_dev_1): New helper function.
	(lookup_dev): Rewrite in terms of above.
	(acc_free): Update calls to lookup_dev.
	(acc_map_data): Likewise.  Don't add data mapped this way to OpenACC
	data environment list.
	(gomp_acc_data_env_remove, gomp_acc_data_env_remove_tgt): New functions.
	(acc_unmap_data): Rewrite using splay tree functions directly.  Don't
	call gomp_unmap_vars.  Fix refcount handling.
	(present_create_copy): Use GOMP_MAP_VARS_OPENACC_ENTER_DATA in
	gomp_map_vars_async call.  Adjust refcount handling.
	(delete_copyout): Remove dubious handling of target_mem_desc refcount.
	(gomp_acc_insert_pointer): Use GOMP_MAP_VARS_OPENACC_ENTER_DATA in
	gomp_map_vars_async call.  Update refcount handling.
	(gomp_acc_remove_pointer): Reimplement.  Fix detach and refcount
	handling.
	* oacc-parallel.c (find_pointer): Handle more mapping types.  Update
	calls to gomp_unmap_vars and goacc_async_copyout_unmap_vars.
	(GOACC_enter_exit_data): Update refcount handling.

	libgomp/
	* target.c (gomp_detach_pointer): Unlock device on error path.
	(gomp_map_vars_async): Support GOMP_MAP_VARS_OPENACC_ENTER_DATA and
	mapping size fix GOMP_MAP_ATTACH.
	(gomp_unmap_tgt): Call gomp_acc_data_env_remove_tgt.
	(gomp_unmap_vars): Remove finalize parameter.
	(gomp_unmap_vars_async): Likewise.  Adjust detach handling.
	(GOMP_target, GOMP_target_ext, GOMP_target_end_data)
	(gomp_target_task_fn): Update calls to gomp_unmap_vars.
	* testsuite/libgomp.oacc-c-c++-common/context-2.c: Use correct API to
	unmap data.
	* testsuite/libgomp.oacc-c-c++-common/context-4.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c: New test.
	* testsuite/libgomp.oacc-fortran/data-2.f90: Fix for unmap semantics.
---
 libgomp/libgomp.h                                  |   10 +-
 libgomp/oacc-async.c                               |    4 +-
 libgomp/oacc-init.c                                |    2 +-
 libgomp/oacc-int.h                                 |    2 +-
 libgomp/oacc-mem.c                                 |  387 ++++++++++----------
 libgomp/oacc-parallel.c                            |   76 +++--
 libgomp/target.c                                   |   35 ++-
 .../libgomp.oacc-c-c++-common/context-2.c          |    6 +-
 .../libgomp.oacc-c-c++-common/context-4.c          |    6 +-
 .../libgomp.oacc-c-c++-common/deep-copy-6.c        |   59 +++
 .../libgomp.oacc-c-c++-common/deep-copy-7.c        |   42 +++
 .../libgomp.oacc-c-c++-common/deep-copy-8.c        |   53 +++
 libgomp/testsuite/libgomp.oacc-fortran/data-2.f90  |   20 +-
 13 files changed, 445 insertions(+), 257 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0006-og8-OpenACC-refcounting-refresh.patch --]
[-- Type: text/x-patch; name="0006-og8-OpenACC-refcounting-refresh.patch", Size: 34989 bytes --]

diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 17fe0d3..568e260 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1002,6 +1002,7 @@ struct gomp_device_descr
 enum gomp_map_vars_kind
 {
   GOMP_MAP_VARS_OPENACC,
+  GOMP_MAP_VARS_OPENACC_ENTER_DATA,
   GOMP_MAP_VARS_TARGET,
   GOMP_MAP_VARS_DATA,
   GOMP_MAP_VARS_ENTER_DATA
@@ -1010,7 +1011,8 @@ enum gomp_map_vars_kind
 struct gomp_coalesce_buf;
 
 extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *, int);
-extern void gomp_acc_remove_pointer (void *, size_t, bool, int, int, int);
+extern void gomp_acc_remove_pointer (void **, size_t *, unsigned short *,
+				     int, void *, bool, int);
 extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *,
 				       unsigned short *);
 struct gomp_coalesce_buf;
@@ -1039,10 +1041,12 @@ extern struct target_mem_desc *gomp_map_vars_async (struct gomp_device_descr *,
 						    size_t, void **, void **,
 						    size_t *, void *, bool,
 						    enum gomp_map_vars_kind);
+extern void gomp_acc_data_env_remove_tgt (struct target_mem_desc **,
+					  struct target_mem_desc *);
 extern void gomp_unmap_tgt (struct target_mem_desc *);
-extern void gomp_unmap_vars (struct target_mem_desc *, bool, bool);
+extern void gomp_unmap_vars (struct target_mem_desc *, bool);
 extern void gomp_unmap_vars_async (struct target_mem_desc *, bool,
-				   struct goacc_asyncqueue *, bool);
+				   struct goacc_asyncqueue *);
 extern void gomp_init_device (struct gomp_device_descr *);
 extern bool gomp_fini_device (struct gomp_device_descr *);
 extern void gomp_unload_device (struct gomp_device_descr *);
diff --git a/libgomp/oacc-async.c b/libgomp/oacc-async.c
index 6c12c82..bb00279 100644
--- a/libgomp/oacc-async.c
+++ b/libgomp/oacc-async.c
@@ -373,14 +373,14 @@ goacc_async_unmap_tgt (void *ptr)
 
 attribute_hidden void
 goacc_async_copyout_unmap_vars (struct target_mem_desc *tgt,
-				struct goacc_asyncqueue *aq, bool finalize)
+				struct goacc_asyncqueue *aq)
 {
   struct gomp_device_descr *devicep = tgt->device_descr;
 
   /* Increment reference to delay freeing of device memory until callback
      has triggered.  */
   tgt->refcount++;
-  gomp_unmap_vars_async (tgt, true, aq, finalize);
+  gomp_unmap_vars_async (tgt, true, aq);
   devicep->openacc.async.queue_callback_func (aq, goacc_async_unmap_tgt,
 					      (void *) tgt);
 }
diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index e1938c5..48c9646 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -391,7 +391,7 @@ acc_shutdown_1 (acc_device_t d)
 	    {
 	      struct target_mem_desc *tgt = walk->dev->mem_map.root->key.tgt;
 
-	      gomp_unmap_vars (tgt, false, false);
+	      gomp_unmap_vars (tgt, false);
 	    }
 
 	  walk->dev = NULL;
diff --git a/libgomp/oacc-int.h b/libgomp/oacc-int.h
index 878f0f4..1f6c62c 100644
--- a/libgomp/oacc-int.h
+++ b/libgomp/oacc-int.h
@@ -112,7 +112,7 @@ void goacc_host_init (void);
 void goacc_init_asyncqueues (struct gomp_device_descr *);
 bool goacc_fini_asyncqueues (struct gomp_device_descr *);
 void goacc_async_copyout_unmap_vars (struct target_mem_desc *,
-				     struct goacc_asyncqueue *, bool);
+				     struct goacc_asyncqueue *);
 void goacc_async_free (struct gomp_device_descr *,
 		       struct goacc_asyncqueue *, void *);
 struct goacc_asyncqueue *get_goacc_asyncqueue (int);
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 76ba914..3202f06 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -52,6 +52,25 @@ lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
   return key;
 }
 
+/* Helper for lookup_dev.  Iterate over splay tree.  */
+
+static splay_tree_key
+lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
+{
+  splay_tree_key k = &node->key;
+  struct target_mem_desc *t = k->tgt;
+
+  if (d >= t->tgt_start && d + s <= t->tgt_end)
+    return k;
+
+  if (node->left)
+    return lookup_dev_1 (node->left, d, s);
+  if (node->right)
+    return lookup_dev_1 (node->right, d, s);
+
+  return NULL;
+}
+
 /* Return block containing [D->S), or NULL if not contained.
    The list isn't ordered by device address, so we have to iterate
    over the whole array.  This is not expected to be a common
@@ -59,35 +78,12 @@ lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
    remains locked on exit.  */
 
 static splay_tree_key
-lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
+lookup_dev (splay_tree mem_map, void *d, size_t s)
 {
-  int i;
-  struct target_mem_desc *t;
-
-  if (!tgt)
-    return NULL;
-
-  for (t = tgt; t != NULL; t = t->prev)
-    {
-      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
-        break;
-    }
-
-  if (!t)
+  if (!mem_map || !mem_map->root)
     return NULL;
 
-  for (i = 0; i < t->list_count; i++)
-    {
-      void * offset;
-
-      splay_tree_key k = &t->array[i].key;
-      offset = d - t->tgt_start + k->tgt_offset;
-
-      if (k->host_start + offset <= (void *) k->host_end)
-        return k;
-    }
-
-  return NULL;
+  return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
 }
 
 /* OpenACC is silent on how memory exhaustion is indicated.  We return
@@ -165,7 +161,7 @@ acc_free (void *d)
   /* We don't have to call lazy open here, as the ptr value must have
      been returned by acc_malloc.  It's not permitted to pass NULL in
      (unless you got that null from acc_malloc).  */
-  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
+  if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
     {
       void *offset;
 
@@ -325,7 +321,7 @@ acc_hostptr (void *d)
 
   gomp_mutex_lock (&acc_dev->lock);
 
-  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);
+  n = lookup_dev (&acc_dev->mem_map, d, 1);
 
   if (!n)
     {
@@ -422,7 +418,7 @@ acc_map_data (void *h, void *d, size_t s)
 		      (int)s);
 	}
 
-      if (lookup_dev (thr->dev->openacc.data_environ, d, s))
+      if (lookup_dev (&thr->dev->mem_map, d, s))
         {
 	  gomp_mutex_unlock (&acc_dev->lock);
 	  gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
@@ -436,11 +432,6 @@ acc_map_data (void *h, void *d, size_t s)
       tgt->list[0].key->refcount = REFCOUNT_INFINITY;
     }
 
-  gomp_mutex_lock (&acc_dev->lock);
-  tgt->prev = acc_dev->openacc.data_environ;
-  acc_dev->openacc.data_environ = tgt;
-  gomp_mutex_unlock (&acc_dev->lock);
-
   if (profiling_setup_p)
     {
       thr->prof_info = NULL;
@@ -448,11 +439,83 @@ acc_map_data (void *h, void *d, size_t s)
     }
 }
 
+/* Remove the target_mem_desc holding the mapping for MAPNUM HOSTADDRS from
+   the OpenACC data environment pointed to by DATA_ENV.  The device lock
+   should be held before calling, and remains locked on exit.  */
+
+static void
+gomp_acc_data_env_remove (struct gomp_device_descr *acc_dev,
+			  struct target_mem_desc **data_env, void **hostaddrs,
+			  int mapnum)
+{
+  struct target_mem_desc *t, *tp;
+
+  for (tp = NULL, t = *data_env; t != NULL; tp = t, t = t->prev)
+    {
+      bool all_match = true;
+
+      /* We must locate the target descriptor by "value", matching each
+	 hostaddr that it describes.  */
+      if (t->list_count != mapnum)
+        continue;
+
+      for (int i = 0; i < t->list_count; i++)
+	if (t->list[i].key
+	    && (t->list[i].key->host_start + t->list[i].offset
+		!= (uintptr_t) hostaddrs[i]))
+	  {
+	    all_match = false;
+	    break;
+	  }
+
+      if (all_match)
+	{
+	  if (t->refcount > 1)
+	    t->refcount--;
+	  else
+	    {
+	      if (tp)
+		tp->prev = t->prev;
+	      else
+		*data_env = t->prev;
+	    }
+	  return;
+	}
+    }
+
+  gomp_mutex_unlock (&acc_dev->lock);
+  gomp_fatal ("cannot find data mapping to remove in data environment");
+}
+
+/* Similar, but removes target_mem_desc REMOVE from the DATA_ENV, in case its
+   reference count drops to zero resulting in it being unmapped (in
+   target.c:gomp_unmap_tgt).  Unlike the above function it is not an error if
+   REMOVE is not present in the environment.  The device lock should be held
+   before calling, and remains locked on exit.  */
+
+attribute_hidden void
+gomp_acc_data_env_remove_tgt (struct target_mem_desc **data_env,
+			      struct target_mem_desc *remove)
+{
+  struct target_mem_desc *t, *tp;
+
+  for (tp = NULL, t = *data_env; t != NULL; tp = t, t = t->prev)
+    if (t == remove)
+      {
+	if (tp)
+	  tp->prev = t->prev;
+	else
+	  *data_env = t->prev;
+	return;
+      }
+}
+
 void
 acc_unmap_data (void *h)
 {
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
+  struct splay_tree_key_s cur_node;
 
   /* No need to call lazy open, as the address must have been mapped.  */
 
@@ -466,12 +529,11 @@ acc_unmap_data (void *h)
     = __builtin_expect (goacc_profiling_setup_p (thr, &prof_info, &api_info),
 			false);
 
-  size_t host_size;
-
   gomp_mutex_lock (&acc_dev->lock);
 
-  splay_tree_key n = lookup_host (acc_dev, h, 1);
-  struct target_mem_desc *t;
+  cur_node.host_start = (uintptr_t) h;
+  cur_node.host_end = cur_node.host_start + 1;
+  splay_tree_key n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
 
   if (!n)
     {
@@ -479,47 +541,28 @@ acc_unmap_data (void *h)
       gomp_fatal ("%p is not a mapped block", (void *)h);
     }
 
-  host_size = n->host_end - n->host_start;
-
   if (n->host_start != (uintptr_t) h)
     {
+      size_t host_size = n->host_end - n->host_start;
       gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("[%p,%d] surrounds %p",
 		  (void *) n->host_start, (int) host_size, (void *) h);
     }
 
-  /* Mark for removal.  */
-  n->refcount = 1;
+  splay_tree_remove (&acc_dev->mem_map, n);
 
-  t = n->tgt;
+  struct target_mem_desc *tgt = n->tgt;
 
-  if (t->refcount == 2)
+  if (tgt->refcount > 0)
+    tgt->refcount--;
+  else
     {
-      struct target_mem_desc *tp;
-
-      /* This is the last reference, so pull the descriptor off the
-         chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
-         freeing the device memory. */
-      t->tgt_end = 0;
-      t->to_free = 0;
-
-      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
-	   tp = t, t = t->prev)
-	if (n->tgt == t)
-	  {
-	    if (tp)
-	      tp->prev = t->prev;
-	    else
-	      acc_dev->openacc.data_environ = t->prev;
-
-	    break;
-	  }
+      free (tgt->array);
+      free (tgt);
     }
 
   gomp_mutex_unlock (&acc_dev->lock);
 
-  gomp_unmap_vars (t, true, false);
-
   if (profiling_setup_p)
     {
       thr->prof_info = NULL;
@@ -585,6 +628,24 @@ present_create_copy (unsigned f, void *h, size_t s, int async)
 	  n->refcount++;
 	  n->dynamic_refcount++;
 	}
+
+      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)
+						 + sizeof (tgt->list[0]));
+      tgt->refcount = 1;
+      tgt->tgt_start = 0;
+      tgt->tgt_end = 0;
+      tgt->to_free = NULL;
+      tgt->prev = acc_dev->openacc.data_environ;
+      tgt->list_count = 1;
+      tgt->device_descr = acc_dev;
+      tgt->list[0].key = n;
+      tgt->list[0].copy_from = false;
+      tgt->list[0].always_copy_from = false;
+      tgt->list[0].do_detach = false;
+      tgt->list[0].offset = (uintptr_t) h - n->host_start;
+      tgt->list[0].length = 0;
+      acc_dev->openacc.data_environ = tgt;
+
       gomp_mutex_unlock (&acc_dev->lock);
     }
   else if (!(f & FLAG_CREATE))
@@ -609,18 +670,19 @@ present_create_copy (unsigned f, void *h, size_t s, int async)
       goacc_aq aq = get_goacc_asyncqueue (async);
 
       tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
-				 &kinds, true, GOMP_MAP_VARS_OPENACC);
-      /* Initialize dynamic refcount.  */
-      tgt->list[0].key->dynamic_refcount = 1;
-      tgt->list[0].key->attach_count = NULL;
+				 &kinds, true,
+				 GOMP_MAP_VARS_OPENACC_ENTER_DATA);
 
-      gomp_mutex_lock (&acc_dev->lock);
+      for (int i = 0; i < tgt->list_count; i++)
+        if (tgt->list[i].key)
+	  tgt->list[i].key->dynamic_refcount++;
 
-      d = tgt->to_free;
+      gomp_mutex_lock (&acc_dev->lock);
       tgt->prev = acc_dev->openacc.data_environ;
       acc_dev->openacc.data_environ = tgt;
-
       gomp_mutex_unlock (&acc_dev->lock);
+
+      d = tgt->to_free;
     }
 
   if (profiling_setup_p)
@@ -753,11 +815,6 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
       n->dynamic_refcount = 0;
       n->attach_count = NULL;
     }
-  if (n->refcount < n->dynamic_refcount)
-    {
-      gomp_mutex_unlock (&acc_dev->lock);
-      gomp_fatal ("Dynamic reference counting assert fail\n");
-    }
 
   if (f & FLAG_FINALIZE)
     {
@@ -772,21 +829,6 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname)
 
   if (n->refcount == 0)
     {
-      if (n->tgt->refcount == 2)
-	{
-	  struct target_mem_desc *tp, *t;
-	  for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
-	       tp = t, t = t->prev)
-	    if (n->tgt == t)
-	      {
-		if (tp)
-		  tp->prev = t->prev;
-		else
-		  acc_dev->openacc.data_environ = t->prev;
-		break;
-	      }
-	}
-
       if (f & FLAG_COPYOUT)
 	{
 	  goacc_aq aq = get_goacc_asyncqueue (async);
@@ -968,38 +1010,16 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
-  if (acc_is_present (*hostaddrs, *sizes))
-    {
-      splay_tree_key n;
-      gomp_mutex_lock (&acc_dev->lock);
-      n = lookup_host (acc_dev, *hostaddrs, *sizes);
-      gomp_mutex_unlock (&acc_dev->lock);
-
-      tgt = n->tgt;
-      for (size_t i = 0; i < tgt->list_count; i++)
-	if (tgt->list[i].key == n)
-	  {
-	    for (size_t j = 0; j < mapnum; j++)
-	      if (i + j < tgt->list_count && tgt->list[i + j].key)
-		{
-		  tgt->list[i + j].key->refcount++;
-		  tgt->list[i + j].key->dynamic_refcount++;
-		}
-	    return;
-	  }
-      /* Should not reach here.  */
-      gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
-    }
-
   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
   goacc_aq aq = get_goacc_asyncqueue (async);
   tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
-			     NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
+			     NULL, sizes, kinds, true,
+			     GOMP_MAP_VARS_OPENACC_ENTER_DATA);
   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
 
-  /* Initialize dynamic refcount.  */
-  tgt->list[0].key->dynamic_refcount = 1;
-  tgt->list[0].key->attach_count = NULL;
+  for (size_t i = 0; i < tgt->list_count; i++)
+    if (tgt->list[i].key)
+      tgt->list[i].key->dynamic_refcount++;
 
   gomp_mutex_lock (&acc_dev->lock);
   tgt->prev = acc_dev->openacc.data_environ;
@@ -1008,96 +1028,83 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
 }
 
 void
-gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
-			 int finalize, int mapnum)
+gomp_acc_remove_pointer (void **hostaddrs, size_t *sizes, unsigned short *kinds,
+			 int async, void *detach_from, bool finalize,
+			 int mapnum)
 {
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
+  struct splay_tree_key_s cur_node;
   splay_tree_key n;
-  struct target_mem_desc *t;
-  int minrefs = (mapnum == 1) ? 2 : 3;
-
-  if (!acc_is_present (h, s))
-    return;
 
   gomp_mutex_lock (&acc_dev->lock);
 
-  n = lookup_host (acc_dev, h, 1);
-
-  if (!n)
+  if (detach_from)
     {
-      gomp_mutex_unlock (&acc_dev->lock);
-      gomp_fatal ("%p is not a mapped block", (void *)h);
+      splay_tree_key n2 = lookup_host (acc_dev, detach_from, 1);
+      goacc_aq aq = get_goacc_asyncqueue (async);
+      gomp_detach_pointer (acc_dev, aq, n2, (uintptr_t) detach_from, finalize,
+			   NULL);
     }
 
-  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
-
-  t = n->tgt;
+  gomp_acc_data_env_remove (acc_dev, &acc_dev->openacc.data_environ, hostaddrs,
+			    mapnum);
 
-  if (n->refcount < n->dynamic_refcount)
+  for (int i = 0; i < mapnum; i++)
     {
-      gomp_mutex_unlock (&acc_dev->lock);
-      gomp_fatal ("Dynamic reference counting assert fail\n");
-    }
-
-  if (finalize)
-    {
-      n->refcount -= n->dynamic_refcount;
-      n->dynamic_refcount = 0;
-    }
-  else if (n->dynamic_refcount)
-    {
-      n->dynamic_refcount--;
-      n->refcount--;
-    }
+      int kind = kinds[i] & 0xff;
+      bool copyfrom = false;
 
-  gomp_mutex_unlock (&acc_dev->lock);
-
-  if (n->refcount == 0)
-    {
-      if (t->refcount == minrefs)
-	{
-	  /* This is the last reference, so pull the descriptor off the
-	     chain. This prevents gomp_unmap_vars via gomp_unmap_tgt from
-	     freeing the device memory. */
-	  struct target_mem_desc *tp;
-	  for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
-	       tp = t, t = t->prev)
+      switch (kind)
+        {
+	case GOMP_MAP_FROM:
+	case GOMP_MAP_FORCE_FROM:
+	case GOMP_MAP_ALWAYS_FROM:
+	  copyfrom = true;
+	  /* Fallthrough.  */
+	case GOMP_MAP_TO_PSET:
+	case GOMP_MAP_POINTER:
+	case GOMP_MAP_DELETE:
+	case GOMP_MAP_RELEASE:
+	case GOMP_MAP_DETACH:
+	case GOMP_MAP_FORCE_DETACH:
+	  cur_node.host_start = (uintptr_t) hostaddrs[i];
+	  cur_node.host_end = cur_node.host_start
+			      + ((kind == GOMP_MAP_DETACH
+				  || kind == GOMP_MAP_FORCE_DETACH
+				  || kind == GOMP_MAP_POINTER)
+				 ? sizeof (void *) : sizes[i]);
+	  n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+	  if (n == NULL)
+	    continue;
+	  if (finalize)
 	    {
-	      if (n->tgt == t)
-		{
-		  if (tp)
-		    tp->prev = t->prev;
-		  else
-		    acc_dev->openacc.data_environ = t->prev;
-		  break;
-		}
+	      n->refcount -= n->dynamic_refcount;
+	      n->dynamic_refcount = 0;
 	    }
-	}
-
-      /* Set refcount to 1 to allow gomp_unmap_vars to unmap it.  */
-      n->refcount = 1;
-      t->refcount = minrefs;
-      for (size_t i = 0; i < t->list_count; i++)
-	if (t->list[i].key == n)
-	  {
-	    t->list[i].copy_from = force_copyfrom ? 1 : 0;
-	    break;
-	  }
-
-      /* If running synchronously, unmap immediately.  */
-      if (async < acc_async_noval)
-	gomp_unmap_vars (t, true, finalize);
-      else
-	{
-	  goacc_aq aq = get_goacc_asyncqueue (async);        
-	  goacc_async_copyout_unmap_vars (t, aq, finalize);
+	  else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
+	    {
+	      n->refcount--;
+	      n->dynamic_refcount--;
+	    }
+	  if (copyfrom)
+	    gomp_copy_dev2host (acc_dev, NULL, (void *) cur_node.host_start,
+				(void *) (n->tgt->tgt_start + n->tgt_offset
+					  + cur_node.host_start
+					  - n->host_start),
+				cur_node.host_end - cur_node.host_start);
+	  if (n->refcount == 0)
+	    gomp_remove_var (acc_dev, n);
+	  break;
+	default:
+	  gomp_mutex_unlock (&acc_dev->lock);
+	  gomp_fatal ("gomp_acc_remove_pointer unhandled kind 0x%.2x",
+		      kind);
 	}
     }
 
-  gomp_mutex_unlock (&acc_dev->lock);
 
-  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
+  gomp_mutex_unlock (&acc_dev->lock);
 }
 
 
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index f6c9114..8a3c65b 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -58,8 +58,12 @@ find_pointer (int pos, size_t mapnum, unsigned short *kinds)
     case GOMP_MAP_FORCE_TO:
     case GOMP_MAP_FROM:
     case GOMP_MAP_FORCE_FROM:
+    case GOMP_MAP_TOFROM:
+    case GOMP_MAP_FORCE_TOFROM:
     case GOMP_MAP_ALLOC:
     case GOMP_MAP_RELEASE:
+    case GOMP_MAP_DECLARE_ALLOCATE:
+    case GOMP_MAP_DECLARE_DEALLOCATE:
       {
 	unsigned char kind1 = kinds[pos + 1] & 0xff;
 	if (kind1 == GOMP_MAP_POINTER
@@ -392,7 +396,7 @@ GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *),
 				    &api_info);
 	}
       /* If running synchronously, unmap immediately.  */
-      gomp_unmap_vars (tgt, true, false);
+      gomp_unmap_vars (tgt, true);
       if (profiling_dispatch_p)
 	{
 	  prof_info.event_type = acc_ev_exit_data_end;
@@ -410,7 +414,7 @@ GOACC_parallel_keyed_internal (int device, int params, void (*fn) (void *),
       else
 	acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs,
 					  devaddrs, dims, tgt, aq);
-      goacc_async_copyout_unmap_vars (tgt, aq, false);
+      goacc_async_copyout_unmap_vars (tgt, aq);
     }
 
  out:
@@ -647,7 +651,7 @@ GOACC_data_end (void)
 
   gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
   thr->mapped_data = tgt->prev;
-  gomp_unmap_vars (tgt, true, false);
+  gomp_unmap_vars (tgt, true);
   gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
 
   if (profiling_dispatch_p)
@@ -845,18 +849,39 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 		    int elems = sizes[i];
 		    struct splay_tree_key_s k;
 		    splay_tree_key str;
-		    k.host_start = (uintptr_t) hostaddrs[i];
-		    k.host_end = k.host_start + 1;
+		    uintptr_t elems_lo = (uintptr_t) hostaddrs[i + 1];
+		    uintptr_t elems_hi = (uintptr_t) hostaddrs[i + elems]
+					 + sizes[i + elems];
+		    k.host_start = elems_lo;
+		    k.host_end = elems_hi;
 		    gomp_mutex_lock (&acc_dev->lock);
 		    str = splay_tree_lookup (&acc_dev->mem_map, &k);
 		    gomp_mutex_unlock (&acc_dev->lock);
-		    /* We increment the dynamic reference count for the struct
-		       itself by the number of struct elements that we
-		       mapped.  */
-		    if (str->refcount != REFCOUNT_INFINITY)
+		    if (str == NULL)
 		      {
-		        str->refcount += elems;
-			str->dynamic_refcount += elems;
+		        size_t mapsize = elems_hi - elems_lo;
+			goacc_aq aq = get_goacc_asyncqueue (async);
+			struct target_mem_desc *tgt;
+			unsigned short thiskind = GOMP_MAP_ALLOC;
+			int j;
+			for (j = 0; j < elems; j++)
+			  if ((kinds[i + j] & 0xff) != GOMP_MAP_ALLOC)
+			    {
+			      thiskind = GOMP_MAP_TO;
+			      break;
+			    }
+			tgt = gomp_map_vars_async (acc_dev, aq, 1,
+				&hostaddrs[i + 1], NULL, &mapsize, &thiskind,
+				true, GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+
+			for (j = 0; j < tgt->list_count; j++)
+			  if (tgt->list[j].key)
+			    tgt->list[j].key->dynamic_refcount++;
+
+			gomp_mutex_lock (&acc_dev->lock);
+			tgt->prev = acc_dev->openacc.data_environ;
+			acc_dev->openacc.data_environ = tgt;
+			gomp_mutex_unlock (&acc_dev->lock);
 		      }
 		    i += elems;
 		  }
@@ -962,18 +987,17 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 		  int elems = sizes[i];
 		  struct splay_tree_key_s k;
 		  splay_tree_key str;
-		  k.host_start = (uintptr_t) hostaddrs[i];
-		  k.host_end = k.host_start + 1;
+		  uintptr_t elems_lo = (uintptr_t) hostaddrs[i + 1];
+		  uintptr_t elems_hi = (uintptr_t) hostaddrs[i + elems]
+				       + sizes[i + elems];
+		  k.host_start = elems_lo;
+		  k.host_end = elems_hi;
 		  gomp_mutex_lock (&acc_dev->lock);
 		  str = splay_tree_lookup (&acc_dev->mem_map, &k);
 		  gomp_mutex_unlock (&acc_dev->lock);
-		  /* Decrement dynamic reference count for the struct by the
-		     number of elements that we are unmapping.  */
-		  if (str->dynamic_refcount >= elems)
-		    {
-		      str->dynamic_refcount -= elems;
-		      str->refcount -= elems;
-		    }
+		  if (str == NULL)
+		    gomp_fatal ("[%p,%ld] is not mapped", (void *) elems_lo,
+				(unsigned long) (elems_hi - elems_lo));
 		  i += elems;
 		}
 		break;
@@ -989,10 +1013,14 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 					   &sizes[i], &kinds[i]);
 	      else
 		{
-		  bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
-				   || kind == GOMP_MAP_FROM);
-		  gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom,
-					   async, finalize, pointer);
+		  unsigned short ptrkind = kinds[i + pointer - 1] & 0xff;
+		  bool detach = (ptrkind == GOMP_MAP_DETACH
+				 || ptrkind == GOMP_MAP_FORCE_DETACH);
+		  void *detach_from = detach ? hostaddrs[i + pointer - 1]
+					     : NULL;
+		  gomp_acc_remove_pointer (&hostaddrs[i], &sizes[i], &kinds[i],
+					   async, detach_from, finalize,
+					   pointer);
 		  /* See the above comment.  */
 		}
 	      i += pointer - 1;
diff --git a/libgomp/target.c b/libgomp/target.c
index da51291..bb5e1e9 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -629,7 +629,10 @@ gomp_detach_pointer (struct gomp_device_descr *devicep,
   idx = (detach_from - n->host_start) / sizeof (void *);
 
   if (!n->attach_count)
-    gomp_fatal ("no attachment counters for struct");
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("no attachment counters for struct");
+    }
 
   if (finalize)
     n->attach_count[idx] = 1;
@@ -1013,7 +1016,8 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
 	  continue;
 	}
       cur_node.host_start = (uintptr_t) hostaddrs[i];
-      if (!GOMP_MAP_POINTER_P (kind & typemask))
+      if (!GOMP_MAP_POINTER_P (kind & typemask)
+          && (kind & typemask) != GOMP_MAP_ATTACH)
 	cur_node.host_end = cur_node.host_start + sizes[i];
       else
 	cur_node.host_end = cur_node.host_start + sizeof (void *);
@@ -1281,7 +1285,9 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
 		      tgt->list[i].length = n->host_end - n->host_start;
 		      tgt->list[i].copy_from = false;
 		      tgt->list[i].always_copy_from = false;
-		      tgt->list[i].do_detach = true;
+		      tgt->list[i].do_detach
+		        = (pragma_kind != GOMP_MAP_VARS_OPENACC_ENTER_DATA);
+		      n->refcount++;
 		    }
 		  else
 		    {
@@ -1622,6 +1628,8 @@ gomp_unmap_tgt (struct target_mem_desc *tgt)
   if (tgt->tgt_end)
     gomp_free_device_memory (tgt->device_descr, tgt->to_free);
 
+  gomp_acc_data_env_remove_tgt (&tgt->device_descr->openacc.data_environ, tgt);
+
   free (tgt->array);
   free (tgt);
 }
@@ -1650,17 +1658,18 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
    has been done already.  */
 
 attribute_hidden void
-gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom, bool finalize)
+gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
 {
-  gomp_unmap_vars_async (tgt, do_copyfrom, NULL, finalize);
+  gomp_unmap_vars_async (tgt, do_copyfrom, NULL);
 }
 
 attribute_hidden void
 gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
-		       struct goacc_asyncqueue *aq, bool finalize)
+		       struct goacc_asyncqueue *aq)
 {
   struct gomp_device_descr *devicep = tgt->device_descr;
 
+
   if (tgt->list_count == 0)
     {
       free (tgt);
@@ -1685,15 +1694,15 @@ gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
 
       if (k != NULL && tgt->list[i].do_detach)
 	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
-					     + tgt->list[i].offset, finalize,
-			     NULL);
+					     + tgt->list[i].offset,
+			     k->refcount == 1, NULL);
     }
 
   for (i = 0; i < tgt->list_count; i++)
     {
       splay_tree_key k = tgt->list[i].key;
 
-      if (k == NULL || tgt->list[i].do_detach)
+      if (k == NULL)
 	continue;
 
       bool do_unmap = false;
@@ -2314,7 +2323,7 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
 		     GOMP_MAP_VARS_TARGET);
   devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
 		     NULL);
-  gomp_unmap_vars (tgt_vars, true, false);
+  gomp_unmap_vars (tgt_vars, true);
 }
 
 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
@@ -2458,7 +2467,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
 		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
 		     args);
   if (tgt_vars)
-    gomp_unmap_vars (tgt_vars, true, false);
+    gomp_unmap_vars (tgt_vars, true);
 }
 
 /* Host fallback for GOMP_target_data{,_ext} routines.  */
@@ -2527,7 +2536,7 @@ GOMP_target_end_data (void)
     {
       struct target_mem_desc *tgt = icv->target_data;
       icv->target_data = tgt->prev;
-      gomp_unmap_vars (tgt, true, false);
+      gomp_unmap_vars (tgt, true);
     }
 }
 
@@ -2762,7 +2771,7 @@ gomp_target_task_fn (void *data)
       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
 	{
 	  if (ttask->tgt)
-	    gomp_unmap_vars (ttask->tgt, true, false);
+	    gomp_unmap_vars (ttask->tgt, true);
 	  return false;
 	}
 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c
index 6a52f74..6bdcfe7 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-2.c
@@ -182,13 +182,13 @@ main (int argc, char **argv)
         exit (EXIT_FAILURE);
     }
 
+    acc_delete (&h_X[0], N * sizeof (float));
+    acc_delete (&h_Y1[0], N * sizeof (float));
+
     free (h_X);
     free (h_Y1);
     free (h_Y2);
 
-    acc_free (d_X);
-    acc_free (d_Y);
-
     context_check (pctx);
 
     s = cublasDestroy (h);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c
index 71365e8..b403a5c 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/context-4.c
@@ -176,13 +176,13 @@ main (int argc, char **argv)
         exit (EXIT_FAILURE);
     }
 
+    acc_delete (&h_X[0], N * sizeof (float));
+    acc_delete (&h_Y1[0], N * sizeof (float));
+
     free (h_X);
     free (h_Y1);
     free (h_Y2);
 
-    acc_free (d_X);
-    acc_free (d_Y);
-
     context_check (pctx);
 
     s = cublasDestroy (h);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c
new file mode 100644
index 0000000..81c1c5e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-6.c
@@ -0,0 +1,59 @@
+/* { dg-do run { target { ! openacc_host_selected } } } */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <openacc.h>
+
+struct dc
+{
+  int a;
+  int **b;
+};
+
+int
+main ()
+{
+  int n = 100, i, j, k;
+  struct dc v = { .a = 3 };
+
+  v.b = (int **) malloc (sizeof (int *) * n);
+  for (i = 0; i < n; i++)
+    v.b[i] = (int *) malloc (sizeof (int) * n);
+
+  for (k = 0; k < 16; k++)
+    {
+#pragma acc data copy(v)
+      {
+#pragma acc data copy(v.b[:n])
+	{
+	  for (i = 0; i < n; i++)
+	    {
+	      acc_copyin (v.b[i], sizeof (int) * n);
+	      acc_attach ((void **) &v.b[i]);
+	    }
+
+#pragma acc parallel loop
+	  for (i = 0; i < n; i++)
+	    for (j = 0; j < n; j++)
+	      v.b[i][j] = v.a + i + j;
+
+	  for (i = 0; i < n; i++)
+	    {
+	      acc_detach ((void **) &v.b[i]);
+	      acc_copyout (v.b[i], sizeof (int) * n);
+	    }
+	}
+      }
+
+      for (i = 0; i < n; i++)
+	for (j = 0; j < n; j++)
+	  assert (v.b[i][j] == v.a + i + j);
+
+      assert (!acc_is_present (&v, sizeof (v)));
+      assert (!acc_is_present (v.b, sizeof (int *) * n));
+      for (i = 0; i < n; i++)
+        assert (!acc_is_present (v.b[i], sizeof (int) * n));
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c
new file mode 100644
index 0000000..3a970a0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c
@@ -0,0 +1,42 @@
+/* { dg-do run { target { ! openacc_host_selected } } } */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <openacc.h>
+
+struct dc
+{
+  int a;
+  int *b;
+};
+
+int
+main ()
+{
+  int n = 100, i, j, k;
+  struct dc v = { .a = 3 };
+
+  v.b = (int *) malloc (sizeof (int) * n);
+
+  for (k = 0; k < 16; k++)
+    {
+#pragma acc enter data copyin(v.a, v.b[0:n])
+
+#pragma acc enter data pcopyin(v.b[0:n])
+
+#pragma acc parallel loop attach(v.b)
+      for (i = 0; i < n; i++)
+	v.b[i] = v.a + i;
+
+#pragma acc exit data copyout(v.b[:n])
+#pragma acc exit data delete(v) finalize
+
+      for (i = 0; i < n; i++)
+	assert (v.b[i] == v.a + i);
+
+      assert (!acc_is_present (&v, sizeof (v)));
+      assert (!acc_is_present (v.b, sizeof (int *) * n));
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c
new file mode 100644
index 0000000..54f553b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c
@@ -0,0 +1,53 @@
+/* { dg-do run { target { ! openacc_host_selected } } } */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <openacc.h>
+
+struct dc
+{
+  int a;
+  int *b;
+  int *c;
+  int *d;
+};
+
+int
+main ()
+{
+  int n = 100, i, j, k;
+  struct dc v = { .a = 3 };
+
+  v.b = (int *) malloc (sizeof (int) * n);
+  v.c = (int *) malloc (sizeof (int) * n);
+  v.d = (int *) malloc (sizeof (int) * n);
+
+#pragma acc enter data copyin(v)
+
+  for (k = 0; k < 16; k++)
+    {
+#pragma acc enter data copyin(v.a, v.b[:n], v.c[:n], v.d[:n])
+
+#pragma acc parallel loop
+      for (i = 0; i < n; i++)
+	v.b[i] = v.a + i;
+
+#pragma acc exit data copyout(v.b[:n])
+#pragma acc exit data copyout(v.c[:n])
+#pragma acc exit data copyout(v.d[:n])
+
+      for (i = 0; i < n; i++)
+	assert (v.b[i] == v.a + i);
+
+      assert (acc_is_present (&v, sizeof (v)));
+      assert (!acc_is_present (v.b, sizeof (int *) * n));
+      assert (!acc_is_present (v.c, sizeof (int *) * n));
+      assert (!acc_is_present (v.d, sizeof (int *) * n));
+    }
+
+#pragma acc exit data copyout(v)
+
+  assert (!acc_is_present (&v, sizeof (v)));
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90
index db80413..a58e465 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data-2.f90
@@ -92,10 +92,6 @@ program test
 
   if (acc_is_present (c) .eqv. .TRUE.) call abort
 
-  !$acc exit data delete (c(0:N))
-
-  if (acc_is_present (c) .eqv. .TRUE.) call abort
-
   do i = 1, N
     if (c(i) .ne. 3.0) call abort
   end do
@@ -113,11 +109,6 @@ program test
   if (acc_is_present (c) .eqv. .TRUE.) call abort
   if (acc_is_present (d) .eqv. .TRUE.) call abort
 
-  !$acc exit data delete (c(0:N), d(0:N))
-
-  if (acc_is_present (c) .eqv. .TRUE.) call abort
-  if (acc_is_present (d) .eqv. .TRUE.) call abort
-
   do i = 1, N
     if (c(i) .ne. 5.0) call abort
     if (d(i) .ne. 9.0) call abort
@@ -177,8 +168,8 @@ program test
 
   !$acc exit data delete (c(0:N), d(0:N))
 
-  !if (acc_is_present (c) .eqv. .TRUE.) call abort
-  !if (acc_is_present (d) .eqv. .TRUE.) call abort
+  if (acc_is_present (c) .eqv. .FALSE.) call abort
+  if (acc_is_present (d) .eqv. .FALSE.) call abort
 
   !$acc exit data delete (c(0:N), d(0:N))
 
@@ -190,12 +181,7 @@ program test
   if (acc_is_present (c) .eqv. .FALSE.) call abort
   if (acc_is_present (d) .eqv. .TRUE.) call abort
 
-  !$acc exit data delete (c(0:N), d(0:N))
-
-  if (acc_is_present (c) .eqv. .TRUE.) call abort
-  if (acc_is_present (d) .eqv. .TRUE.) call abort
-
-  !$acc exit data delete (c(0:N), d(0:N))
+  !$acc exit data delete (c(0:N))
 
   if (acc_is_present (c) .eqv. .TRUE.) call abort
   if (acc_is_present (d) .eqv. .TRUE.) call abort

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 3/6] [og8] OpenACC 2.6 manual deep copy support (attach/detach)
  2018-11-20 21:55 ` [PATCH 3/6] [og8] OpenACC 2.6 manual deep copy support (attach/detach) Julian Brown
@ 2018-11-22 15:48   ` Bernhard Reutner-Fischer
  0 siblings, 0 replies; 8+ messages in thread
From: Bernhard Reutner-Fischer @ 2018-11-22 15:48 UTC (permalink / raw)
  To: gcc-patches, Julian Brown; +Cc: cltang, Catherine_Moore, jakub

On 20 November 2018 22:54:49 CET, Julian Brown <julian@codesourcery.com> wrote:
>
>Previously posted upstream:
>https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00826.html

As said in https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00861.html

+	  bool array_only_p = true;
+	  /* Disallow duplicate bare variable references and multiple
+	     subarrays of the same array here, but allow multiple components of
+	     the same (e.g. derived-type) variable.  For the latter, duplicate
+	     components are detected elsewhere.  */
+	  if (openacc && n->expr && n->expr->expr_type == EXPR_VARIABLE)
+	    for (gfc_ref *ref = n->expr->ref; ref; ref = ref->next)
+	      if (ref->type != REF_ARRAY)
+		array_only_p = false;

Looks like you could break here when setting array_only_p to false.

+	  if (array_only_p)
+	    {
+	      if (n->sym->mark)
+		gfc_error ("Symbol %qs present on multiple clauses at %L",
+			   n->sym->name, &n->where);
+	      else
+		n->sym->mark = 1;
+	    }
 	}


+		  if (ptr && (region_type & ORT_ACC) != 0)
+		    {
+		      /* Turning a GOMP_MAP_ALWAYS_POINTER clause into a
+			 GOMP_MAP_ATTACH clause after we have detected a case
+			 that needs a GOMP_MAP_STRUCT mapping adding.

As said:

s/adding/added/ i think. 

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2018-11-22 15:48 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-20 21:55 [PATCH 0/6] [og8] OpenACC attach/detach Julian Brown
2018-11-20 21:55 ` [PATCH 1/6] [og8] Host-to-device transfer coalescing & magic offset value self-documentation Julian Brown
2018-11-20 21:55 ` [PATCH 2/6] [og8] Factor out duplicate code in gimplify_scan_omp_clauses Julian Brown
2018-11-20 21:55 ` [PATCH 3/6] [og8] OpenACC 2.6 manual deep copy support (attach/detach) Julian Brown
2018-11-22 15:48   ` Bernhard Reutner-Fischer
2018-11-20 21:55 ` [PATCH 4/6] [og8] Interaction of dynamic/multidimensional arrays with attach/detach Julian Brown
2018-11-20 21:56 ` [PATCH 5/6] [og8] Backport parts of upstream declare-allocate patch Julian Brown
2018-11-20 21:57 ` [PATCH 6/6] [og8] OpenACC refcounting refresh Julian Brown

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).