public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [gomp4] kernels offload fns
@ 2015-12-29 14:19   ` Nathan Sidwell
  2016-01-07  0:11     ` Cesar Philippidis
  2016-07-27  8:59     ` Test cases to check OpenACC offloaded function's attributes and classification Thomas Schwinge
  0 siblings, 2 replies; 20+ messages in thread
From: Nathan Sidwell @ 2015-12-29 14:19 UTC (permalink / raw)
  To: GCC Patches; +Cc: Tom de Vries

[-- Attachment #1: Type: text/plain, Size: 574 bytes --]

In developing a non-unity default partition mechanism I discovered there was no 
mechanism to reliably determine whether an offload was for a kernels region or 
not.  The tree-ssa pass uses a heuristic that is sufficient for its needs, but
is not very clear.

This patch adjusts set_oacc_fn_attrib to accept a 'kernels' parameter, which it
encodes in the TREE_PUBLIC flag of the attribute values.  I add an
oacc_fn_attrib_kernels_p predicate and use it where needed.

(The defaulting mechanism needs to reliably distinguish kernels from parallel
offload regions).

nathan

[-- Attachment #2: gomp4-kern.patch --]
[-- Type: text/x-patch, Size: 9096 bytes --]

2015-12-29  Nathan Sidwell  <nathan@acm.org>

	* omp-low.c (set_oacc_fn_attrib): Add IS_KERNEL arg, encode on
	TREE_PUBLIC.
	(oacc_fn_attrib_kernels_p): New.
	(oacc_fn_attrib_level): New.
	(expand_omp_target): Pass kernels_p to set_oacc_fn_attrib.
	(oacc_validate_dims): Add LEVEL arg, don't return it.
	(new_oacc_loop_routine): Use oacc_fn_attrib_level, not
	oacc_validate_dims.
	(execute_oacc_device_lower): Use oacc_fn_attrib_level, validate
	dimensions after discovering loops.  Add more dump info.
	* omp-low.h (set_oacc_fn_attrib): Add IS_KERNEL arg.
	(oacc_fn_attrib_kernels_p): Declare.
	* tree-parloops.c (create_parallel_loop): Adjust
	set_oacc_fn_attrib call.
	* tree-ssa-loop.c (gate_oacc_kernels): Use oacc_fn_attrib_kernels_p.

Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 231992)
+++ gcc/omp-low.c	(working copy)
@@ -12625,10 +12625,11 @@ replace_oacc_fn_attrib (tree fn, tree di
 
 /* Scan CLAUSES for launch dimensions and attach them to the oacc
    function attribute.  Push any that are non-constant onto the ARGS
-   list, along with an appropriate GOMP_LAUNCH_DIM tag.  */
+   list, along with an appropriate GOMP_LAUNCH_DIM tag.  IS_KERNEL is
+   true, if these are for a kernels region offload function.  */
 
 void
-set_oacc_fn_attrib (tree fn, tree clauses, vec<tree> *args)
+set_oacc_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
 {
   /* Must match GOMP_DIM ordering.  */
   static const omp_clause_code ids[]
@@ -12653,6 +12654,9 @@ set_oacc_fn_attrib (tree fn, tree clause
 	  non_const |= GOMP_DIM_MASK (ix);
 	}
       attr = tree_cons (NULL_TREE, dim, attr);
+      /* Note kernelness with TREE_PUBLIC.  */
+      if (is_kernel)
+	TREE_PUBLIC (attr) = 1;
     }
 
   replace_oacc_fn_attrib (fn, attr);
@@ -12721,6 +12725,36 @@ get_oacc_fn_attrib (tree fn)
   return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
 }
 
+/* Return true if this oacc fn attrib is for a kernels offload
+   region.  We use the TREE_PUBLIC flag of each dimension -- only
+   need to check the first one.  */
+
+bool
+oacc_fn_attrib_kernels_p (tree attr)
+{
+  return TREE_PUBLIC (TREE_VALUE (attr));
+}
+
+/* Return level at which oacc routine may spawn a partitioned loop, or
+   -1 if it is not a routine (i.e. is an offload fn).  */
+
+int
+oacc_fn_attrib_level (tree attr)
+{
+  tree pos = TREE_VALUE (attr);
+
+  if (!TREE_PURPOSE (pos))
+    return -1;
+  
+  int ix = 0;
+  for (ix = 0; ix != GOMP_DIM_MAX;
+       ix++, pos = TREE_CHAIN (pos))
+    if (!integer_zerop (TREE_PURPOSE (pos)))
+      break;
+
+  return ix;
+}
+
 /* Extract an oacc execution dimension from FN.  FN must be an
    offloaded function or routine that has already had its execution
    dimensions lowered to the target-specific values.  */
@@ -13045,6 +13079,7 @@ expand_omp_target (struct omp_region *re
   enum built_in_function start_ix;
   location_t clause_loc;
   unsigned int flags_i = 0;
+  bool oacc_kernels_p = false;
 
   switch (gimple_omp_target_kind (entry_stmt))
     {
@@ -13064,8 +13099,10 @@ expand_omp_target (struct omp_region *re
       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
       flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
       break;
-    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
     case GF_OMP_TARGET_KIND_OACC_KERNELS:
+      oacc_kernels_p = true;
+      /* FALLTHROUGH */
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
       start_ix = BUILT_IN_GOACC_PARALLEL;
       break;
     case GF_OMP_TARGET_KIND_OACC_DATA:
@@ -13247,7 +13284,7 @@ expand_omp_target (struct omp_region *re
       break;
     case BUILT_IN_GOACC_PARALLEL:
       {
-	set_oacc_fn_attrib (child_fn, clauses, &args);
+	set_oacc_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
 	tagging = true;
       }
       /* FALLTHRU */
@@ -19259,17 +19296,17 @@ oacc_xform_loop (gcall *call)
 }
 
 /* Validate and update the dimensions for offloaded FN.  ATTRS is the
-   raw attribute.  DIMS is an array of dimensions, which is returned.
-   Returns the function level dimensionality --  the level at which an
-   offload routine wishes to partition a loop.  */
+   raw attribute.  DIMS is an array of dimensions, which is filled in.
+   LEVEL is the partitioning level of a routine, or -1 for an offload
+   region itself.  */
 
-static int
-oacc_validate_dims (tree fn, tree attrs, int *dims)
+static void
+oacc_validate_dims (tree fn, tree attrs, int *dims, int level)
 {
   tree purpose[GOMP_DIM_MAX];
   unsigned ix;
   tree pos = TREE_VALUE (attrs);
-  int fn_level = -1;
+  bool is_kernel = oacc_fn_attrib_kernels_p (attrs);
 
   /* Make sure the attribute creator attached the dimension
      information.  */
@@ -19278,21 +19315,12 @@ oacc_validate_dims (tree fn, tree attrs,
   for (ix = 0; ix != GOMP_DIM_MAX; ix++)
     {
       purpose[ix] = TREE_PURPOSE (pos);
-
-      if (purpose[ix])
-	{
-	  if (integer_zerop (purpose[ix]))
-	    fn_level = ix + 1;
-	  else if (fn_level < 0)
-	    fn_level = ix;
-	}
-
       tree val = TREE_VALUE (pos);
       dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
       pos = TREE_CHAIN (pos);
     }
 
-  bool changed = targetm.goacc.validate_dims (fn, dims, fn_level);
+  bool changed = targetm.goacc.validate_dims (fn, dims, level);
 
   /* Default anything left to 1.  */
   for (ix = 0; ix != GOMP_DIM_MAX; ix++)
@@ -19307,13 +19335,15 @@ oacc_validate_dims (tree fn, tree attrs,
       /* Replace the attribute with new values.  */
       pos = NULL_TREE;
       for (ix = GOMP_DIM_MAX; ix--;)
-	pos = tree_cons (purpose[ix],
-			 build_int_cst (integer_type_node, dims[ix]),
-			 pos);
+	{
+	  pos = tree_cons (purpose[ix],
+			   build_int_cst (integer_type_node, dims[ix]),
+			   pos);
+	  if (is_kernel)
+	    TREE_PUBLIC (pos) = 1;
+	}
       replace_oacc_fn_attrib (fn, pos);
     }
-
-  return fn_level;
 }
 
 /* Create an empty OpenACC loop structure at LOC.  */
@@ -19385,7 +19415,7 @@ new_oacc_loop_routine (oacc_loop *parent
 {
   oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
   int dims[GOMP_DIM_MAX];
-  int level = oacc_validate_dims (decl, attrs, dims);
+  int level = oacc_fn_attrib_level (attrs);
 
   gcc_assert (level >= 0);
 
@@ -20015,13 +20045,30 @@ execute_oacc_device_lower ()
       return TODO_discard_function;
     }
 
-  int dims[GOMP_DIM_MAX];
-  int fn_level = oacc_validate_dims (current_function_decl, attr, dims);
-
   /* Discover, partition and process the loops.  */
   oacc_loop *loops = oacc_loop_discovery ();
+  int fn_level = oacc_fn_attrib_level (attr);
+
+  if (dump_file)
+    fprintf (dump_file, oacc_fn_attrib_kernels_p (attr)
+	     ? "Function is kernels offload\n"
+	     : fn_level < 0 ? "Function is parallel offload\n"
+	     : "Function is routine level %d\n", fn_level);
+
   unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
   oacc_loop_partition (loops, outer_mask);
+
+  int dims[GOMP_DIM_MAX];
+  oacc_validate_dims (current_function_decl, attr, dims, fn_level);
+
+  if (dump_file)
+    {
+      const char *comma = "Compute dimensions [";
+      for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
+	fprintf (dump_file, "%s%d", comma, dims[ix]);
+      fprintf (dump_file, "]\n");
+    }
+
   oacc_loop_process (loops);
   if (dump_file)
     {
Index: gcc/omp-low.h
===================================================================
--- gcc/omp-low.h	(revision 231992)
+++ gcc/omp-low.h	(working copy)
@@ -33,7 +33,8 @@ extern tree omp_member_access_dummy_var
 extern void replace_oacc_fn_attrib (tree, tree);
 extern tree build_oacc_routine_dims (tree);
 extern tree get_oacc_fn_attrib (tree);
-extern void set_oacc_fn_attrib (tree, tree, vec<tree> *);
+extern bool oacc_fn_attrib_kernels_p (tree);
+extern void set_oacc_fn_attrib (tree, tree, bool, vec<tree> *);
 extern int get_oacc_ifn_dim_arg (const gimple *);
 extern int get_oacc_fn_dim_size (tree, int);
 
Index: gcc/tree-parloops.c
===================================================================
--- gcc/tree-parloops.c	(revision 231992)
+++ gcc/tree-parloops.c	(working copy)
@@ -2054,7 +2054,7 @@ create_parallel_loop (struct loop *loop,
       tree clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
       OMP_CLAUSE_NUM_GANGS_EXPR (clause)
 	= build_int_cst (integer_type_node, n_threads);
-      set_oacc_fn_attrib (cfun->decl, clause, NULL);
+      set_oacc_fn_attrib (cfun->decl, clause, true, NULL);
     }
 
   /* Initialize NEW_DATA.  */
Index: gcc/tree-ssa-loop.c
===================================================================
--- gcc/tree-ssa-loop.c	(revision 231992)
+++ gcc/tree-ssa-loop.c	(working copy)
@@ -154,12 +154,7 @@ gate_oacc_kernels (function *fn)
   tree oacc_function_attr = get_oacc_fn_attrib (fn->decl);
   if (oacc_function_attr == NULL_TREE)
     return false;
-
-  tree val = TREE_VALUE (oacc_function_attr);
-  while (val != NULL_TREE && TREE_VALUE (val) == NULL_TREE)
-    val = TREE_CHAIN (val);
-
-  if (val != NULL_TREE)
+  if (!oacc_fn_attrib_kernels_p (oacc_function_attr))
     return false;
 
   struct loop *loop;

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Mark oacc kernels fns
@ 2016-01-04 15:39 Nathan Sidwell
  2016-01-25 15:06 ` Nathan Sidwell
  0 siblings, 1 reply; 20+ messages in thread
From: Nathan Sidwell @ 2016-01-04 15:39 UTC (permalink / raw)
  To: GCC Patches, Jakub Jelinek; +Cc: Tom de Vries

[-- Attachment #1: Type: text/plain, Size: 686 bytes --]

There's currently no robust predicate to determine whether an oacc offload 
function is for a kernels region (as opposed to a parallel region).  The test in 
tree-ssa-loop.c uses the heuristic of seeing if all the dimensions are defaulted 
  (which can easily be true for parallel offloads at that point).

This patch marks TREE_PUBLIC on the offload attribute values, to note kernels
regions, and adds a predicate to check that.  I also broke out the function
level determination from oacc_validate_dims, as it was only laziness on my
part not to have done that earlier.

Using these predicates improves the dump output of the openacc device lowering 
pass too.

ok?

nathan

[-- Attachment #2: trunk-kernel.patch --]
[-- Type: text/x-patch, Size: 8400 bytes --]

2016-01-04  Nathan Sidwell  <nathan@codesourcery.com>

	* omp-low.h (oacc_fn_attrib_kernels_p): Declare.
	* omp-low.c (set_oacc_fn_attrib): Add IS_KERNEL arg.
	(oacc_fn_attrib_kernels_p, oacc_fn_attrib_level): New.
	(expand_omp_target): Pass is_kernel to set_oacc_fn_attrib.
	(oacc_validate_dims): Add LEVEL arg, don't return level.
	(new_oacc_loop_routine): Use oacc_fn_attrib_level, not
	oacc_validate_dims.
	(execute_oacc_device_lower): Adjust, add more dump output.
	* tree-ssa-loop.c (gate_oacc_kernels): Use oacc_fn_attrib_kernels_p.

Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 232057)
+++ gcc/omp-low.c	(working copy)
@@ -12395,10 +12395,11 @@ replace_oacc_fn_attrib (tree fn, tree di
 
 /* Scan CLAUSES for launch dimensions and attach them to the oacc
    function attribute.  Push any that are non-constant onto the ARGS
-   list, along with an appropriate GOMP_LAUNCH_DIM tag.  */
+   list, along with an appropriate GOMP_LAUNCH_DIM tag.  IS_KERNEL is
+   true, if these are for a kernels region offload function.  */
 
 static void
-set_oacc_fn_attrib (tree fn, tree clauses, vec<tree> *args)
+set_oacc_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
 {
   /* Must match GOMP_DIM ordering.  */
   static const omp_clause_code ids[]
@@ -12423,6 +12424,9 @@ set_oacc_fn_attrib (tree fn, tree clause
 	  non_const |= GOMP_DIM_MASK (ix);
 	}
       attr = tree_cons (NULL_TREE, dim, attr);
+      /* Note kernelness with TREE_PUBLIC.  */
+      if (is_kernel)
+	TREE_PUBLIC (attr) = 1;
     }
 
   replace_oacc_fn_attrib (fn, attr);
@@ -12491,6 +12495,36 @@ get_oacc_fn_attrib (tree fn)
   return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
 }
 
+/* Return true if this oacc fn attrib is for a kernels offload
+   region.  We use the TREE_PUBLIC flag of each dimension -- only
+   need to check the first one.  */
+
+bool
+oacc_fn_attrib_kernels_p (tree attr)
+{
+  return TREE_PUBLIC (TREE_VALUE (attr));
+}
+
+/* Return level at which oacc routine may spawn a partitioned loop, or
+   -1 if it is not a routine (i.e. is an offload fn).  */
+
+static int
+oacc_fn_attrib_level (tree attr)
+{
+  tree pos = TREE_VALUE (attr);
+
+  if (!TREE_PURPOSE (pos))
+    return -1;
+  
+  int ix = 0;
+  for (ix = 0; ix != GOMP_DIM_MAX;
+       ix++, pos = TREE_CHAIN (pos))
+    if (!integer_zerop (TREE_PURPOSE (pos)))
+      break;
+
+  return ix;
+}
+
 /* Extract an oacc execution dimension from FN.  FN must be an
    offloaded function or routine that has already had its execution
    dimensions lowered to the target-specific values.  */
@@ -12808,6 +12842,7 @@ expand_omp_target (struct omp_region *re
   enum built_in_function start_ix;
   location_t clause_loc;
   unsigned int flags_i = 0;
+  bool oacc_kernels_p = false;
 
   switch (gimple_omp_target_kind (entry_stmt))
     {
@@ -12827,8 +12862,10 @@ expand_omp_target (struct omp_region *re
       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
       flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
       break;
-    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
     case GF_OMP_TARGET_KIND_OACC_KERNELS:
+      oacc_kernels_p = true;
+      /* FALLTHROUGH */
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
       start_ix = BUILT_IN_GOACC_PARALLEL;
       break;
     case GF_OMP_TARGET_KIND_OACC_DATA:
@@ -13010,7 +13047,7 @@ expand_omp_target (struct omp_region *re
       break;
     case BUILT_IN_GOACC_PARALLEL:
       {
-	set_oacc_fn_attrib (child_fn, clauses, &args);
+	set_oacc_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
 	tagging = true;
       }
       /* FALLTHRU */
@@ -18929,17 +18966,17 @@ oacc_xform_loop (gcall *call)
 }
 
 /* Validate and update the dimensions for offloaded FN.  ATTRS is the
-   raw attribute.  DIMS is an array of dimensions, which is returned.
-   Returns the function level dimensionality --  the level at which an
-   offload routine wishes to partition a loop.  */
+   raw attribute.  DIMS is an array of dimensions, which is filled in.
+   LEVEL is the partitioning level of a routine, or -1 for an offload
+   region itself.  */
 
-static int
-oacc_validate_dims (tree fn, tree attrs, int *dims)
+static void
+oacc_validate_dims (tree fn, tree attrs, int *dims, int level)
 {
   tree purpose[GOMP_DIM_MAX];
   unsigned ix;
   tree pos = TREE_VALUE (attrs);
-  int fn_level = -1;
+  bool is_kernel = oacc_fn_attrib_kernels_p (attrs);
 
   /* Make sure the attribute creator attached the dimension
      information.  */
@@ -18948,21 +18985,12 @@ oacc_validate_dims (tree fn, tree attrs,
   for (ix = 0; ix != GOMP_DIM_MAX; ix++)
     {
       purpose[ix] = TREE_PURPOSE (pos);
-
-      if (purpose[ix])
-	{
-	  if (integer_zerop (purpose[ix]))
-	    fn_level = ix + 1;
-	  else if (fn_level < 0)
-	    fn_level = ix;
-	}
-
       tree val = TREE_VALUE (pos);
       dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
       pos = TREE_CHAIN (pos);
     }
 
-  bool changed = targetm.goacc.validate_dims (fn, dims, fn_level);
+  bool changed = targetm.goacc.validate_dims (fn, dims, level);
 
   /* Default anything left to 1.  */
   for (ix = 0; ix != GOMP_DIM_MAX; ix++)
@@ -18977,13 +19005,15 @@ oacc_validate_dims (tree fn, tree attrs,
       /* Replace the attribute with new values.  */
       pos = NULL_TREE;
       for (ix = GOMP_DIM_MAX; ix--;)
-	pos = tree_cons (purpose[ix],
-			 build_int_cst (integer_type_node, dims[ix]),
-			 pos);
+	{
+	  pos = tree_cons (purpose[ix],
+			   build_int_cst (integer_type_node, dims[ix]),
+			   pos);
+	  if (is_kernel)
+	    TREE_PUBLIC (pos) = 1;
+	}
       replace_oacc_fn_attrib (fn, pos);
     }
-
-  return fn_level;
 }
 
 /* Create an empty OpenACC loop structure at LOC.  */
@@ -19054,8 +19084,7 @@ static void
 new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
 {
   oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
-  int dims[GOMP_DIM_MAX];
-  int level = oacc_validate_dims (decl, attrs, dims);
+  int level = oacc_fn_attrib_level (attrs);
 
   gcc_assert (level >= 0);
 
@@ -19651,18 +19680,35 @@ static unsigned int
 execute_oacc_device_lower ()
 {
   tree attrs = get_oacc_fn_attrib (current_function_decl);
-  int dims[GOMP_DIM_MAX];
   
   if (!attrs)
     /* Not an offloaded function.  */
     return 0;
 
-  int fn_level = oacc_validate_dims (current_function_decl, attrs, dims);
-
   /* Discover, partition and process the loops.  */
   oacc_loop *loops = oacc_loop_discovery ();
+  int fn_level = oacc_fn_attrib_level (attrs);
+
+  if (dump_file)
+    fprintf (dump_file, oacc_fn_attrib_kernels_p (attrs)
+	     ? "Function is kernels offload\n"
+	     : fn_level < 0 ? "Function is parallel offload\n"
+	     : "Function is routine level %d\n", fn_level);
+
   unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
   oacc_loop_partition (loops, outer_mask);
+
+  int dims[GOMP_DIM_MAX];
+  oacc_validate_dims (current_function_decl, attrs, dims, fn_level);
+
+  if (dump_file)
+    {
+      const char *comma = "Compute dimensions [";
+      for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
+	fprintf (dump_file, "%s%d", comma, dims[ix]);
+      fprintf (dump_file, "]\n");
+    }
+
   oacc_loop_process (loops);
   if (dump_file)
     {
Index: gcc/omp-low.h
===================================================================
--- gcc/omp-low.h	(revision 232057)
+++ gcc/omp-low.h	(working copy)
@@ -33,6 +33,7 @@ extern tree omp_member_access_dummy_var
 extern void replace_oacc_fn_attrib (tree, tree);
 extern tree build_oacc_routine_dims (tree);
 extern tree get_oacc_fn_attrib (tree);
+extern bool oacc_fn_attrib_kernels_p (tree);
 extern int get_oacc_ifn_dim_arg (const gimple *);
 extern int get_oacc_fn_dim_size (tree, int);
 
Index: gcc/tree-ssa-loop.c
===================================================================
--- gcc/tree-ssa-loop.c	(revision 232057)
+++ gcc/tree-ssa-loop.c	(working copy)
@@ -154,12 +154,7 @@ gate_oacc_kernels (function *fn)
   tree oacc_function_attr = get_oacc_fn_attrib (fn->decl);
   if (oacc_function_attr == NULL_TREE)
     return false;
-
-  tree val = TREE_VALUE (oacc_function_attr);
-  while (val != NULL_TREE && TREE_VALUE (val) == NULL_TREE)
-    val = TREE_CHAIN (val);
-
-  if (val != NULL_TREE)
+  if (!oacc_fn_attrib_kernels_p (oacc_function_attr))
     return false;
 
   struct loop *loop;

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [gomp4] kernels offload fns
  2015-12-29 14:19   ` [gomp4] kernels offload fns Nathan Sidwell
@ 2016-01-07  0:11     ` Cesar Philippidis
  2016-01-07 13:35       ` Nathan Sidwell
  2016-07-27  8:59     ` Test cases to check OpenACC offloaded function's attributes and classification Thomas Schwinge
  1 sibling, 1 reply; 20+ messages in thread
From: Cesar Philippidis @ 2016-01-07  0:11 UTC (permalink / raw)
  To: Nathan Sidwell, GCC Patches; +Cc: Tom de Vries

[-- Attachment #1: Type: text/plain, Size: 442 bytes --]

On 12/29/2015 06:19 AM, Nathan Sidwell wrote:

> @@ -19385,7 +19415,7 @@ new_oacc_loop_routine (oacc_loop *parent
>  {
>    oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
>    int dims[GOMP_DIM_MAX];
> -  int level = oacc_validate_dims (decl, attrs, dims);
> +  int level = oacc_fn_attrib_level (attrs);

dims is dead and that's causing a bootstrap failure. I've applied this
patch to gomp-4_0-branch to fix it.

Cesar

[-- Attachment #2: routine-dims-bootstrap.diff --]
[-- Type: text/x-patch, Size: 537 bytes --]

2016-01-06  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/
	* omp-low.c (new_oacc_loop_routine): Remove stale dims variable.


diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index e11cefc..55b5da3 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -19454,7 +19454,6 @@ static void
 new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
 {
   oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
-  int dims[GOMP_DIM_MAX];
   int level = oacc_fn_attrib_level (attrs);
 
   gcc_assert (level >= 0);

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [gomp4] kernels offload fns
  2016-01-07  0:11     ` Cesar Philippidis
@ 2016-01-07 13:35       ` Nathan Sidwell
  0 siblings, 0 replies; 20+ messages in thread
From: Nathan Sidwell @ 2016-01-07 13:35 UTC (permalink / raw)
  To: Cesar Philippidis, GCC Patches; +Cc: Tom de Vries

On 01/06/16 19:11, Cesar Philippidis wrote:
> On 12/29/2015 06:19 AM, Nathan Sidwell wrote:
>
>> @@ -19385,7 +19415,7 @@ new_oacc_loop_routine (oacc_loop *parent
>>   {
>>     oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
>>     int dims[GOMP_DIM_MAX];
>> -  int level = oacc_validate_dims (decl, attrs, dims);
>> +  int level = oacc_fn_attrib_level (attrs);
>
> dims is dead and that's causing a bootstrap failure. I've applied this
> patch to gomp-4_0-branch to fix it.

thanks

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: Mark oacc kernels fns
  2016-01-04 15:39 Mark oacc kernels fns Nathan Sidwell
@ 2016-01-25 15:06 ` Nathan Sidwell
  2015-12-29 14:19   ` [gomp4] kernels offload fns Nathan Sidwell
  2016-01-25 15:09   ` Mark oacc kernels fns Jakub Jelinek
  0 siblings, 2 replies; 20+ messages in thread
From: Nathan Sidwell @ 2016-01-25 15:06 UTC (permalink / raw)
  To: GCC Patches, Jakub Jelinek; +Cc: Tom de Vries

On 01/04/16 10:39, Nathan Sidwell wrote:
> There's currently no robust predicate to determine whether an oacc offload
> function is for a kernels region (as opposed to a parallel region).  The test in
> tree-ssa-loop.c uses the heuristic of seeing if all the dimensions are defaulted
>   (which can easily be true for parallel offloads at that point).
>
> This patch marks TREE_PUBLIC on the offload attribute values, to note kernels
> regions,  and adds a predicate to check that.  I also broke out the function
> level determination from oacc_validate_dims, as there it was only laziness on my
> part to have not done that earlier.
>
> Using these predicates improves the dump output of the openacc device lowering
> pass too.
>
> ok?

https://gcc.gnu.org/ml/gcc-patches/2016-01/msg00092.html
ping?

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: Mark oacc kernels fns
  2016-01-25 15:06 ` Nathan Sidwell
  2015-12-29 14:19   ` [gomp4] kernels offload fns Nathan Sidwell
@ 2016-01-25 15:09   ` Jakub Jelinek
  2016-07-27 10:07     ` Use "oacc kernels" attribute for OpenACC kernels (was: Mark oacc kernels fns) Thomas Schwinge
  1 sibling, 1 reply; 20+ messages in thread
From: Jakub Jelinek @ 2016-01-25 15:09 UTC (permalink / raw)
  To: Nathan Sidwell; +Cc: GCC Patches, Tom de Vries

On Mon, Jan 25, 2016 at 10:06:50AM -0500, Nathan Sidwell wrote:
> On 01/04/16 10:39, Nathan Sidwell wrote:
> >There's currently no robust predicate to determine whether an oacc offload
> >function is for a kernels region (as opposed to a parallel region).  The test in
> >tree-ssa-loop.c uses the heuristic of seeing if all the dimensions are defaulted
> >  (which can easily be true for parallel offloads at that point).
> >
> >This patch marks TREE_PUBLIC on the offload attribute values, to note kernels
> >regions,  and adds a predicate to check that.  I also broke out the function
> >level determination from oacc_validate_dims, as there it was only laziness on my
> >part to have not done that earlier.
> >
> >Using these predicates improves the dump output of the openacc device lowering
> >pass too.
> >
> >ok?
> 
> https://gcc.gnu.org/ml/gcc-patches/2016-01/msg00092.html
> ping?

Ok, thanks.

	Jakub

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Test cases to check OpenACC offloaded function's attributes and classification
@ 2016-07-27  8:59     ` Thomas Schwinge
  2016-08-04 14:06       ` Thomas Schwinge
  0 siblings, 1 reply; 20+ messages in thread
From: Thomas Schwinge @ 2016-07-27  8:59 UTC (permalink / raw)
  To: gcc-patches

Hi!

OK for trunk?

commit 8200af082db5438be18bc60f721fcf21641c0d86
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Tue Jul 26 17:18:21 2016 +0200

    Test cases to check OpenACC offloaded function's attributes and classification
    
    	gcc/testsuite/
    	* c-c++-common/goacc/oaccdevlow-kernels.c: New file.
    	* c-c++-common/goacc/oaccdevlow-parallel.c: Likewise.
    	* c-c++-common/goacc/oaccdevlow-routine.c: Likewise.
    	* gfortran.dg/goacc/oaccdevlow-kernels.f95: Likewise.
    	* gfortran.dg/goacc/oaccdevlow-parallel.f95: Likewise.
    	* gfortran.dg/goacc/oaccdevlow-routine.f95: Likewise.
---
 .../c-c++-common/goacc/oaccdevlow-kernels.c        | 34 ++++++++++++++++++++
 .../c-c++-common/goacc/oaccdevlow-parallel.c       | 27 ++++++++++++++++
 .../c-c++-common/goacc/oaccdevlow-routine.c        | 29 +++++++++++++++++
 .../gfortran.dg/goacc/oaccdevlow-kernels.f95       | 36 ++++++++++++++++++++++
 .../gfortran.dg/goacc/oaccdevlow-parallel.f95      | 29 +++++++++++++++++
 .../gfortran.dg/goacc/oaccdevlow-routine.f95       | 28 +++++++++++++++++
 6 files changed, 183 insertions(+)

diff --git gcc/testsuite/c-c++-common/goacc/oaccdevlow-kernels.c gcc/testsuite/c-c++-common/goacc/oaccdevlow-kernels.c
new file mode 100644
index 0000000..14d650a
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/oaccdevlow-kernels.c
@@ -0,0 +1,34 @@
+/* Check offloaded function's attributes and classification for OpenACC
+   kernels.  */
+
+/* { dg-additional-options "-O2" }
+   { dg-additional-options "-fdump-tree-ompexp" }
+   { dg-additional-options "-fdump-tree-parloops1-all" }
+   { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N (1024 * 512)
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+
+void KERNELS ()
+{
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  for (unsigned int i = 0; i < N; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Check the offloaded function's attributes.
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+
+/* Check that exactly one OpenACC kernels loop is analyzed, and that it can be
+   parallelized.
+   { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } }
+   { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+   always be [1, 1, 1] for target compilation).
+   { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/oaccdevlow-parallel.c gcc/testsuite/c-c++-common/goacc/oaccdevlow-parallel.c
new file mode 100644
index 0000000..63c372a
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/oaccdevlow-parallel.c
@@ -0,0 +1,27 @@
+/* Check offloaded function's attributes and classification for OpenACC
+   parallel.  */
+
+/* { dg-additional-options "-O2" }
+   { dg-additional-options "-fdump-tree-ompexp" }
+   { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N (1024 * 512)
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+
+void PARALLEL ()
+{
+#pragma acc parallel loop copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  for (unsigned int i = 0; i < N; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Check the offloaded function's attributes.
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+   always be [1, 1, 1] for target compilation).
+   { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/oaccdevlow-routine.c gcc/testsuite/c-c++-common/goacc/oaccdevlow-routine.c
new file mode 100644
index 0000000..fa2eae7
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/oaccdevlow-routine.c
@@ -0,0 +1,29 @@
+/* Check offloaded function's attributes and classification for OpenACC
+   routine.  */
+
+/* { dg-additional-options "-O2" }
+   { dg-additional-options "-fdump-tree-ompexp" }
+   { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N (1024 * 512)
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+#pragma acc declare copyin (a, b) create (c)
+
+#pragma acc routine worker
+void ROUTINE ()
+{
+#pragma acc loop
+  for (unsigned int i = 0; i < N; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Check the offloaded function's attributes.
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp declare target, oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "ompexp" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+   always be [1, 1, 1] for target compilation).
+   { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/gfortran.dg/goacc/oaccdevlow-kernels.f95 gcc/testsuite/gfortran.dg/goacc/oaccdevlow-kernels.f95
new file mode 100644
index 0000000..8ee641e
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/oaccdevlow-kernels.f95
@@ -0,0 +1,36 @@
+! Check offloaded function's attributes and classification for OpenACC
+! kernels.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-parloops1-all" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+program main
+  implicit none
+  integer, parameter         :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer                    :: i
+
+  call setup(a, b)
+
+  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+  do i = 0, n - 1
+     c(i) = a(i) + b(i)
+  end do
+  !$acc end kernels
+end program main
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+
+! Check that exactly one OpenACC kernels loop is analyzed, and that it can be
+! parallelized.
+! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } }
+! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be [1, 1, 1] for target compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/oaccdevlow-parallel.f95 gcc/testsuite/gfortran.dg/goacc/oaccdevlow-parallel.f95
new file mode 100644
index 0000000..0975eb8
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/oaccdevlow-parallel.f95
@@ -0,0 +1,29 @@
+! Check offloaded function's attributes and classification for OpenACC
+! parallel.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+program main
+  implicit none
+  integer, parameter         :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer                    :: i
+
+  call setup(a, b)
+
+  !$acc parallel loop copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+  do i = 0, n - 1
+     c(i) = a(i) + b(i)
+  end do
+  !$acc end parallel loop
+end program main
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be [1, 1, 1] for target compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/oaccdevlow-routine.f95 gcc/testsuite/gfortran.dg/goacc/oaccdevlow-routine.f95
new file mode 100644
index 0000000..a68b5eb
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/oaccdevlow-routine.f95
@@ -0,0 +1,28 @@
+! Check offloaded function's attributes and classification for OpenACC
+! routine.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+subroutine ROUTINE
+  !$acc routine worker
+  integer, parameter         :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer                    :: i
+
+  call setup(a, b)
+
+  !$acc loop
+  do i = 0, n - 1
+     c(i) = a(i) + b(i)
+  end do
+end subroutine ROUTINE
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp declare target, oacc function \\(0 0, 1 0, 1 0\\)\\)\\)" 1 "ompexp" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be [1, 1, 1] for target compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }


Grüße
 Thomas

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Use "oacc kernels" attribute for OpenACC kernels (was: Mark oacc kernels fns)
  2016-01-25 15:09   ` Mark oacc kernels fns Jakub Jelinek
@ 2016-07-27 10:07     ` Thomas Schwinge
  2016-08-04 14:07       ` Use "oacc kernels" attribute for OpenACC kernels Thomas Schwinge
  0 siblings, 1 reply; 20+ messages in thread
From: Thomas Schwinge @ 2016-07-27 10:07 UTC (permalink / raw)
  To: GCC Patches, Nathan Sidwell; +Cc: Tom de Vries, Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 9801 bytes --]

Hi!

On Mon, 25 Jan 2016 16:09:14 +0100, Jakub Jelinek <jakub@redhat.com> wrote:
> On Mon, Jan 25, 2016 at 10:06:50AM -0500, Nathan Sidwell wrote:
> > On 01/04/16 10:39, Nathan Sidwell wrote:
> > >There's currently no robust predicate to determine whether an oacc offload
> > >function is for a kernels region (as opposed to a parallel region).
> > >[...]
> > >
> > >This patch marks TREE_PUBLIC on the offload attribute values, to note kernels
> > >regions,  and adds a predicate to check that.  [...]
> > >
> > >Using these predicates improves the dump output of the openacc device lowering
> > >pass too.

I just submitted a patch adding "Test cases to check OpenACC offloaded
function's attributes and classification",
<http://news.gmane.org/find-root.php?message_id=%3C87zip3jw2x.fsf%40hertz.schwinge.homeip.net%3E>,
to actually check the dump output of "oaccdevlow" -- it works.  ;-)

> > https://gcc.gnu.org/ml/gcc-patches/2016-01/msg00092.html
> > ping?
> 
> Ok, thanks.

It's conceptually and code-wise simpler to just use a "oacc kernels"
attribute for that.  (And, that will make another patch I'm working on
less convoluted.)

I'm open to suggestions if there is a better place to set the "oacc
kernels" attribute -- I put it into expand_omp_target, where another
special thing for GF_OMP_TARGET_KIND_OACC_KERNELS is already being done,
and before "rewriting" GF_OMP_TARGET_KIND_OACC_KERNELS (and
GF_OMP_TARGET_KIND_OACC_PARALLEL) into BUILT_IN_GOACC_PARALLEL.  My
reasoning for not setting the attribute earlier (like, in the front
ends), is that at that point in/before expand_omp_target, we still have
the distinction between OACC_PARALLEL/OACC_KERNELS (tree codes), and
later GF_OMP_TARGET_KIND_OACC_PARALLEL/GF_OMP_TARGET_KIND_OACC_KERNELS
(GIMPLE_OMP_TARGET subcodes).  Another question/possible cleanup of
course might be to actually do set the "oacc kernels" attribute in the
front end and merge OACC_KERNELS into OACC_PARALLEL, and
GF_OMP_TARGET_KIND_OACC_KERNELS into GF_OMP_TARGET_KIND_OACC_PARALLEL?

But anyway, as a first step: OK for trunk?

commit 2e6dc8dfd679d8dae814e325afa2547b502827ef
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Tue Jul 26 17:44:31 2016 +0200

    Use "oacc kernels" attribute for OpenACC kernels
    
    	gcc/
    	* omp-low.c (expand_omp_target) <GF_OMP_TARGET_KIND_OACC_KERNELS>:
    	Set "oacc kernels" attribute.
    	(set_oacc_fn_attrib): Remove is_kernel formal parameter.  Adjust
    	all users.
    	(oacc_fn_attrib_kernels_p): Remove function.
    	(execute_oacc_device_lower): Look for "oacc kernels" attribute
    	instead of calling oacc_fn_attrib_kernels_p.
    	* tree-ssa-loop.c (gate_oacc_kernels): Likewise.
    	* tree-parloops.c (create_parallel_loop): If oacc_kernels_p,
    	assert "oacc kernels" attribute is set.
---
 gcc/omp-low.c                                      | 53 ++++++++--------------
 gcc/omp-low.h                                      |  3 +-
 gcc/tree-parloops.c                                |  5 +-
 gcc/tree-ssa-loop.c                                |  5 +-
 10 files changed, 34 insertions(+), 48 deletions(-)

diff --git gcc/omp-low.c gcc/omp-low.c
index c75452c..a35556d 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -12552,11 +12552,10 @@ replace_oacc_fn_attrib (tree fn, tree dims)
 
 /* Scan CLAUSES for launch dimensions and attach them to the oacc
    function attribute.  Push any that are non-constant onto the ARGS
-   list, along with an appropriate GOMP_LAUNCH_DIM tag.  IS_KERNEL is
-   true, if these are for a kernels region offload function.  */
+   list, along with an appropriate GOMP_LAUNCH_DIM tag.  */
 
 void
-set_oacc_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
+set_oacc_fn_attrib (tree fn, tree clauses, vec<tree> *args)
 {
   /* Must match GOMP_DIM ordering.  */
   static const omp_clause_code ids[]
@@ -12581,9 +12580,6 @@ set_oacc_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
 	  non_const |= GOMP_DIM_MASK (ix);
 	}
       attr = tree_cons (NULL_TREE, dim, attr);
-      /* Note kernelness with TREE_PUBLIC.  */
-      if (is_kernel)
-	TREE_PUBLIC (attr) = 1;
     }
 
   replace_oacc_fn_attrib (fn, attr);
@@ -12652,16 +12648,6 @@ get_oacc_fn_attrib (tree fn)
   return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
 }
 
-/* Return true if this oacc fn attrib is for a kernels offload
-   region.  We use the TREE_PUBLIC flag of each dimension -- only
-   need to check the first one.  */
-
-bool
-oacc_fn_attrib_kernels_p (tree attr)
-{
-  return TREE_PUBLIC (TREE_VALUE (attr));
-}
-
 /* Return level at which oacc routine may spawn a partitioned loop, or
    -1 if it is not a routine (i.e. is an offload fn).  */
 
@@ -13044,7 +13030,12 @@ expand_omp_target (struct omp_region *region)
   exit_bb = region->exit;
 
   if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
-    mark_loops_in_oacc_kernels_region (region->entry, region->exit);
+    {
+      DECL_ATTRIBUTES (child_fn)
+	= tree_cons (get_identifier ("oacc kernels"),
+		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
+      mark_loops_in_oacc_kernels_region (region->entry, region->exit);
+    }
 
   if (offloaded)
     {
@@ -13224,7 +13215,6 @@ expand_omp_target (struct omp_region *region)
   enum built_in_function start_ix;
   location_t clause_loc;
   unsigned int flags_i = 0;
-  bool oacc_kernels_p = false;
 
   switch (gimple_omp_target_kind (entry_stmt))
     {
@@ -13245,8 +13235,6 @@ expand_omp_target (struct omp_region *region)
       flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
       break;
     case GF_OMP_TARGET_KIND_OACC_KERNELS:
-      oacc_kernels_p = true;
-      /* FALLTHROUGH */
     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
       start_ix = BUILT_IN_GOACC_PARALLEL;
       break;
@@ -13409,7 +13397,7 @@ expand_omp_target (struct omp_region *region)
       break;
     case BUILT_IN_GOACC_PARALLEL:
       {
-	set_oacc_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
+	set_oacc_fn_attrib (child_fn, clauses, &args);
 	tagging = true;
       }
       /* FALLTHRU */
@@ -18851,7 +18839,6 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
   tree purpose[GOMP_DIM_MAX];
   unsigned ix;
   tree pos = TREE_VALUE (attrs);
-  bool is_kernel = oacc_fn_attrib_kernels_p (attrs);
 
   /* Make sure the attribute creator attached the dimension
      information.  */
@@ -18898,13 +18885,9 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
       /* Replace the attribute with new values.  */
       pos = NULL_TREE;
       for (ix = GOMP_DIM_MAX; ix--;)
-	{
-	  pos = tree_cons (purpose[ix],
-			   build_int_cst (integer_type_node, dims[ix]),
-			   pos);
-	  if (is_kernel)
-	    TREE_PUBLIC (pos) = 1;
-	}
+	pos = tree_cons (purpose[ix],
+			 build_int_cst (integer_type_node, dims[ix]),
+			 pos);
       replace_oacc_fn_attrib (fn, pos);
     }
 }
@@ -19644,10 +19627,14 @@ execute_oacc_device_lower ()
   int fn_level = oacc_fn_attrib_level (attrs);
 
   if (dump_file)
-    fprintf (dump_file, oacc_fn_attrib_kernels_p (attrs)
-	     ? "Function is kernels offload\n"
-	     : fn_level < 0 ? "Function is parallel offload\n"
-	     : "Function is routine level %d\n", fn_level);
+    {
+      if (lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (current_function_decl)))
+	fprintf (dump_file, "Function is OpenACC kernels offload\n");
+      else if (fn_level < 0)
+	fprintf (dump_file, "Function is OpenACC parallel offload\n");
+      else
+	fprintf (dump_file, "Function is OpenACC routine level %d\n", fn_level);
+    }
 
   unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
   unsigned used_mask = oacc_loop_partition (loops, outer_mask);
diff --git gcc/omp-low.h gcc/omp-low.h
index b1f7885..64caef8 100644
--- gcc/omp-low.h
+++ gcc/omp-low.h
@@ -33,8 +33,7 @@ extern tree omp_member_access_dummy_var (tree);
 extern void replace_oacc_fn_attrib (tree, tree);
 extern tree build_oacc_routine_dims (tree);
 extern tree get_oacc_fn_attrib (tree);
-extern void set_oacc_fn_attrib (tree, tree, bool, vec<tree> *);
-extern bool oacc_fn_attrib_kernels_p (tree);
+extern void set_oacc_fn_attrib (tree, tree, vec<tree> *);
 extern int get_oacc_ifn_dim_arg (const gimple *);
 extern int get_oacc_fn_dim_size (tree, int);
 
diff --git gcc/tree-parloops.c gcc/tree-parloops.c
index a160152..5706b0c 100644
--- gcc/tree-parloops.c
+++ gcc/tree-parloops.c
@@ -2042,10 +2042,13 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
   /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
   if (oacc_kernels_p)
     {
+      gcc_checking_assert (lookup_attribute ("oacc kernels",
+					     DECL_ATTRIBUTES (cfun->decl)));
+
       tree clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
       OMP_CLAUSE_NUM_GANGS_EXPR (clause)
 	= build_int_cst (integer_type_node, n_threads);
-      set_oacc_fn_attrib (cfun->decl, clause, true, NULL);
+      set_oacc_fn_attrib (cfun->decl, clause, NULL);
     }
   else
     {
diff --git gcc/tree-ssa-loop.c gcc/tree-ssa-loop.c
index 06354e3..da9a806 100644
--- gcc/tree-ssa-loop.c
+++ gcc/tree-ssa-loop.c
@@ -151,10 +151,7 @@ gate_oacc_kernels (function *fn)
   if (!flag_openacc)
     return false;
 
-  tree oacc_function_attr = get_oacc_fn_attrib (fn->decl);
-  if (oacc_function_attr == NULL_TREE)
-    return false;
-  if (!oacc_fn_attrib_kernels_p (oacc_function_attr))
+  if (!lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn->decl)))
     return false;
 
   struct loop *loop;


Grüße
 Thomas

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: Test cases to check OpenACC offloaded function's attributes and classification
  2016-07-27  8:59     ` Test cases to check OpenACC offloaded function's attributes and classification Thomas Schwinge
@ 2016-08-04 14:06       ` Thomas Schwinge
  2017-05-08 17:05         ` Thomas Schwinge
  0 siblings, 1 reply; 20+ messages in thread
From: Thomas Schwinge @ 2016-08-04 14:06 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 10393 bytes --]

Hi!

Ping.

On Wed, 27 Jul 2016 10:59:02 +0200, I wrote:
> Hi!
> 
> OK for trunk?
> 
> commit 8200af082db5438be18bc60f721fcf21641c0d86
> Author: Thomas Schwinge <thomas@codesourcery.com>
> Date:   Tue Jul 26 17:18:21 2016 +0200
> 
>     Test cases to check OpenACC offloaded function's attributes and classification
>     
>     	gcc/testsuite/
>     	* c-c++-common/goacc/oaccdevlow-kernels.c: New file.
>     	* c-c++-common/goacc/oaccdevlow-parallel.c: Likewise.
>     	* c-c++-common/goacc/oaccdevlow-routine.c: Likewise.
>     	* gfortran.dg/goacc/oaccdevlow-kernels.f95: Likewise.
>     	* gfortran.dg/goacc/oaccdevlow-parallel.f95: Likewise.
>     	* gfortran.dg/goacc/oaccdevlow-routine.f95: Likewise.
> ---
>  .../c-c++-common/goacc/oaccdevlow-kernels.c        | 34 ++++++++++++++++++++
>  .../c-c++-common/goacc/oaccdevlow-parallel.c       | 27 ++++++++++++++++
>  .../c-c++-common/goacc/oaccdevlow-routine.c        | 29 +++++++++++++++++
>  .../gfortran.dg/goacc/oaccdevlow-kernels.f95       | 36 ++++++++++++++++++++++
>  .../gfortran.dg/goacc/oaccdevlow-parallel.f95      | 29 +++++++++++++++++
>  .../gfortran.dg/goacc/oaccdevlow-routine.f95       | 28 +++++++++++++++++
>  6 files changed, 183 insertions(+)
> 
> diff --git gcc/testsuite/c-c++-common/goacc/oaccdevlow-kernels.c gcc/testsuite/c-c++-common/goacc/oaccdevlow-kernels.c
> new file mode 100644
> index 0000000..14d650a
> --- /dev/null
> +++ gcc/testsuite/c-c++-common/goacc/oaccdevlow-kernels.c
> @@ -0,0 +1,34 @@
> +/* Check offloaded function's attributes and classification for OpenACC
> +   kernels.  */
> +
> +/* { dg-additional-options "-O2" }
> +   { dg-additional-options "-fdump-tree-ompexp" }
> +   { dg-additional-options "-fdump-tree-parloops1-all" }
> +   { dg-additional-options "-fdump-tree-oaccdevlow" } */
> +
> +#define N (1024 * 512)
> +
> +extern unsigned int *__restrict a;
> +extern unsigned int *__restrict b;
> +extern unsigned int *__restrict c;
> +
> +void KERNELS ()
> +{
> +#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
> +  for (unsigned int i = 0; i < N; i++)
> +    c[i] = a[i] + b[i];
> +}
> +
> +/* Check the offloaded function's attributes.
> +   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
> +
> +/* Check that exactly one OpenACC kernels loop is analyzed, and that it can be
> +   parallelized.
> +   { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
> +   { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } }
> +   { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
> +
> +/* Check the offloaded function's classification and compute dimensions (will
> +   always be [1, 1, 1] for target compilation).
> +   { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
> +   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } } */
> diff --git gcc/testsuite/c-c++-common/goacc/oaccdevlow-parallel.c gcc/testsuite/c-c++-common/goacc/oaccdevlow-parallel.c
> new file mode 100644
> index 0000000..63c372a
> --- /dev/null
> +++ gcc/testsuite/c-c++-common/goacc/oaccdevlow-parallel.c
> @@ -0,0 +1,27 @@
> +/* Check offloaded function's attributes and classification for OpenACC
> +   parallel.  */
> +
> +/* { dg-additional-options "-O2" }
> +   { dg-additional-options "-fdump-tree-ompexp" }
> +   { dg-additional-options "-fdump-tree-oaccdevlow" } */
> +
> +#define N (1024 * 512)
> +
> +extern unsigned int *__restrict a;
> +extern unsigned int *__restrict b;
> +extern unsigned int *__restrict c;
> +
> +void PARALLEL ()
> +{
> +#pragma acc parallel loop copyin (a[0:N], b[0:N]) copyout (c[0:N])
> +  for (unsigned int i = 0; i < N; i++)
> +    c[i] = a[i] + b[i];
> +}
> +
> +/* Check the offloaded function's attributes.
> +   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
> +
> +/* Check the offloaded function's classification and compute dimensions (will
> +   always be [1, 1, 1] for target compilation).
> +   { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
> +   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } } */
> diff --git gcc/testsuite/c-c++-common/goacc/oaccdevlow-routine.c gcc/testsuite/c-c++-common/goacc/oaccdevlow-routine.c
> new file mode 100644
> index 0000000..fa2eae7
> --- /dev/null
> +++ gcc/testsuite/c-c++-common/goacc/oaccdevlow-routine.c
> @@ -0,0 +1,29 @@
> +/* Check offloaded function's attributes and classification for OpenACC
> +   routine.  */
> +
> +/* { dg-additional-options "-O2" }
> +   { dg-additional-options "-fdump-tree-ompexp" }
> +   { dg-additional-options "-fdump-tree-oaccdevlow" } */
> +
> +#define N (1024 * 512)
> +
> +extern unsigned int *__restrict a;
> +extern unsigned int *__restrict b;
> +extern unsigned int *__restrict c;
> +#pragma acc declare copyin (a, b) create (c)
> +
> +#pragma acc routine worker
> +void ROUTINE ()
> +{
> +#pragma acc loop
> +  for (unsigned int i = 0; i < N; i++)
> +    c[i] = a[i] + b[i];
> +}
> +
> +/* Check the offloaded function's attributes.
> +   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp declare target, oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "ompexp" } } */
> +
> +/* Check the offloaded function's classification and compute dimensions (will
> +   always be [1, 1, 1] for target compilation).
> +   { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
> +   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } } */
> diff --git gcc/testsuite/gfortran.dg/goacc/oaccdevlow-kernels.f95 gcc/testsuite/gfortran.dg/goacc/oaccdevlow-kernels.f95
> new file mode 100644
> index 0000000..8ee641e
> --- /dev/null
> +++ gcc/testsuite/gfortran.dg/goacc/oaccdevlow-kernels.f95
> @@ -0,0 +1,36 @@
> +! Check offloaded function's attributes and classification for OpenACC
> +! kernels.
> +
> +! { dg-additional-options "-O2" }
> +! { dg-additional-options "-fdump-tree-ompexp" }
> +! { dg-additional-options "-fdump-tree-parloops1-all" }
> +! { dg-additional-options "-fdump-tree-oaccdevlow" }
> +
> +program main
> +  implicit none
> +  integer, parameter         :: n = 1024
> +  integer, dimension (0:n-1) :: a, b, c
> +  integer                    :: i
> +
> +  call setup(a, b)
> +
> +  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
> +  do i = 0, n - 1
> +     c(i) = a(i) + b(i)
> +  end do
> +  !$acc end kernels
> +end program main
> +
> +! Check the offloaded function's attributes.
> +! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
> +
> +! Check that exactly one OpenACC kernels loop is analyzed, and that it can be
> +! parallelized.
> +! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
> +! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } }
> +! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
> +
> +! Check the offloaded function's classification and compute dimensions (will
> +! always be [1, 1, 1] for target compilation).
> +! { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
> +! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
> diff --git gcc/testsuite/gfortran.dg/goacc/oaccdevlow-parallel.f95 gcc/testsuite/gfortran.dg/goacc/oaccdevlow-parallel.f95
> new file mode 100644
> index 0000000..0975eb8
> --- /dev/null
> +++ gcc/testsuite/gfortran.dg/goacc/oaccdevlow-parallel.f95
> @@ -0,0 +1,29 @@
> +! Check offloaded function's attributes and classification for OpenACC
> +! parallel.
> +
> +! { dg-additional-options "-O2" }
> +! { dg-additional-options "-fdump-tree-ompexp" }
> +! { dg-additional-options "-fdump-tree-oaccdevlow" }
> +
> +program main
> +  implicit none
> +  integer, parameter         :: n = 1024
> +  integer, dimension (0:n-1) :: a, b, c
> +  integer                    :: i
> +
> +  call setup(a, b)
> +
> +  !$acc parallel loop copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
> +  do i = 0, n - 1
> +     c(i) = a(i) + b(i)
> +  end do
> +  !$acc end parallel loop
> +end program main
> +
> +! Check the offloaded function's attributes.
> +! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
> +
> +! Check the offloaded function's classification and compute dimensions (will
> +! always be [1, 1, 1] for target compilation).
> +! { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
> +! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
> diff --git gcc/testsuite/gfortran.dg/goacc/oaccdevlow-routine.f95 gcc/testsuite/gfortran.dg/goacc/oaccdevlow-routine.f95
> new file mode 100644
> index 0000000..a68b5eb
> --- /dev/null
> +++ gcc/testsuite/gfortran.dg/goacc/oaccdevlow-routine.f95
> @@ -0,0 +1,28 @@
> +! Check offloaded function's attributes and classification for OpenACC
> +! routine.
> +
> +! { dg-additional-options "-O2" }
> +! { dg-additional-options "-fdump-tree-ompexp" }
> +! { dg-additional-options "-fdump-tree-oaccdevlow" }
> +
> +subroutine ROUTINE
> +  !$acc routine worker
> +  integer, parameter         :: n = 1024
> +  integer, dimension (0:n-1) :: a, b, c
> +  integer                    :: i
> +
> +  call setup(a, b)
> +
> +  !$acc loop
> +  do i = 0, n - 1
> +     c(i) = a(i) + b(i)
> +  end do
> +end subroutine ROUTINE
> +
> +! Check the offloaded function's attributes.
> +! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp declare target, oacc function \\(0 0, 1 0, 1 0\\)\\)\\)" 1 "ompexp" } }
> +
> +! Check the offloaded function's classification and compute dimensions (will
> +! always be [1, 1, 1] for target compilation).
> +! { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
> +! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }


Grüße
 Thomas

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: Use "oacc kernels" attribute for OpenACC kernels
  2016-07-27 10:07     ` Use "oacc kernels" attribute for OpenACC kernels (was: Mark oacc kernels fns) Thomas Schwinge
@ 2016-08-04 14:07       ` Thomas Schwinge
  2017-05-08 19:29         ` Thomas Schwinge
  0 siblings, 1 reply; 20+ messages in thread
From: Thomas Schwinge @ 2016-08-04 14:07 UTC (permalink / raw)
  To: GCC Patches, Nathan Sidwell; +Cc: Tom de Vries, Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 10346 bytes --]

Hi!

Ping.

On Wed, 27 Jul 2016 12:06:59 +0200, I wrote:
> On Mon, 25 Jan 2016 16:09:14 +0100, Jakub Jelinek <jakub@redhat.com> wrote:
> > On Mon, Jan 25, 2016 at 10:06:50AM -0500, Nathan Sidwell wrote:
> > > On 01/04/16 10:39, Nathan Sidwell wrote:
> > > >There's currently no robust predicate to determine whether an oacc offload
> > > >function is for a kernels region (as opposed to a parallel region).
> > > >[...]
> > > >
> > > >This patch marks TREE_PUBLIC on the offload attribute values, to note kernels
> > > >regions,  and adds a predicate to check that.  [...]
> > > >
> > > >Using these predicates improves the dump output of the openacc device lowering
> > > >pass too.
> 
> I just submitted a patch adding "Test cases to check OpenACC offloaded
> function's attributes and classification",
> <http://news.gmane.org/find-root.php?message_id=%3C87zip3jw2x.fsf%40hertz.schwinge.homeip.net%3E>,
> to actually check the dump output of "oaccdevlow" -- it works.  ;-)
> 
> > > https://gcc.gnu.org/ml/gcc-patches/2016-01/msg00092.html
> > > ping?
> > 
> > Ok, thanks.
> 
> It's conceptually and code-wise simpler to just use a "oacc kernels"
> attribute for that.  (And, that will make another patch I'm working on
> less convoluted.)
> 
> I'm open to suggestions if there is a better place to set the "oacc
> kernels" attribute -- I put it into expand_omp_target, where another
> special thing for GF_OMP_TARGET_KIND_OACC_KERNELS is already being done,
> and before "rewriting" GF_OMP_TARGET_KIND_OACC_KERNELS (and
> GF_OMP_TARGET_KIND_OACC_PARALLEL) into BUILT_IN_GOACC_PARALLEL.  My
> reasoning for not setting the attribute earlier (like, in the front
> ends), is that at that point in/before expand_omp_target, we still have
> the distinction between OACC_PARALLEL/OACC_KERNELS (tree codes), and
> later GF_OMP_TARGET_KIND_OACC_PARALLEL/GF_OMP_TARGET_KIND_OACC_KERNELS
> (GIMPLE_OMP_TARGET subcodes).  Another question/possible cleanup of
> course might be to actually do set the "oacc kernels" attribute in the
> front end and merge OACC_KERNELS into OACC_PARALLEL, and
> GF_OMP_TARGET_KIND_OACC_KERNELS into GF_OMP_TARGET_KIND_OACC_PARALLEL?
> 
> But anyway, as a first step: OK for trunk?
> 
> commit 2e6dc8dfd679d8dae814e325afa2547b502827ef
> Author: Thomas Schwinge <thomas@codesourcery.com>
> Date:   Tue Jul 26 17:44:31 2016 +0200
> 
>     Use "oacc kernels" attribute for OpenACC kernels
>     
>     	gcc/
>     	* omp-low.c (expand_omp_target) <GF_OMP_TARGET_KIND_OACC_KERNELS>:
>     	Set "oacc kernels" attribute.
>     	(set_oacc_fn_attrib): Remove is_kernel formal parameter.  Adjust
>     	all users.
>     	(oacc_fn_attrib_kernels_p): Remove function.
>     	(execute_oacc_device_lower): Look for "oacc kernels" attribute
>     	instead of calling oacc_fn_attrib_kernels_p.
>     	* tree-ssa-loop.c (gate_oacc_kernels): Likewise.
>     	* tree-parloops.c (create_parallel_loop): If oacc_kernels_p,
>     	assert "oacc kernels" attribute is set.
> ---
>  gcc/omp-low.c                                      | 53 ++++++++--------------
>  gcc/omp-low.h                                      |  3 +-
>  gcc/tree-parloops.c                                |  5 +-
>  gcc/tree-ssa-loop.c                                |  5 +-
>  10 files changed, 34 insertions(+), 48 deletions(-)
> 
> diff --git gcc/omp-low.c gcc/omp-low.c
> index c75452c..a35556d 100644
> --- gcc/omp-low.c
> +++ gcc/omp-low.c
> @@ -12552,11 +12552,10 @@ replace_oacc_fn_attrib (tree fn, tree dims)
>  
>  /* Scan CLAUSES for launch dimensions and attach them to the oacc
>     function attribute.  Push any that are non-constant onto the ARGS
> -   list, along with an appropriate GOMP_LAUNCH_DIM tag.  IS_KERNEL is
> -   true, if these are for a kernels region offload function.  */
> +   list, along with an appropriate GOMP_LAUNCH_DIM tag.  */
>  
>  void
> -set_oacc_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
> +set_oacc_fn_attrib (tree fn, tree clauses, vec<tree> *args)
>  {
>    /* Must match GOMP_DIM ordering.  */
>    static const omp_clause_code ids[]
> @@ -12581,9 +12580,6 @@ set_oacc_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
>  	  non_const |= GOMP_DIM_MASK (ix);
>  	}
>        attr = tree_cons (NULL_TREE, dim, attr);
> -      /* Note kernelness with TREE_PUBLIC.  */
> -      if (is_kernel)
> -	TREE_PUBLIC (attr) = 1;
>      }
>  
>    replace_oacc_fn_attrib (fn, attr);
> @@ -12652,16 +12648,6 @@ get_oacc_fn_attrib (tree fn)
>    return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
>  }
>  
> -/* Return true if this oacc fn attrib is for a kernels offload
> -   region.  We use the TREE_PUBLIC flag of each dimension -- only
> -   need to check the first one.  */
> -
> -bool
> -oacc_fn_attrib_kernels_p (tree attr)
> -{
> -  return TREE_PUBLIC (TREE_VALUE (attr));
> -}
> -
>  /* Return level at which oacc routine may spawn a partitioned loop, or
>     -1 if it is not a routine (i.e. is an offload fn).  */
>  
> @@ -13044,7 +13030,12 @@ expand_omp_target (struct omp_region *region)
>    exit_bb = region->exit;
>  
>    if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
> -    mark_loops_in_oacc_kernels_region (region->entry, region->exit);
> +    {
> +      DECL_ATTRIBUTES (child_fn)
> +	= tree_cons (get_identifier ("oacc kernels"),
> +		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
> +      mark_loops_in_oacc_kernels_region (region->entry, region->exit);
> +    }
>  
>    if (offloaded)
>      {
> @@ -13224,7 +13215,6 @@ expand_omp_target (struct omp_region *region)
>    enum built_in_function start_ix;
>    location_t clause_loc;
>    unsigned int flags_i = 0;
> -  bool oacc_kernels_p = false;
>  
>    switch (gimple_omp_target_kind (entry_stmt))
>      {
> @@ -13245,8 +13235,6 @@ expand_omp_target (struct omp_region *region)
>        flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
>        break;
>      case GF_OMP_TARGET_KIND_OACC_KERNELS:
> -      oacc_kernels_p = true;
> -      /* FALLTHROUGH */
>      case GF_OMP_TARGET_KIND_OACC_PARALLEL:
>        start_ix = BUILT_IN_GOACC_PARALLEL;
>        break;
> @@ -13409,7 +13397,7 @@ expand_omp_target (struct omp_region *region)
>        break;
>      case BUILT_IN_GOACC_PARALLEL:
>        {
> -	set_oacc_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
> +	set_oacc_fn_attrib (child_fn, clauses, &args);
>  	tagging = true;
>        }
>        /* FALLTHRU */
> @@ -18851,7 +18839,6 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
>    tree purpose[GOMP_DIM_MAX];
>    unsigned ix;
>    tree pos = TREE_VALUE (attrs);
> -  bool is_kernel = oacc_fn_attrib_kernels_p (attrs);
>  
>    /* Make sure the attribute creator attached the dimension
>       information.  */
> @@ -18898,13 +18885,9 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
>        /* Replace the attribute with new values.  */
>        pos = NULL_TREE;
>        for (ix = GOMP_DIM_MAX; ix--;)
> -	{
> -	  pos = tree_cons (purpose[ix],
> -			   build_int_cst (integer_type_node, dims[ix]),
> -			   pos);
> -	  if (is_kernel)
> -	    TREE_PUBLIC (pos) = 1;
> -	}
> +	pos = tree_cons (purpose[ix],
> +			 build_int_cst (integer_type_node, dims[ix]),
> +			 pos);
>        replace_oacc_fn_attrib (fn, pos);
>      }
>  }
> @@ -19644,10 +19627,14 @@ execute_oacc_device_lower ()
>    int fn_level = oacc_fn_attrib_level (attrs);
>  
>    if (dump_file)
> -    fprintf (dump_file, oacc_fn_attrib_kernels_p (attrs)
> -	     ? "Function is kernels offload\n"
> -	     : fn_level < 0 ? "Function is parallel offload\n"
> -	     : "Function is routine level %d\n", fn_level);
> +    {
> +      if (lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (current_function_decl)))
> +	fprintf (dump_file, "Function is OpenACC kernels offload\n");
> +      else if (fn_level < 0)
> +	fprintf (dump_file, "Function is OpenACC parallel offload\n");
> +      else
> +	fprintf (dump_file, "Function is OpenACC routine level %d\n", fn_level);
> +    }
>  
>    unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
>    unsigned used_mask = oacc_loop_partition (loops, outer_mask);
> diff --git gcc/omp-low.h gcc/omp-low.h
> index b1f7885..64caef8 100644
> --- gcc/omp-low.h
> +++ gcc/omp-low.h
> @@ -33,8 +33,7 @@ extern tree omp_member_access_dummy_var (tree);
>  extern void replace_oacc_fn_attrib (tree, tree);
>  extern tree build_oacc_routine_dims (tree);
>  extern tree get_oacc_fn_attrib (tree);
> -extern void set_oacc_fn_attrib (tree, tree, bool, vec<tree> *);
> -extern bool oacc_fn_attrib_kernels_p (tree);
> +extern void set_oacc_fn_attrib (tree, tree, vec<tree> *);
>  extern int get_oacc_ifn_dim_arg (const gimple *);
>  extern int get_oacc_fn_dim_size (tree, int);
>  
> diff --git gcc/tree-parloops.c gcc/tree-parloops.c
> index a160152..5706b0c 100644
> --- gcc/tree-parloops.c
> +++ gcc/tree-parloops.c
> @@ -2042,10 +2042,13 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
>    /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
>    if (oacc_kernels_p)
>      {
> +      gcc_checking_assert (lookup_attribute ("oacc kernels",
> +					     DECL_ATTRIBUTES (cfun->decl)));
> +
>        tree clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
>        OMP_CLAUSE_NUM_GANGS_EXPR (clause)
>  	= build_int_cst (integer_type_node, n_threads);
> -      set_oacc_fn_attrib (cfun->decl, clause, true, NULL);
> +      set_oacc_fn_attrib (cfun->decl, clause, NULL);
>      }
>    else
>      {
> diff --git gcc/tree-ssa-loop.c gcc/tree-ssa-loop.c
> index 06354e3..da9a806 100644
> --- gcc/tree-ssa-loop.c
> +++ gcc/tree-ssa-loop.c
> @@ -151,10 +151,7 @@ gate_oacc_kernels (function *fn)
>    if (!flag_openacc)
>      return false;
>  
> -  tree oacc_function_attr = get_oacc_fn_attrib (fn->decl);
> -  if (oacc_function_attr == NULL_TREE)
> -    return false;
> -  if (!oacc_fn_attrib_kernels_p (oacc_function_attr))
> +  if (!lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn->decl)))
>      return false;
>  
>    struct loop *loop;


Grüße
 Thomas

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: Test cases to check OpenACC offloaded function's attributes and classification
  2016-08-04 14:06       ` Thomas Schwinge
@ 2017-05-08 17:05         ` Thomas Schwinge
  2017-05-10 15:53           ` Jakub Jelinek
  0 siblings, 1 reply; 20+ messages in thread
From: Thomas Schwinge @ 2017-05-08 17:05 UTC (permalink / raw)
  To: gcc-patches, Jakub Jelinek

Hi!

Ping.

On Thu, 4 Aug 2016 16:06:10 +0200, I wrote:
> Ping.
> 
> On Wed, 27 Jul 2016 10:59:02 +0200, I wrote:
> > OK for trunk?

(In the meantime, I also added some more testing.)

commit b7d61270dfc581a6ea130f7a4fa7506a0a5762d8
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Mon May 8 18:22:50 2017 +0200

    Test cases to check OpenACC offloaded function's attributes and classification
    
            gcc/testsuite/
            * c-c++-common/goacc/classify-kernels-unparallelized.c: New file.
            * c-c++-common/goacc/classify-kernels.c: Likewise.
            * c-c++-common/goacc/classify-parallel.c: Likewise.
            * c-c++-common/goacc/classify-routine.c: Likewise.
            * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
            * gfortran.dg/goacc/classify-kernels.f95: Likewise.
            * gfortran.dg/goacc/classify-parallel.f95: Likewise.
            * gfortran.dg/goacc/classify-routine.f95: Likewise.
---
 .../goacc/classify-kernels-unparallelized.c        | 39 ++++++++++++++++++++
 .../c-c++-common/goacc/classify-kernels.c          | 35 ++++++++++++++++++
 .../c-c++-common/goacc/classify-parallel.c         | 28 +++++++++++++++
 .../c-c++-common/goacc/classify-routine.c          | 30 ++++++++++++++++
 .../goacc/classify-kernels-unparallelized.f95      | 41 ++++++++++++++++++++++
 .../gfortran.dg/goacc/classify-kernels.f95         | 37 +++++++++++++++++++
 .../gfortran.dg/goacc/classify-parallel.f95        | 30 ++++++++++++++++
 .../gfortran.dg/goacc/classify-routine.f95         | 29 +++++++++++++++
 8 files changed, 269 insertions(+)

diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
new file mode 100644
index 0000000..a76351c
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
@@ -0,0 +1,39 @@
+/* Check offloaded function's attributes and classification for unparallelized
+   OpenACC kernels.  */
+
+/* { dg-additional-options "-O2" }
+   { dg-additional-options "-fdump-tree-ompexp" }
+   { dg-additional-options "-fdump-tree-parloops1-all" }
+   { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N 1024
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+
+/* An "extern"al mapping of loop iterations/array indices makes the loop
+   unparallelizable.  */
+extern unsigned int f (unsigned int);
+
+void KERNELS ()
+{
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  for (unsigned int i = 0; i < N; i++)
+    c[i] = a[f (i)] + b[f (i)];
+}
+
+/* Check the offloaded function's attributes.
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+
+/* Check that exactly one OpenACC kernels construct is analyzed, and that it
+   can't be parallelized.
+   { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+   always be 1 x 1 x 1 for non-offloading compilation).
+   { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels.c gcc/testsuite/c-c++-common/goacc/classify-kernels.c
new file mode 100644
index 0000000..199a73e
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels.c
@@ -0,0 +1,35 @@
+/* Check offloaded function's attributes and classification for OpenACC
+   kernels.  */
+
+/* { dg-additional-options "-O2" }
+   { dg-additional-options "-fdump-tree-ompexp" }
+   { dg-additional-options "-fdump-tree-parloops1-all" }
+   { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N 1024
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+
+void KERNELS ()
+{
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  for (unsigned int i = 0; i < N; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Check the offloaded function's attributes.
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+
+/* Check that exactly one OpenACC kernels construct is analyzed, and that it
+   can be parallelized.
+   { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+   always be 1 x 1 x 1 for non-offloading compilation).
+   { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-parallel.c gcc/testsuite/c-c++-common/goacc/classify-parallel.c
new file mode 100644
index 0000000..9d48c1b
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/classify-parallel.c
@@ -0,0 +1,28 @@
+/* Check offloaded function's attributes and classification for OpenACC
+   parallel.  */
+
+/* { dg-additional-options "-O2" }
+   { dg-additional-options "-fdump-tree-ompexp" }
+   { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N 1024
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+
+void PARALLEL ()
+{
+#pragma acc parallel loop copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  for (unsigned int i = 0; i < N; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Check the offloaded function's attributes.
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+   always be 1 x 1 x 1 for non-offloading compilation).
+   { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-routine.c gcc/testsuite/c-c++-common/goacc/classify-routine.c
new file mode 100644
index 0000000..72b02c2
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/classify-routine.c
@@ -0,0 +1,30 @@
+/* Check offloaded function's attributes and classification for OpenACC
+   routine.  */
+
+/* { dg-additional-options "-O2" }
+   { dg-additional-options "-fdump-tree-ompexp" }
+   { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N 1024
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+#pragma acc declare copyin (a, b) create (c)
+
+#pragma acc routine worker
+void ROUTINE ()
+{
+#pragma acc loop
+  for (unsigned int i = 0; i < N; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Check the offloaded function's attributes.
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp declare target, oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "ompexp" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+   always be 1 x 1 x 1 for non-offloading compilation).
+   { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 1, 1 1\\), omp declare target, oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
new file mode 100644
index 0000000..fd46d0d
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
@@ -0,0 +1,41 @@
+! Check offloaded function's attributes and classification for unparallelized
+! OpenACC kernels.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-parloops1-all" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+program main
+  implicit none
+  integer, parameter :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer :: i
+
+  ! An "external" mapping of loop iterations/array indices makes the loop
+  ! unparallelizable.
+  integer, external :: f
+
+  call setup(a, b)
+
+  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+  do i = 0, n - 1
+     c(i) = a(f (i)) + b(f (i))
+  end do
+  !$acc end kernels
+end program main
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+
+! Check that exactly one OpenACC kernels construct is analyzed, and that it
+! can't be parallelized.
+! { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
new file mode 100644
index 0000000..053d27c
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
@@ -0,0 +1,37 @@
+! Check offloaded function's attributes and classification for OpenACC
+! kernels.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-parloops1-all" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+program main
+  implicit none
+  integer, parameter :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer :: i
+
+  call setup(a, b)
+
+  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+  do i = 0, n - 1
+     c(i) = a(i) + b(i)
+  end do
+  !$acc end kernels
+end program main
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+
+! Check that exactly one OpenACC kernels construct is analyzed, and that it
+! can be parallelized.
+! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95 gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
new file mode 100644
index 0000000..087ff48
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
@@ -0,0 +1,30 @@
+! Check offloaded function's attributes and classification for OpenACC
+! parallel.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+program main
+  implicit none
+  integer, parameter :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer :: i
+
+  call setup(a, b)
+
+  !$acc parallel loop copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+  do i = 0, n - 1
+     c(i) = a(i) + b(i)
+  end do
+  !$acc end parallel loop
+end program main
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-routine.f95 gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
new file mode 100644
index 0000000..319d767
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
@@ -0,0 +1,29 @@
+! Check offloaded function's attributes and classification for OpenACC
+! routine.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+subroutine ROUTINE
+  !$acc routine worker
+  integer, parameter :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer :: i
+
+  call setup(a, b)
+
+  !$acc loop
+  do i = 0, n - 1
+     c(i) = a(i) + b(i)
+  end do
+end subroutine ROUTINE
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp declare target, oacc function \\(0 0, 1 0, 1 0\\)\\)\\)" 1 "ompexp" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 1, 1 1\\), omp declare target, oacc function \\(0 0, 1 0, 1 0\\)\\)\\)" 1 "oaccdevlow" } }


Grüße
 Thomas

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: Use "oacc kernels" attribute for OpenACC kernels
  2016-08-04 14:07       ` Use "oacc kernels" attribute for OpenACC kernels Thomas Schwinge
@ 2017-05-08 19:29         ` Thomas Schwinge
  2017-05-09 20:59           ` Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels (was: Use "oacc kernels" attribute for OpenACC kernels) Thomas Schwinge
  2017-05-10 16:30           ` Use "oacc kernels" attribute for " Jakub Jelinek
  0 siblings, 2 replies; 20+ messages in thread
From: Thomas Schwinge @ 2017-05-08 19:29 UTC (permalink / raw)
  To: GCC Patches, Jakub Jelinek; +Cc: Tom de Vries

Hi!

On Thu, 4 Aug 2016 16:07:00 +0200, I wrote:
> Ping.
> 
> On Wed, 27 Jul 2016 12:06:59 +0200, I wrote:
> > On Mon, 25 Jan 2016 16:09:14 +0100, Jakub Jelinek <jakub@redhat.com> wrote:
> > > On Mon, Jan 25, 2016 at 10:06:50AM -0500, Nathan Sidwell wrote:
> > > > On 01/04/16 10:39, Nathan Sidwell wrote:
> > > > >There's currently no robust predicate to determine whether an oacc offload
> > > > >function is for a kernels region (as opposed to a parallel region).
> > > > >[...]
> > > > >
> > > > >This patch marks TREE_PUBLIC on the offload attribute values, to note kernels
> > > > >regions,  and adds a predicate to check that.  [...]
> > > > >
> > > > >Using these predicates improves the dump output of the openacc device lowering
> > > > >pass too.
> > 
> > I just submitted a patch adding "Test cases to check OpenACC offloaded
> > function's attributes and classification",

(Pinged in
<http://mid.mail-archive.com/877f1r1duw.fsf@hertz.schwinge.homeip.net>.)

> > to actually check the dump output of "oaccdevlow" -- it works.  ;-)
> > 
> > > > https://gcc.gnu.org/ml/gcc-patches/2016-01/msg00092.html
> > > > ping?
> > > 
> > > Ok, thanks.
> > 
> > It's conceptually and code-wise simpler to just use an "oacc kernels"
> > attribute for that.  (And, that will make another patch I'm working on
> > less convoluted.)
> > 
> > I'm open to suggestions if there is a better place to set the "oacc
> > kernels" attribute -- I put it into expand_omp_target, where another
> > special thing for GF_OMP_TARGET_KIND_OACC_KERNELS is already being done,
> > and before "rewriting" GF_OMP_TARGET_KIND_OACC_KERNELS (and
> > GF_OMP_TARGET_KIND_OACC_PARALLEL) into BUILT_IN_GOACC_PARALLEL.  My
> > reasoning for not setting the attribute earlier (like, in the front
> > ends), is that at that point in/before expand_omp_target, we still have
> > the distinction between OACC_PARALLEL/OACC_KERNELS (tree codes), and
> > later GF_OMP_TARGET_KIND_OACC_PARALLEL/GF_OMP_TARGET_KIND_OACC_KERNELS
> > (GIMPLE_OMP_TARGET subcodes).  Another question/possible cleanup of
> > course might be to actually set the "oacc kernels" attribute in the
> > front end and merge OACC_KERNELS into OACC_PARALLEL, and
> > GF_OMP_TARGET_KIND_OACC_KERNELS into GF_OMP_TARGET_KIND_OACC_PARALLEL?
> > 
> > But anyway, as a first step: OK for trunk?

commit fac5c3214f58812881635d3fb1e1751446d4b660
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Mon May 8 21:24:46 2017 +0200

    Use "oacc kernels" attribute for OpenACC kernels
    
            gcc/
            * omp-expand.c (expand_omp_target)
            <GF_OMP_TARGET_KIND_OACC_KERNELS>: Set "oacc kernels" attribute.
            * omp-general.c (oacc_set_fn_attrib): Remove is_kernel formal
            parameter.  Adjust all users.
            (oacc_fn_attrib_kernels_p): Remove function.
            (execute_oacc_device_lower): Look for "oacc kernels" attribute
            instead of calling oacc_fn_attrib_kernels_p.
            * tree-ssa-loop.c (gate_oacc_kernels): Likewise.
            * tree-parloops.c (create_parallel_loop): If oacc_kernels_p,
            assert "oacc kernels" attribute is set.
            gcc/testsuite/
            * c-c++-common/goacc/classify-kernels-unparallelized.c: Adjust.
            * c-c++-common/goacc/classify-kernels.c: Likewise.
            * c-c++-common/goacc/classify-parallel.c: Likewise.
            * c-c++-common/goacc/classify-routine.c: Likewise.
            * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
            * gfortran.dg/goacc/classify-kernels.f95: Likewise.
            * gfortran.dg/goacc/classify-parallel.f95: Likewise.
            * gfortran.dg/goacc/classify-routine.f95: Likewise.
---
 gcc/omp-expand.c                                   | 16 +++++++++-----
 gcc/omp-general.c                                  | 18 ++--------------
 gcc/omp-general.h                                  |  4 +---
 gcc/omp-offload.c                                  | 25 +++++++++++-----------
 .../goacc/classify-kernels-unparallelized.c        |  8 +++----
 .../c-c++-common/goacc/classify-kernels.c          |  8 +++----
 .../c-c++-common/goacc/classify-parallel.c         |  2 +-
 .../c-c++-common/goacc/classify-routine.c          |  2 +-
 .../goacc/classify-kernels-unparallelized.f95      |  8 +++----
 .../gfortran.dg/goacc/classify-kernels.f95         |  8 +++----
 .../gfortran.dg/goacc/classify-parallel.f95        |  2 +-
 .../gfortran.dg/goacc/classify-routine.f95         |  2 +-
 gcc/tree-parloops.c                                |  5 ++++-
 gcc/tree-ssa-loop.c                                |  5 +----
 14 files changed, 52 insertions(+), 61 deletions(-)

diff --git gcc/omp-expand.c gcc/omp-expand.c
index 5c48b78..405c60e 100644
--- gcc/omp-expand.c
+++ gcc/omp-expand.c
@@ -7083,7 +7083,16 @@ expand_omp_target (struct omp_region *region)
   exit_bb = region->exit;
 
   if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
-    mark_loops_in_oacc_kernels_region (region->entry, region->exit);
+    {
+      mark_loops_in_oacc_kernels_region (region->entry, region->exit);
+
+      /* Further down, both OpenACC kernels and OpenACC parallel constructs
+	 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
+	 two, there is an "oacc kernels" attribute set for OpenACC kernels.  */
+      DECL_ATTRIBUTES (child_fn)
+	= tree_cons (get_identifier ("oacc kernels"),
+		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
+    }
 
   if (offloaded)
     {
@@ -7266,7 +7275,6 @@ expand_omp_target (struct omp_region *region)
   enum built_in_function start_ix;
   location_t clause_loc;
   unsigned int flags_i = 0;
-  bool oacc_kernels_p = false;
 
   switch (gimple_omp_target_kind (entry_stmt))
     {
@@ -7287,8 +7295,6 @@ expand_omp_target (struct omp_region *region)
       flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
       break;
     case GF_OMP_TARGET_KIND_OACC_KERNELS:
-      oacc_kernels_p = true;
-      /* FALLTHROUGH */
     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
       start_ix = BUILT_IN_GOACC_PARALLEL;
       break;
@@ -7451,7 +7457,7 @@ expand_omp_target (struct omp_region *region)
       break;
     case BUILT_IN_GOACC_PARALLEL:
       {
-	oacc_set_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
+	oacc_set_fn_attrib (child_fn, clauses, &args);
 	tagging = true;
       }
       /* FALLTHRU */
diff --git gcc/omp-general.c gcc/omp-general.c
index 3f9aec8..9a5ed88 100644
--- gcc/omp-general.c
+++ gcc/omp-general.c
@@ -515,11 +515,10 @@ oacc_replace_fn_attrib (tree fn, tree dims)
 
 /* Scan CLAUSES for launch dimensions and attach them to the oacc
    function attribute.  Push any that are non-constant onto the ARGS
-   list, along with an appropriate GOMP_LAUNCH_DIM tag.  IS_KERNEL is
-   true, if these are for a kernels region offload function.  */
+   list, along with an appropriate GOMP_LAUNCH_DIM tag.  */
 
 void
-oacc_set_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
+oacc_set_fn_attrib (tree fn, tree clauses, vec<tree> *args)
 {
   /* Must match GOMP_DIM ordering.  */
   static const omp_clause_code ids[]
@@ -545,9 +544,6 @@ oacc_set_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
 	  non_const |= GOMP_DIM_MASK (ix);
 	}
       attr = tree_cons (NULL_TREE, dim, attr);
-      /* Note kernelness with TREE_PUBLIC.  */
-      if (is_kernel)
-	TREE_PUBLIC (attr) = 1;
     }
 
   oacc_replace_fn_attrib (fn, attr);
@@ -616,16 +612,6 @@ oacc_get_fn_attrib (tree fn)
   return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
 }
 
-/* Return true if this oacc fn attrib is for a kernels offload
-   region.  We use the TREE_PUBLIC flag of each dimension -- only
-   need to check the first one.  */
-
-bool
-oacc_fn_attrib_kernels_p (tree attr)
-{
-  return TREE_PUBLIC (TREE_VALUE (attr));
-}
-
 /* Extract an oacc execution dimension from FN.  FN must be an
    offloaded function or routine that has already had its execution
    dimensions lowered to the target-specific values.  */
diff --git gcc/omp-general.h gcc/omp-general.h
index 3cf7fce..d28eb4b 100644
--- gcc/omp-general.h
+++ gcc/omp-general.h
@@ -82,11 +82,9 @@ extern int omp_max_vf (void);
 extern int omp_max_simt_vf (void);
 extern tree oacc_launch_pack (unsigned code, tree device, unsigned op);
 extern void oacc_replace_fn_attrib (tree fn, tree dims);
-extern void oacc_set_fn_attrib (tree fn, tree clauses, bool is_kernel,
-				vec<tree> *args);
+extern void oacc_set_fn_attrib (tree fn, tree clauses, vec<tree> *args);
 extern tree oacc_build_routine_dims (tree clauses);
 extern tree oacc_get_fn_attrib (tree fn);
-extern bool oacc_fn_attrib_kernels_p (tree attr);
 extern int oacc_get_fn_dim_size (tree fn, int axis);
 extern int oacc_get_ifn_dim_arg (const gimple *stmt);
 
diff --git gcc/omp-offload.c gcc/omp-offload.c
index beeeb71..15a1cd3 100644
--- gcc/omp-offload.c
+++ gcc/omp-offload.c
@@ -619,7 +619,6 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
   tree purpose[GOMP_DIM_MAX];
   unsigned ix;
   tree pos = TREE_VALUE (attrs);
-  bool is_kernel = oacc_fn_attrib_kernels_p (attrs);
 
   /* Make sure the attribute creator attached the dimension
      information.  */
@@ -666,13 +665,9 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
       /* Replace the attribute with new values.  */
       pos = NULL_TREE;
       for (ix = GOMP_DIM_MAX; ix--;)
-	{
-	  pos = tree_cons (purpose[ix],
-			   build_int_cst (integer_type_node, dims[ix]),
-			   pos);
-	  if (is_kernel)
-	    TREE_PUBLIC (pos) = 1;
-	}
+	pos = tree_cons (purpose[ix],
+			 build_int_cst (integer_type_node, dims[ix]),
+			 pos);
       oacc_replace_fn_attrib (fn, pos);
     }
 }
@@ -1455,10 +1450,16 @@ execute_oacc_device_lower ()
   int fn_level = oacc_fn_attrib_level (attrs);
 
   if (dump_file)
-    fprintf (dump_file, oacc_fn_attrib_kernels_p (attrs)
-	     ? "Function is kernels offload\n"
-	     : fn_level < 0 ? "Function is parallel offload\n"
-	     : "Function is routine level %d\n", fn_level);
+    {
+      if (lookup_attribute ("oacc kernels",
+			    DECL_ATTRIBUTES (current_function_decl)))
+	fprintf (dump_file, "Function is OpenACC kernels offload\n");
+      else if (fn_level < 0)
+	fprintf (dump_file, "Function is OpenACC parallel offload\n");
+      else
+	fprintf (dump_file, "Function is OpenACC routine level %d\n",
+		 fn_level);
+    }
 
   unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
   unsigned used_mask = oacc_loop_partition (loops, outer_mask);
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
index a76351c..70ff428 100644
--- gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
@@ -24,16 +24,16 @@ void KERNELS ()
 }
 
 /* Check the offloaded function's attributes.
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */
 
 /* Check that exactly one OpenACC kernels construct is analyzed, and that it
    can't be parallelized.
    { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
    { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } } */
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels.c gcc/testsuite/c-c++-common/goacc/classify-kernels.c
index 199a73e..c8b0fda 100644
--- gcc/testsuite/c-c++-common/goacc/classify-kernels.c
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels.c
@@ -20,16 +20,16 @@ void KERNELS ()
 }
 
 /* Check the offloaded function's attributes.
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */
 
 /* Check that exactly one OpenACC kernels construct is analyzed, and that it
    can be parallelized.
    { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
    { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-parallel.c gcc/testsuite/c-c++-common/goacc/classify-parallel.c
index 9d48c1b..4f97301 100644
--- gcc/testsuite/c-c++-common/goacc/classify-parallel.c
+++ gcc/testsuite/c-c++-common/goacc/classify-parallel.c
@@ -23,6 +23,6 @@ void PARALLEL ()
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC parallel offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-routine.c gcc/testsuite/c-c++-common/goacc/classify-routine.c
index 72b02c2..fd89fc1 100644
--- gcc/testsuite/c-c++-common/goacc/classify-routine.c
+++ gcc/testsuite/c-c++-common/goacc/classify-routine.c
@@ -25,6 +25,6 @@ void ROUTINE ()
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC routine level 1" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 1, 1 1\\), omp declare target, oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
index fd46d0d..9887d35 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
@@ -26,16 +26,16 @@ program main
 end program main
 
 ! Check the offloaded function's attributes.
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } }
 
 ! Check that exactly one OpenACC kernels construct is analyzed, and that it
 ! can't be parallelized.
 ! { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } }
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
index 053d27c..69c89a9 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
@@ -22,16 +22,16 @@ program main
 end program main
 
 ! Check the offloaded function's attributes.
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } }
 
 ! Check that exactly one OpenACC kernels construct is analyzed, and that it
 ! can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95 gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
index 087ff48..e215c79 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
@@ -25,6 +25,6 @@ end program main
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC parallel offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-routine.f95 gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
index 319d767..4ca4067 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
@@ -24,6 +24,6 @@ end subroutine ROUTINE
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC routine level 1" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 1, 1 1\\), omp declare target, oacc function \\(0 0, 1 0, 1 0\\)\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/tree-parloops.c gcc/tree-parloops.c
index 7393011..6ce9d84 100644
--- gcc/tree-parloops.c
+++ gcc/tree-parloops.c
@@ -2043,10 +2043,13 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
   /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
   if (oacc_kernels_p)
     {
+      gcc_checking_assert (lookup_attribute ("oacc kernels",
+					     DECL_ATTRIBUTES (cfun->decl)));
+
       tree clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
       OMP_CLAUSE_NUM_GANGS_EXPR (clause)
 	= build_int_cst (integer_type_node, n_threads);
-      oacc_set_fn_attrib (cfun->decl, clause, true, NULL);
+      oacc_set_fn_attrib (cfun->decl, clause, NULL);
     }
   else
     {
diff --git gcc/tree-ssa-loop.c gcc/tree-ssa-loop.c
index 8b25b41..10c43f3 100644
--- gcc/tree-ssa-loop.c
+++ gcc/tree-ssa-loop.c
@@ -152,10 +152,7 @@ gate_oacc_kernels (function *fn)
   if (!flag_openacc)
     return false;
 
-  tree oacc_function_attr = oacc_get_fn_attrib (fn->decl);
-  if (oacc_function_attr == NULL_TREE)
-    return false;
-  if (!oacc_fn_attrib_kernels_p (oacc_function_attr))
+  if (!lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn->decl)))
     return false;
 
   struct loop *loop;


Grüße
 Thomas

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels (was: Use "oacc kernels" attribute for OpenACC kernels)
  2017-05-08 19:29         ` Thomas Schwinge
@ 2017-05-09 20:59           ` Thomas Schwinge
  2017-05-10 16:32             ` Jakub Jelinek
  2017-05-10 16:30           ` Use "oacc kernels" attribute for " Jakub Jelinek
  1 sibling, 1 reply; 20+ messages in thread
From: Thomas Schwinge @ 2017-05-09 20:59 UTC (permalink / raw)
  To: GCC Patches, Jakub Jelinek; +Cc: Tom de Vries

Hi!

On Mon, 08 May 2017 21:29:28 +0200, I wrote:
> On Thu, 4 Aug 2016 16:07:00 +0200, I wrote:
> > Ping.
> > 
> > On Wed, 27 Jul 2016 12:06:59 +0200, I wrote:
> > > On Mon, 25 Jan 2016 16:09:14 +0100, Jakub Jelinek <jakub@redhat.com> wrote:
> > > > On Mon, Jan 25, 2016 at 10:06:50AM -0500, Nathan Sidwell wrote:
> > > > > On 01/04/16 10:39, Nathan Sidwell wrote:
> > > > > >There's currently no robust predicate to determine whether an oacc offload
> > > > > >function is for a kernels region (as opposed to a parallel region).
> > > > > >[...]
> > > > > >
> > > > > >This patch marks TREE_PUBLIC on the offload attribute values, to note kernels
> > > > > >regions,  and adds a predicate to check that.  [...]
> > > > > >
> > > > > >Using these predicates improves the dump output of the openacc device lowering
> > > > > >pass too.
> > > 
> > > I just submitted a patch adding "Test cases to check OpenACC offloaded
> > > function's attributes and classification",
> 
> (Pinged in
> <http://mid.mail-archive.com/877f1r1duw.fsf@hertz.schwinge.homeip.net>.)
> 
> > > to actually check the dump output of "oaccdevlow" -- it works.  ;-)
> > > 
> > > > > https://gcc.gnu.org/ml/gcc-patches/2016-01/msg00092.html
> > > > > ping?
> > > > 
> > > > Ok, thanks.
> > > 
> > > It's conceptually and code-wise simpler to just use a "oacc kernels"
> > > attribute for that.  (And, that will make another patch I'm working on
> > > less convoluted.)
> > > 
> > > I'm open to suggestions if there is a better place to set the "oacc
> > > kernels" attribute -- I put it into expand_omp_target, where another
> > > special thing for GF_OMP_TARGET_KIND_OACC_KERNELS is already being done,
> > > and before "rewriting" GF_OMP_TARGET_KIND_OACC_KERNELS (and
> > > GF_OMP_TARGET_KIND_OACC_PARALLEL) into BUILT_IN_GOACC_PARALLEL.  My
> > > reasoning for not setting the attribute earlier (like, in the front
> > > ends), is that at that point in/before expand_omp_target, we still have
> > > the distinction between OACC_PARALLEL/OACC_KERNELS (tree codes), and
> > > later GF_OMP_TARGET_KIND_OACC_PARALLEL/GF_OMP_TARGET_KIND_OACC_KERNELS
> > > (GIMPLE_OMP_TARGET subcodes).  Another question/possible cleanup of
> > > course might be to actually set the "oacc kernels" attribute in the
> > > front end and merge OACC_KERNELS into OACC_PARALLEL, and
> > > GF_OMP_TARGET_KIND_OACC_KERNELS into GF_OMP_TARGET_KIND_OACC_PARALLEL?
> > > 
> > > But anyway, as a first step: OK for trunk?
> 
> commit fac5c3214f58812881635d3fb1e1751446d4b660
> Author: Thomas Schwinge <thomas@codesourcery.com>
> Date:   Mon May 8 21:24:46 2017 +0200
> 
>     Use "oacc kernels" attribute for OpenACC kernels

And on top of that, we can then 'use "oacc kernels parallelized"
attribute for parallelized OpenACC kernels', to also make that more
explicit (and pave the way for another change later on).  OK for trunk?

commit b6b5d549089423e3fbe387f63467d052b956f3f7
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Tue May 9 20:14:03 2017 +0200

    Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels
    
            gcc/
            * tree-parloops.c (create_parallel_loop): Set "oacc kernels
            parallelized" attribute for parallelized OpenACC kernels.
            * omp-offload.c (execute_oacc_device_lower): Use it.
            gcc/testsuite/
            * c-c++-common/goacc/classify-kernels-unparallelized.c: Adjust.
            * c-c++-common/goacc/classify-kernels.c: Likewise.
            * c-c++-common/goacc/kernels-counter-vars-function-scope.c:
            Likewise.
            * c-c++-common/goacc/kernels-double-reduction-n.c: Likewise.
            * c-c++-common/goacc/kernels-double-reduction.c: Likewise.
            * c-c++-common/goacc/kernels-loop-2.c: Likewise.
            * c-c++-common/goacc/kernels-loop-3.c: Likewise.
            * c-c++-common/goacc/kernels-loop-g.c: Likewise.
            * c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise.
            * c-c++-common/goacc/kernels-loop-n.c: Likewise.
            * c-c++-common/goacc/kernels-loop-nest.c: Likewise.
            * c-c++-common/goacc/kernels-loop.c: Likewise.
            * c-c++-common/goacc/kernels-one-counter-var.c: Likewise.
            * c-c++-common/goacc/kernels-reduction.c: Likewise.
            * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
            * gfortran.dg/goacc/classify-kernels.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-2.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-n.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop.f95: Likewise.
---
 gcc/omp-offload.c                                  | 24 ++++++++++++++++++----
 .../goacc/classify-kernels-unparallelized.c        |  2 +-
 .../c-c++-common/goacc/classify-kernels.c          |  6 +++---
 .../goacc/kernels-counter-vars-function-scope.c    |  3 +--
 .../goacc/kernels-double-reduction-n.c             |  3 +--
 .../c-c++-common/goacc/kernels-double-reduction.c  |  3 +--
 gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c  |  3 +--
 gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c  |  3 +--
 gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c  |  3 +--
 .../c-c++-common/goacc/kernels-loop-mod-not-zero.c |  3 +--
 gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c  |  3 +--
 .../c-c++-common/goacc/kernels-loop-nest.c         |  3 +--
 gcc/testsuite/c-c++-common/goacc/kernels-loop.c    |  3 +--
 .../c-c++-common/goacc/kernels-one-counter-var.c   |  3 +--
 .../c-c++-common/goacc/kernels-reduction.c         |  3 +--
 .../goacc/classify-kernels-unparallelized.f95      |  2 +-
 .../gfortran.dg/goacc/classify-kernels.f95         |  6 +++---
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 |  3 +--
 .../gfortran.dg/goacc/kernels-loop-data-2.f95      |  3 +--
 .../goacc/kernels-loop-data-enter-exit-2.f95       |  3 +--
 .../goacc/kernels-loop-data-enter-exit.f95         |  3 +--
 .../gfortran.dg/goacc/kernels-loop-data-update.f95 |  3 +--
 .../gfortran.dg/goacc/kernels-loop-data.f95        |  3 +--
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95 |  5 ++---
 gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95   |  3 +--
 gcc/tree-parloops.c                                | 16 ++++++++-------
 26 files changed, 58 insertions(+), 60 deletions(-)

diff --git gcc/omp-offload.c gcc/omp-offload.c
index 5e1eac4..e954ee9 100644
--- gcc/omp-offload.c
+++ gcc/omp-offload.c
@@ -1445,6 +1445,13 @@ execute_oacc_device_lower ()
       flag_openacc_dims = (char *)&flag_openacc_dims;
     }
 
+  bool is_oacc_kernels
+    = (lookup_attribute ("oacc kernels",
+			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
+  bool is_oacc_kernels_parallelized
+    = (lookup_attribute ("oacc kernels parallelized",
+			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
+
   /* Discover, partition and process the loops.  */
   oacc_loop *loops = oacc_loop_discovery ();
   int fn_level = oacc_fn_attrib_level (attrs);
@@ -1454,17 +1461,26 @@ execute_oacc_device_lower ()
       if (fn_level >= 0)
 	fprintf (dump_file, "Function is OpenACC routine level %d\n",
 		 fn_level);
-      else if (lookup_attribute ("oacc kernels",
-				 DECL_ATTRIBUTES (current_function_decl)))
-	fprintf (dump_file, "Function is OpenACC kernels offload\n");
+      else if (is_oacc_kernels)
+	fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
+		 (is_oacc_kernels_parallelized
+		  ? "parallelized" : "unparallelized"));
       else
 	fprintf (dump_file, "Function is OpenACC parallel offload\n");
     }
 
   unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
   unsigned used_mask = oacc_loop_partition (loops, outer_mask);
+  /* OpenACC kernels constructs are special: they currently don't use the
+     generic oacc_loop infrastructure and attribute/dimension processing.  */
+  if (is_oacc_kernels && is_oacc_kernels_parallelized)
+    {
+      /* Parallelized OpenACC kernels constructs use gang parallelism.  See
+	 also tree-parloops.c:create_parallel_loop.  */
+      used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
+    }
+
   int dims[GOMP_DIM_MAX];
-
   oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
 
   if (dump_file)
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
index 70ff428..626f6b4 100644
--- gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
@@ -34,6 +34,6 @@ void KERNELS ()
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels.c gcc/testsuite/c-c++-common/goacc/classify-kernels.c
index c8b0fda..95037e6 100644
--- gcc/testsuite/c-c++-common/goacc/classify-kernels.c
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels.c
@@ -25,11 +25,11 @@ void KERNELS ()
 /* Check that exactly one OpenACC kernels construct is analyzed, and that it
    can be parallelized.
    { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
    { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
index 17f240e..c475333 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
@@ -45,9 +45,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c
index 750f576..27ea2e9 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c
@@ -27,10 +27,9 @@ foo (unsigned int n)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
index df60d6a..0841e90 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
@@ -27,10 +27,9 @@ foo (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
index 913d91f..acef6a1 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
@@ -59,11 +59,10 @@ main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
index 1822d2a..75e2bb7 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
@@ -39,9 +39,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
index e946319..73b469d 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
@@ -7,9 +7,8 @@
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
index 9b63b45..5592623 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
@@ -43,9 +43,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
index 279f797..e86be1b 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
@@ -46,9 +46,8 @@ foo (COUNTERTYPE n)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
index db1071f..2b0e186 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
@@ -30,9 +30,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop.c gcc/testsuite/c-c++-common/goacc/kernels-loop.c
index abf7a3c..9619d53 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop.c
@@ -46,9 +46,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
index 95f4817..69539b2 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
@@ -44,9 +44,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-reduction.c gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
index 6f5a418..4a18272 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
@@ -26,9 +26,8 @@ foo (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
index 9887d35..4b282ca 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
@@ -36,6 +36,6 @@ end program main
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
index 69c89a9..da025c1 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
@@ -27,11 +27,11 @@ end program main
 ! Check that exactly one OpenACC kernels construct is analyzed, and that it
 ! can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
index 865f7a6..516aede 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
@@ -34,11 +34,10 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
index c9f3a62..ff3788a 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
@@ -40,11 +40,10 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
index 3361607..60a5c96 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
@@ -40,11 +40,10 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
index 5ba56fb..ce04749 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
@@ -38,11 +38,10 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95
index a622a96..d2de138 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95
@@ -38,10 +38,9 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 2 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95
index 4ec2ac3..92872b2 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95
@@ -38,11 +38,10 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95
index 409fe6f..079712f2 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95
@@ -32,10 +32,9 @@ end module test
 
 ! Check that only one loop is analyzed, and that it can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+! TODO, PR70545.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function __test_MOD_foo._omp_fn.0 " 1 "optimized" } }
-
-! TODO, PR70545.
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" { xfail *-*-* } } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
index ae2cac6..cc9a3a9 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
@@ -30,9 +30,8 @@ end program main
 
 ! Check that only one loop is analyzed, and that it can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } }
diff --git gcc/tree-parloops.c gcc/tree-parloops.c
index 6ce9d84..f826154 100644
--- gcc/tree-parloops.c
+++ gcc/tree-parloops.c
@@ -2040,19 +2040,20 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
   tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
   edge exit, nexit, guard, end, e;
 
-  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
   if (oacc_kernels_p)
     {
       gcc_checking_assert (lookup_attribute ("oacc kernels",
 					     DECL_ATTRIBUTES (cfun->decl)));
-
-      tree clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
-      OMP_CLAUSE_NUM_GANGS_EXPR (clause)
-	= build_int_cst (integer_type_node, n_threads);
-      oacc_set_fn_attrib (cfun->decl, clause, NULL);
+      /* Indicate to later processing that this is a parallelized OpenACC
+	 kernels construct.  */
+      DECL_ATTRIBUTES (cfun->decl)
+	= tree_cons (get_identifier ("oacc kernels parallelized"),
+		     NULL_TREE, DECL_ATTRIBUTES (cfun->decl));
     }
   else
     {
+      /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
+
       basic_block bb = loop_preheader_edge (loop)->src;
       basic_block paral_bb = single_pred (bb);
       gsi = gsi_last_bb (paral_bb);
@@ -2154,7 +2155,8 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
 
   /* Emit GIMPLE_OMP_FOR.  */
   if (oacc_kernels_p)
-    /* In combination with the NUM_GANGS on the parallel.  */
+    /* Parallelized OpenACC kernels constructs use gang parallelism.  See also
+       omp-offload.c:execute_oacc_device_lower.  */
     t = build_omp_clause (loc, OMP_CLAUSE_GANG);
   else
     {


Grüße
 Thomas

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: Test cases to check OpenACC offloaded function's attributes and classification
  2017-05-08 17:05         ` Thomas Schwinge
@ 2017-05-10 15:53           ` Jakub Jelinek
  2017-05-12  9:09             ` Thomas Schwinge
  0 siblings, 1 reply; 20+ messages in thread
From: Jakub Jelinek @ 2017-05-10 15:53 UTC (permalink / raw)
  To: Thomas Schwinge; +Cc: gcc-patches

On Mon, May 08, 2017 at 07:02:15PM +0200, Thomas Schwinge wrote:
> Hi!
> 
> Ping.
> 
> On Thu, 4 Aug 2016 16:06:10 +0200, I wrote:
> > Ping.
> > 
> > On Wed, 27 Jul 2016 10:59:02 +0200, I wrote:
> > > OK for trunk?
> 
> (In the meantime, I also added some more testing.)
> 
> commit b7d61270dfc581a6ea130f7a4fa7506a0a5762d8
> Author: Thomas Schwinge <thomas@codesourcery.com>
> Date:   Mon May 8 18:22:50 2017 +0200
> 
>     Test cases to check OpenACC offloaded function's attributes and classification
>     
>             gcc/testsuite/
>             * c-c++-common/goacc/classify-kernels-unparallelized.c: New file.
>             * c-c++-common/goacc/classify-kernels.c: Likewise.
>             * c-c++-common/goacc/classify-parallel.c: Likewise.
>             * c-c++-common/goacc/classify-routine.c: Likewise.
>             * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
>             * gfortran.dg/goacc/classify-kernels.f95: Likewise.
>             * gfortran.dg/goacc/classify-parallel.f95: Likewise.
>             * gfortran.dg/goacc/classify-routine.f95: Likewise.

Dunno if it isn't too fragile, but if you are willing to maintain it, ok.

	Jakub

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: Use "oacc kernels" attribute for OpenACC kernels
  2017-05-08 19:29         ` Thomas Schwinge
  2017-05-09 20:59           ` Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels (was: Use "oacc kernels" attribute for OpenACC kernels) Thomas Schwinge
@ 2017-05-10 16:30           ` Jakub Jelinek
  2017-05-12  9:22             ` Thomas Schwinge
  1 sibling, 1 reply; 20+ messages in thread
From: Jakub Jelinek @ 2017-05-10 16:30 UTC (permalink / raw)
  To: Thomas Schwinge; +Cc: GCC Patches, Tom de Vries

On Mon, May 08, 2017 at 09:29:28PM +0200, Thomas Schwinge wrote:
> commit fac5c3214f58812881635d3fb1e1751446d4b660
> Author: Thomas Schwinge <thomas@codesourcery.com>
> Date:   Mon May 8 21:24:46 2017 +0200
> 
>     Use "oacc kernels" attribute for OpenACC kernels
>     
>             gcc/
>             * omp-expand.c (expand_omp_target)
>             <GF_OMP_TARGET_KIND_OACC_KERNELS>: Set "oacc kernels" attribute.

I think
	* omp-expand.c (expand_omp_target) <GF_OMP_TARGET_KIND_OACC_KERNELS>:
	Set "oacc kernels" attribute.
fits better.

>             * omp-general.c (oacc_set_fn_attrib): Remove is_kernel formal
>             parameter.  Adjust all users.
>             (oacc_fn_attrib_kernels_p): Remove function.
>             (execute_oacc_device_lower): Look for "oacc kernels" attribute
>             instead of calling oacc_fn_attrib_kernels_p.
>             * tree-ssa-loop.c (gate_oacc_kernels): Likewise.
>             * tree-parloops.c (create_parallel_loop): If oacc_kernels_p,
>             assert "oacc kernels" attribute is set.
>             gcc/testsuite/
>             * c-c++-common/goacc/classify-kernels-unparallelized.c: Adjust.
>             * c-c++-common/goacc/classify-kernels.c: Likewise.
>             * c-c++-common/goacc/classify-parallel.c: Likewise.
>             * c-c++-common/goacc/classify-routine.c: Likewise.
>             * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
>             * gfortran.dg/goacc/classify-kernels.f95: Likewise.
>             * gfortran.dg/goacc/classify-parallel.f95: Likewise.
>             * gfortran.dg/goacc/classify-routine.f95: Likewise.

> @@ -7451,7 +7457,7 @@ expand_omp_target (struct omp_region *region)
>        break;
>      case BUILT_IN_GOACC_PARALLEL:
>        {
> -	oacc_set_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
> +	oacc_set_fn_attrib (child_fn, clauses, &args);
>  	tagging = true;
>        }
>        /* FALLTHRU */

The {}s aren't needed around this, could you drop them?

> +	pos = tree_cons (purpose[ix],
> +			 build_int_cst (integer_type_node, dims[ix]),
> +			 pos);

pos); would fit on the earlier line.

Ok with those changes.

	Jakub

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels (was: Use "oacc kernels" attribute for OpenACC kernels)
  2017-05-09 20:59           ` Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels (was: Use "oacc kernels" attribute for OpenACC kernels) Thomas Schwinge
@ 2017-05-10 16:32             ` Jakub Jelinek
  2017-05-12  9:24               ` Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels Thomas Schwinge
  0 siblings, 1 reply; 20+ messages in thread
From: Jakub Jelinek @ 2017-05-10 16:32 UTC (permalink / raw)
  To: Thomas Schwinge; +Cc: GCC Patches, Tom de Vries

On Tue, May 09, 2017 at 10:57:34PM +0200, Thomas Schwinge wrote:
> commit b6b5d549089423e3fbe387f63467d052b956f3f7
> Author: Thomas Schwinge <thomas@codesourcery.com>
> Date:   Tue May 9 20:14:03 2017 +0200
> 
>     Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels
>     
>             gcc/
>             * tree-parloops.c (create_parallel_loop): Set "oacc kernels
>             parallelized" attribute for parallelized OpenACC kernels.
>             * omp-offload.c (execute_oacc_device_lower): Use it.
>             gcc/testsuite/
>             * c-c++-common/goacc/classify-kernels-unparallelized.c: Adjust.
>             * c-c++-common/goacc/classify-kernels.c: Likewise.
>             * c-c++-common/goacc/kernels-counter-vars-function-scope.c:
>             Likewise.
>             * c-c++-common/goacc/kernels-double-reduction-n.c: Likewise.
>             * c-c++-common/goacc/kernels-double-reduction.c: Likewise.
>             * c-c++-common/goacc/kernels-loop-2.c: Likewise.
>             * c-c++-common/goacc/kernels-loop-3.c: Likewise.
>             * c-c++-common/goacc/kernels-loop-g.c: Likewise.
>             * c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise.
>             * c-c++-common/goacc/kernels-loop-n.c: Likewise.
>             * c-c++-common/goacc/kernels-loop-nest.c: Likewise.
>             * c-c++-common/goacc/kernels-loop.c: Likewise.
>             * c-c++-common/goacc/kernels-one-counter-var.c: Likewise.
>             * c-c++-common/goacc/kernels-reduction.c: Likewise.
>             * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
>             * gfortran.dg/goacc/classify-kernels.f95: Likewise.
>             * gfortran.dg/goacc/kernels-loop-2.f95: Likewise.
>             * gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise.
>             * gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise.
>             * gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise.
>             * gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise.
>             * gfortran.dg/goacc/kernels-loop-data.f95: Likewise.
>             * gfortran.dg/goacc/kernels-loop-n.f95: Likewise.
>             * gfortran.dg/goacc/kernels-loop.f95: Likewise.

Ok.

	Jakub

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: Test cases to check OpenACC offloaded function's attributes and classification
  2017-05-10 15:53           ` Jakub Jelinek
@ 2017-05-12  9:09             ` Thomas Schwinge
  2022-03-04 13:19               ` Update 'c-c++-common/goacc/classify-*', 'gfortran.dg/goacc/classify-*' Thomas Schwinge
  0 siblings, 1 reply; 20+ messages in thread
From: Thomas Schwinge @ 2017-05-12  9:09 UTC (permalink / raw)
  To: Jakub Jelinek, gcc-patches

Hi!

On Wed, 10 May 2017 17:49:48 +0200, Jakub Jelinek <jakub@redhat.com> wrote:
> On Mon, May 08, 2017 at 07:02:15PM +0200, Thomas Schwinge wrote:
> > On Thu, 4 Aug 2016 16:06:10 +0200, I wrote:
> > > On Wed, 27 Jul 2016 10:59:02 +0200, I wrote:
> > > > OK for trunk?
> > 
> > (In the meantime, I also added some more testing.)

> >     Test cases to check OpenACC offloaded function's attributes and classification

> Dunno if it isn't too fragile, but if you are willing to maintain it, ok.

Sure.  (Why would I propose it otherwise?)

Committed to trunk in r247953:

commit 692b887e5afc026d8217f0654896f6777edbf7a7
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Fri May 12 08:42:31 2017 +0000

    Test cases to check OpenACC offloaded function's attributes and classification
    
            gcc/testsuite/
            * c-c++-common/goacc/classify-kernels-unparallelized.c: New file.
            * c-c++-common/goacc/classify-kernels.c: Likewise.
            * c-c++-common/goacc/classify-parallel.c: Likewise.
            * c-c++-common/goacc/classify-routine.c: Likewise.
            * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
            * gfortran.dg/goacc/classify-kernels.f95: Likewise.
            * gfortran.dg/goacc/classify-parallel.f95: Likewise.
            * gfortran.dg/goacc/classify-routine.f95: Likewise.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@247953 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/testsuite/ChangeLog                            | 11 ++++++
 .../goacc/classify-kernels-unparallelized.c        | 39 ++++++++++++++++++++
 .../c-c++-common/goacc/classify-kernels.c          | 35 ++++++++++++++++++
 .../c-c++-common/goacc/classify-parallel.c         | 28 +++++++++++++++
 .../c-c++-common/goacc/classify-routine.c          | 30 ++++++++++++++++
 .../goacc/classify-kernels-unparallelized.f95      | 41 ++++++++++++++++++++++
 .../gfortran.dg/goacc/classify-kernels.f95         | 37 +++++++++++++++++++
 .../gfortran.dg/goacc/classify-parallel.f95        | 30 ++++++++++++++++
 .../gfortran.dg/goacc/classify-routine.f95         | 29 +++++++++++++++
 9 files changed, 280 insertions(+)

diff --git gcc/testsuite/ChangeLog gcc/testsuite/ChangeLog
index 3553c99..5ed40a5 100644
--- gcc/testsuite/ChangeLog
+++ gcc/testsuite/ChangeLog
@@ -1,3 +1,14 @@
+2017-05-12  Thomas Schwinge  <thomas@codesourcery.com>
+
+	* c-c++-common/goacc/classify-kernels-unparallelized.c: New file.
+	* c-c++-common/goacc/classify-kernels.c: Likewise.
+	* c-c++-common/goacc/classify-parallel.c: Likewise.
+	* c-c++-common/goacc/classify-routine.c: Likewise.
+	* gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
+	* gfortran.dg/goacc/classify-kernels.f95: Likewise.
+	* gfortran.dg/goacc/classify-parallel.f95: Likewise.
+	* gfortran.dg/goacc/classify-routine.f95: Likewise.
+
 2017-05-11  Nathan Sidwell  <nathan@acm.org>
 
 	* lib/gcc-dg.exp (schedule-cleanups): Add lang dump capability.
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
new file mode 100644
index 0000000..a76351c
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
@@ -0,0 +1,39 @@
+/* Check offloaded function's attributes and classification for unparallelized
+   OpenACC kernels.  */
+
+/* { dg-additional-options "-O2" }
+   { dg-additional-options "-fdump-tree-ompexp" }
+   { dg-additional-options "-fdump-tree-parloops1-all" }
+   { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N 1024
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+
+/* An "extern"al mapping of loop iterations/array indices makes the loop
+   unparallelizable.  */
+extern unsigned int f (unsigned int);
+
+void KERNELS ()
+{
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  for (unsigned int i = 0; i < N; i++)
+    c[i] = a[f (i)] + b[f (i)];
+}
+
+/* Check the offloaded function's attributes.
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+
+/* Check that exactly one OpenACC kernels construct is analyzed, and that it
+   can't be parallelized.
+   { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+   always be 1 x 1 x 1 for non-offloading compilation).
+   { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels.c gcc/testsuite/c-c++-common/goacc/classify-kernels.c
new file mode 100644
index 0000000..199a73e
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels.c
@@ -0,0 +1,35 @@
+/* Check offloaded function's attributes and classification for OpenACC
+   kernels.  */
+
+/* { dg-additional-options "-O2" }
+   { dg-additional-options "-fdump-tree-ompexp" }
+   { dg-additional-options "-fdump-tree-parloops1-all" }
+   { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N 1024
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+
+void KERNELS ()
+{
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  for (unsigned int i = 0; i < N; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Check the offloaded function's attributes.
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+
+/* Check that exactly one OpenACC kernels construct is analyzed, and that it
+   can be parallelized.
+   { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+   always be 1 x 1 x 1 for non-offloading compilation).
+   { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-parallel.c gcc/testsuite/c-c++-common/goacc/classify-parallel.c
new file mode 100644
index 0000000..9d48c1b
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/classify-parallel.c
@@ -0,0 +1,28 @@
+/* Check offloaded function's attributes and classification for OpenACC
+   parallel.  */
+
+/* { dg-additional-options "-O2" }
+   { dg-additional-options "-fdump-tree-ompexp" }
+   { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N 1024
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+
+void PARALLEL ()
+{
+#pragma acc parallel loop copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  for (unsigned int i = 0; i < N; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Check the offloaded function's attributes.
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+   always be 1 x 1 x 1 for non-offloading compilation).
+   { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-routine.c gcc/testsuite/c-c++-common/goacc/classify-routine.c
new file mode 100644
index 0000000..72b02c2
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/classify-routine.c
@@ -0,0 +1,30 @@
+/* Check offloaded function's attributes and classification for OpenACC
+   routine.  */
+
+/* { dg-additional-options "-O2" }
+   { dg-additional-options "-fdump-tree-ompexp" }
+   { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N 1024
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+#pragma acc declare copyin (a, b) create (c)
+
+#pragma acc routine worker
+void ROUTINE ()
+{
+#pragma acc loop
+  for (unsigned int i = 0; i < N; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Check the offloaded function's attributes.
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp declare target, oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "ompexp" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+   always be 1 x 1 x 1 for non-offloading compilation).
+   { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 1, 1 1\\), omp declare target, oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
new file mode 100644
index 0000000..fd46d0d
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
@@ -0,0 +1,41 @@
+! Check offloaded function's attributes and classification for unparallelized
+! OpenACC kernels.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-parloops1-all" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+program main
+  implicit none
+  integer, parameter :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer :: i
+
+  ! An "external" mapping of loop iterations/array indices makes the loop
+  ! unparallelizable.
+  integer, external :: f
+
+  call setup(a, b)
+
+  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+  do i = 0, n - 1
+     c(i) = a(f (i)) + b(f (i))
+  end do
+  !$acc end kernels
+end program main
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+
+! Check that exactly one OpenACC kernels construct is analyzed, and that it
+! can't be parallelized.
+! { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
new file mode 100644
index 0000000..053d27c
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
@@ -0,0 +1,37 @@
+! Check offloaded function's attributes and classification for OpenACC
+! kernels.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-parloops1-all" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+program main
+  implicit none
+  integer, parameter :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer :: i
+
+  call setup(a, b)
+
+  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+  do i = 0, n - 1
+     c(i) = a(i) + b(i)
+  end do
+  !$acc end kernels
+end program main
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+
+! Check that exactly one OpenACC kernels construct is analyzed, and that it
+! can be parallelized.
+! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95 gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
new file mode 100644
index 0000000..087ff48
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
@@ -0,0 +1,30 @@
+! Check offloaded function's attributes and classification for OpenACC
+! parallel.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+program main
+  implicit none
+  integer, parameter :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer :: i
+
+  call setup(a, b)
+
+  !$acc parallel loop copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+  do i = 0, n - 1
+     c(i) = a(i) + b(i)
+  end do
+  !$acc end parallel loop
+end program main
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-routine.f95 gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
new file mode 100644
index 0000000..319d767
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
@@ -0,0 +1,29 @@
+! Check offloaded function's attributes and classification for OpenACC
+! routine.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+subroutine ROUTINE
+  !$acc routine worker
+  integer, parameter :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer :: i
+
+  call setup(a, b)
+
+  !$acc loop
+  do i = 0, n - 1
+     c(i) = a(i) + b(i)
+  end do
+end subroutine ROUTINE
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp declare target, oacc function \\(0 0, 1 0, 1 0\\)\\)\\)" 1 "ompexp" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 1, 1 1\\), omp declare target, oacc function \\(0 0, 1 0, 1 0\\)\\)\\)" 1 "oaccdevlow" } }

Committed to gomp-4_0-branch in r247954:

commit 5ed11508df8ec3803667636117834b85005f5990
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Fri May 12 08:43:45 2017 +0000

    Test cases to check OpenACC offloaded function's attributes and classification
    
            gcc/testsuite/
            * c-c++-common/goacc/classify-kernels-unparallelized.c: New file.
            * c-c++-common/goacc/classify-kernels.c: Likewise.
            * c-c++-common/goacc/classify-parallel.c: Likewise.
            * c-c++-common/goacc/classify-routine.c: Likewise.
            * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
            * gfortran.dg/goacc/classify-kernels.f95: Likewise.
            * gfortran.dg/goacc/classify-parallel.f95: Likewise.
            * gfortran.dg/goacc/classify-routine.f95: Likewise.
    
    trunk r247953
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@247954 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/testsuite/ChangeLog.gomp                       | 11 ++++++
 .../goacc/classify-kernels-unparallelized.c        | 39 ++++++++++++++++++++
 .../c-c++-common/goacc/classify-kernels.c          | 35 ++++++++++++++++++
 .../c-c++-common/goacc/classify-parallel.c         | 28 +++++++++++++++
 .../c-c++-common/goacc/classify-routine.c          | 30 ++++++++++++++++
 .../goacc/classify-kernels-unparallelized.f95      | 41 ++++++++++++++++++++++
 .../gfortran.dg/goacc/classify-kernels.f95         | 37 +++++++++++++++++++
 .../gfortran.dg/goacc/classify-parallel.f95        | 30 ++++++++++++++++
 .../gfortran.dg/goacc/classify-routine.f95         | 29 +++++++++++++++
 9 files changed, 280 insertions(+)

diff --git gcc/testsuite/ChangeLog.gomp gcc/testsuite/ChangeLog.gomp
index 952b101..b5dd1a4 100644
--- gcc/testsuite/ChangeLog.gomp
+++ gcc/testsuite/ChangeLog.gomp
@@ -1,3 +1,14 @@
+2017-05-12  Thomas Schwinge  <thomas@codesourcery.com>
+
+	* c-c++-common/goacc/classify-kernels-unparallelized.c: New file.
+	* c-c++-common/goacc/classify-kernels.c: Likewise.
+	* c-c++-common/goacc/classify-parallel.c: Likewise.
+	* c-c++-common/goacc/classify-routine.c: Likewise.
+	* gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
+	* gfortran.dg/goacc/classify-kernels.f95: Likewise.
+	* gfortran.dg/goacc/classify-parallel.f95: Likewise.
+	* gfortran.dg/goacc/classify-routine.f95: Likewise.
+
 2017-05-09  Cesar Philippidis  <cesar@codesourcery.com>
 
 	* c-c++-common/goacc/update-if_present-1.c: Update test case.
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
new file mode 100644
index 0000000..a76351c
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
@@ -0,0 +1,39 @@
+/* Check offloaded function's attributes and classification for unparallelized
+   OpenACC kernels.  */
+
+/* { dg-additional-options "-O2" }
+   { dg-additional-options "-fdump-tree-ompexp" }
+   { dg-additional-options "-fdump-tree-parloops1-all" }
+   { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N 1024
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+
+/* An "extern"al mapping of loop iterations/array indices makes the loop
+   unparallelizable.  */
+extern unsigned int f (unsigned int);
+
+void KERNELS ()
+{
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  for (unsigned int i = 0; i < N; i++)
+    c[i] = a[f (i)] + b[f (i)];
+}
+
+/* Check the offloaded function's attributes.
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+
+/* Check that exactly one OpenACC kernels construct is analyzed, and that it
+   can't be parallelized.
+   { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+   always be 1 x 1 x 1 for non-offloading compilation).
+   { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels.c gcc/testsuite/c-c++-common/goacc/classify-kernels.c
new file mode 100644
index 0000000..199a73e
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels.c
@@ -0,0 +1,35 @@
+/* Check offloaded function's attributes and classification for OpenACC
+   kernels.  */
+
+/* { dg-additional-options "-O2" }
+   { dg-additional-options "-fdump-tree-ompexp" }
+   { dg-additional-options "-fdump-tree-parloops1-all" }
+   { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N 1024
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+
+void KERNELS ()
+{
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  for (unsigned int i = 0; i < N; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Check the offloaded function's attributes.
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+
+/* Check that exactly one OpenACC kernels construct is analyzed, and that it
+   can be parallelized.
+   { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+   always be 1 x 1 x 1 for non-offloading compilation).
+   { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-parallel.c gcc/testsuite/c-c++-common/goacc/classify-parallel.c
new file mode 100644
index 0000000..9d48c1b
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/classify-parallel.c
@@ -0,0 +1,28 @@
+/* Check offloaded function's attributes and classification for OpenACC
+   parallel.  */
+
+/* { dg-additional-options "-O2" }
+   { dg-additional-options "-fdump-tree-ompexp" }
+   { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N 1024
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+
+void PARALLEL ()
+{
+#pragma acc parallel loop copyin (a[0:N], b[0:N]) copyout (c[0:N])
+  for (unsigned int i = 0; i < N; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Check the offloaded function's attributes.
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+   always be 1 x 1 x 1 for non-offloading compilation).
+   { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-routine.c gcc/testsuite/c-c++-common/goacc/classify-routine.c
new file mode 100644
index 0000000..d37fb4a
--- /dev/null
+++ gcc/testsuite/c-c++-common/goacc/classify-routine.c
@@ -0,0 +1,30 @@
+/* Check offloaded function's attributes and classification for OpenACC
+   routine.  */
+
+/* { dg-additional-options "-O2" }
+   { dg-additional-options "-fdump-tree-ompexp" }
+   { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N 1024
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+#pragma acc declare copyin (a, b) create (c)
+
+#pragma acc routine worker
+void ROUTINE ()
+{
+#pragma acc loop
+  for (unsigned int i = 0; i < N; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Check the offloaded function's attributes.
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp declare target \\(worker\\), oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "ompexp" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+   always be 1 x 1 x 1 for non-offloading compilation).
+   { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 1, 1 1\\), omp declare target \\(worker\\), oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
new file mode 100644
index 0000000..fd46d0d
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
@@ -0,0 +1,41 @@
+! Check offloaded function's attributes and classification for unparallelized
+! OpenACC kernels.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-parloops1-all" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+program main
+  implicit none
+  integer, parameter :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer :: i
+
+  ! An "external" mapping of loop iterations/array indices makes the loop
+  ! unparallelizable.
+  integer, external :: f
+
+  call setup(a, b)
+
+  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+  do i = 0, n - 1
+     c(i) = a(f (i)) + b(f (i))
+  end do
+  !$acc end kernels
+end program main
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+
+! Check that exactly one OpenACC kernels construct is analyzed, and that it
+! can't be parallelized.
+! { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
new file mode 100644
index 0000000..053d27c
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
@@ -0,0 +1,37 @@
+! Check offloaded function's attributes and classification for OpenACC
+! kernels.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-parloops1-all" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+program main
+  implicit none
+  integer, parameter :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer :: i
+
+  call setup(a, b)
+
+  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+  do i = 0, n - 1
+     c(i) = a(i) + b(i)
+  end do
+  !$acc end kernels
+end program main
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+
+! Check that exactly one OpenACC kernels construct is analyzed, and that it
+! can be parallelized.
+! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95 gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
new file mode 100644
index 0000000..087ff48
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
@@ -0,0 +1,30 @@
+! Check offloaded function's attributes and classification for OpenACC
+! parallel.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+program main
+  implicit none
+  integer, parameter :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer :: i
+
+  call setup(a, b)
+
+  !$acc parallel loop copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1))
+  do i = 0, n - 1
+     c(i) = a(i) + b(i)
+  end do
+  !$acc end parallel loop
+end program main
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-routine.f95 gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
new file mode 100644
index 0000000..dd71a84
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
@@ -0,0 +1,29 @@
+! Check offloaded function's attributes and classification for OpenACC
+! routine.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+subroutine ROUTINE
+  !$acc routine worker
+  integer, parameter :: n = 1024
+  integer, dimension (0:n-1) :: a, b, c
+  integer :: i
+
+  call setup(a, b)
+
+  !$acc loop
+  do i = 0, n - 1
+     c(i) = a(i) + b(i)
+  end do
+end subroutine ROUTINE
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp declare target, oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "ompexp" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 1, 1 1\\), omp declare target, oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "oaccdevlow" } }


Grüße
 Thomas

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: Use "oacc kernels" attribute for OpenACC kernels
  2017-05-10 16:30           ` Use "oacc kernels" attribute for " Jakub Jelinek
@ 2017-05-12  9:22             ` Thomas Schwinge
  0 siblings, 0 replies; 20+ messages in thread
From: Thomas Schwinge @ 2017-05-12  9:22 UTC (permalink / raw)
  To: Jakub Jelinek, GCC Patches; +Cc: Tom de Vries

Hi!

On Wed, 10 May 2017 18:28:38 +0200, Jakub Jelinek <jakub@redhat.com> wrote:
> On Mon, May 08, 2017 at 09:29:28PM +0200, Thomas Schwinge wrote:
> >     Use "oacc kernels" attribute for OpenACC kernels

> >             * omp-expand.c (expand_omp_target)
> >             <GF_OMP_TARGET_KIND_OACC_KERNELS>: Set "oacc kernels" attribute.
> 
> I think
> 	* omp-expand.c (expand_omp_target) <GF_OMP_TARGET_KIND_OACC_KERNELS>:
> 	Set "oacc kernels" attribute.
> fits better.

So you overrule how Emacs Change Log mode's fill-paragraph thinks this
should be formatted.  ;-)

> > @@ -7451,7 +7457,7 @@ expand_omp_target (struct omp_region *region)
> >        break;
> >      case BUILT_IN_GOACC_PARALLEL:
> >        {
> > -	oacc_set_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
> > +	oacc_set_fn_attrib (child_fn, clauses, &args);
> >  	tagging = true;
> >        }
> >        /* FALLTHRU */
> 
> The {}s aren't needed around this, could you drop them?

Done, but aren't such cleanups usually requested to be done separately from
actual code changes?

> > +	pos = tree_cons (purpose[ix],
> > +			 build_int_cst (integer_type_node, dims[ix]),
> > +			 pos);
> 
> pos); would fit on the earlier line.

As already split over more than one line, I thought it was clearer if the
"chain" parameter was on its own line -- but you get to overrule me
there, too.  ;-)

> Ok with those changes.

Thanks; committed to trunk in r247955:

commit 1d3ea8fcacec29c9a89d9d0a505ed5fbdd5ad73e
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Fri May 12 09:02:55 2017 +0000

    Use "oacc kernels" attribute for OpenACC kernels
    
            gcc/
            * omp-expand.c (expand_omp_target) <GF_OMP_TARGET_KIND_OACC_KERNELS>:
            Set "oacc kernels" attribute.
            * omp-general.c (oacc_set_fn_attrib): Remove is_kernel formal
            parameter.  Adjust all users.
            (oacc_fn_attrib_kernels_p): Remove function.
            * omp-offload.c (execute_oacc_device_lower): Look for "oacc
            kernels" attribute instead of calling oacc_fn_attrib_kernels_p.
            * tree-ssa-loop.c (gate_oacc_kernels): Likewise.
            * tree-parloops.c (create_parallel_loop): If oacc_kernels_p,
            assert "oacc kernels" attribute is set.
            gcc/testsuite/
            * c-c++-common/goacc/classify-kernels-unparallelized.c: Adjust.
            * c-c++-common/goacc/classify-kernels.c: Likewise.
            * c-c++-common/goacc/classify-parallel.c: Likewise.
            * c-c++-common/goacc/classify-routine.c: Likewise.
            * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
            * gfortran.dg/goacc/classify-kernels.f95: Likewise.
            * gfortran.dg/goacc/classify-parallel.f95: Likewise.
            * gfortran.dg/goacc/classify-routine.f95: Likewise.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@247955 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog                                      | 13 ++++++++++++
 gcc/omp-expand.c                                   | 20 ++++++++++--------
 gcc/omp-general.c                                  | 18 ++--------------
 gcc/omp-general.h                                  |  4 +---
 gcc/omp-offload.c                                  | 24 +++++++++++-----------
 gcc/testsuite/ChangeLog                            |  9 ++++++++
 .../goacc/classify-kernels-unparallelized.c        |  8 ++++----
 .../c-c++-common/goacc/classify-kernels.c          |  8 ++++----
 .../c-c++-common/goacc/classify-parallel.c         |  2 +-
 .../c-c++-common/goacc/classify-routine.c          |  2 +-
 .../goacc/classify-kernels-unparallelized.f95      |  8 ++++----
 .../gfortran.dg/goacc/classify-kernels.f95         |  8 ++++----
 .../gfortran.dg/goacc/classify-parallel.f95        |  2 +-
 .../gfortran.dg/goacc/classify-routine.f95         |  2 +-
 gcc/tree-parloops.c                                |  5 ++++-
 gcc/tree-ssa-loop.c                                |  5 +----
 16 files changed, 74 insertions(+), 64 deletions(-)

diff --git gcc/ChangeLog gcc/ChangeLog
index e1f8cf5..aeb22df 100644
--- gcc/ChangeLog
+++ gcc/ChangeLog
@@ -1,3 +1,16 @@
+2017-05-12  Thomas Schwinge  <thomas@codesourcery.com>
+
+	* omp-expand.c (expand_omp_target) <GF_OMP_TARGET_KIND_OACC_KERNELS>:
+	Set "oacc kernels" attribute.
+	* omp-general.c (oacc_set_fn_attrib): Remove is_kernel formal
+	parameter.  Adjust all users.
+	(oacc_fn_attrib_kernels_p): Remove function.
+	* omp-offload.c (execute_oacc_device_lower): Look for "oacc
+	kernels" attribute instead of calling oacc_fn_attrib_kernels_p.
+	* tree-ssa-loop.c (gate_oacc_kernels): Likewise.
+	* tree-parloops.c (create_parallel_loop): If oacc_kernels_p,
+	assert "oacc kernels" attribute is set.
+
 2017-05-11  Carl Love  <cel@us.ibm.com>
 
 	* config/rs6000/rs6000-c: Add support for built-in functions
diff --git gcc/omp-expand.c gcc/omp-expand.c
index 5c48b78..7a7c747 100644
--- gcc/omp-expand.c
+++ gcc/omp-expand.c
@@ -7083,7 +7083,16 @@ expand_omp_target (struct omp_region *region)
   exit_bb = region->exit;
 
   if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
-    mark_loops_in_oacc_kernels_region (region->entry, region->exit);
+    {
+      mark_loops_in_oacc_kernels_region (region->entry, region->exit);
+
+      /* Further down, both OpenACC kernels and OpenACC parallel constructs
+	 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
+	 two, there is an "oacc kernels" attribute set for OpenACC kernels.  */
+      DECL_ATTRIBUTES (child_fn)
+	= tree_cons (get_identifier ("oacc kernels"),
+		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
+    }
 
   if (offloaded)
     {
@@ -7266,7 +7275,6 @@ expand_omp_target (struct omp_region *region)
   enum built_in_function start_ix;
   location_t clause_loc;
   unsigned int flags_i = 0;
-  bool oacc_kernels_p = false;
 
   switch (gimple_omp_target_kind (entry_stmt))
     {
@@ -7287,8 +7295,6 @@ expand_omp_target (struct omp_region *region)
       flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
       break;
     case GF_OMP_TARGET_KIND_OACC_KERNELS:
-      oacc_kernels_p = true;
-      /* FALLTHROUGH */
     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
       start_ix = BUILT_IN_GOACC_PARALLEL;
       break;
@@ -7450,10 +7456,8 @@ expand_omp_target (struct omp_region *region)
 	args.quick_push (get_target_arguments (&gsi, entry_stmt));
       break;
     case BUILT_IN_GOACC_PARALLEL:
-      {
-	oacc_set_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
-	tagging = true;
-      }
+      oacc_set_fn_attrib (child_fn, clauses, &args);
+      tagging = true;
       /* FALLTHRU */
     case BUILT_IN_GOACC_ENTER_EXIT_DATA:
     case BUILT_IN_GOACC_UPDATE:
diff --git gcc/omp-general.c gcc/omp-general.c
index 3f9aec8..9a5ed88 100644
--- gcc/omp-general.c
+++ gcc/omp-general.c
@@ -515,11 +515,10 @@ oacc_replace_fn_attrib (tree fn, tree dims)
 
 /* Scan CLAUSES for launch dimensions and attach them to the oacc
    function attribute.  Push any that are non-constant onto the ARGS
-   list, along with an appropriate GOMP_LAUNCH_DIM tag.  IS_KERNEL is
-   true, if these are for a kernels region offload function.  */
+   list, along with an appropriate GOMP_LAUNCH_DIM tag.  */
 
 void
-oacc_set_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
+oacc_set_fn_attrib (tree fn, tree clauses, vec<tree> *args)
 {
   /* Must match GOMP_DIM ordering.  */
   static const omp_clause_code ids[]
@@ -545,9 +544,6 @@ oacc_set_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
 	  non_const |= GOMP_DIM_MASK (ix);
 	}
       attr = tree_cons (NULL_TREE, dim, attr);
-      /* Note kernelness with TREE_PUBLIC.  */
-      if (is_kernel)
-	TREE_PUBLIC (attr) = 1;
     }
 
   oacc_replace_fn_attrib (fn, attr);
@@ -616,16 +612,6 @@ oacc_get_fn_attrib (tree fn)
   return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
 }
 
-/* Return true if this oacc fn attrib is for a kernels offload
-   region.  We use the TREE_PUBLIC flag of each dimension -- only
-   need to check the first one.  */
-
-bool
-oacc_fn_attrib_kernels_p (tree attr)
-{
-  return TREE_PUBLIC (TREE_VALUE (attr));
-}
-
 /* Extract an oacc execution dimension from FN.  FN must be an
    offloaded function or routine that has already had its execution
    dimensions lowered to the target-specific values.  */
diff --git gcc/omp-general.h gcc/omp-general.h
index 3cf7fce..d28eb4b 100644
--- gcc/omp-general.h
+++ gcc/omp-general.h
@@ -82,11 +82,9 @@ extern int omp_max_vf (void);
 extern int omp_max_simt_vf (void);
 extern tree oacc_launch_pack (unsigned code, tree device, unsigned op);
 extern void oacc_replace_fn_attrib (tree fn, tree dims);
-extern void oacc_set_fn_attrib (tree fn, tree clauses, bool is_kernel,
-				vec<tree> *args);
+extern void oacc_set_fn_attrib (tree fn, tree clauses, vec<tree> *args);
 extern tree oacc_build_routine_dims (tree clauses);
 extern tree oacc_get_fn_attrib (tree fn);
-extern bool oacc_fn_attrib_kernels_p (tree attr);
 extern int oacc_get_fn_dim_size (tree fn, int axis);
 extern int oacc_get_ifn_dim_arg (const gimple *stmt);
 
diff --git gcc/omp-offload.c gcc/omp-offload.c
index beeeb71..d24f131 100644
--- gcc/omp-offload.c
+++ gcc/omp-offload.c
@@ -619,7 +619,6 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
   tree purpose[GOMP_DIM_MAX];
   unsigned ix;
   tree pos = TREE_VALUE (attrs);
-  bool is_kernel = oacc_fn_attrib_kernels_p (attrs);
 
   /* Make sure the attribute creator attached the dimension
      information.  */
@@ -666,13 +665,8 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
       /* Replace the attribute with new values.  */
       pos = NULL_TREE;
       for (ix = GOMP_DIM_MAX; ix--;)
-	{
-	  pos = tree_cons (purpose[ix],
-			   build_int_cst (integer_type_node, dims[ix]),
-			   pos);
-	  if (is_kernel)
-	    TREE_PUBLIC (pos) = 1;
-	}
+	pos = tree_cons (purpose[ix],
+			 build_int_cst (integer_type_node, dims[ix]), pos);
       oacc_replace_fn_attrib (fn, pos);
     }
 }
@@ -1455,10 +1449,16 @@ execute_oacc_device_lower ()
   int fn_level = oacc_fn_attrib_level (attrs);
 
   if (dump_file)
-    fprintf (dump_file, oacc_fn_attrib_kernels_p (attrs)
-	     ? "Function is kernels offload\n"
-	     : fn_level < 0 ? "Function is parallel offload\n"
-	     : "Function is routine level %d\n", fn_level);
+    {
+      if (fn_level >= 0)
+	fprintf (dump_file, "Function is OpenACC routine level %d\n",
+		 fn_level);
+      else if (lookup_attribute ("oacc kernels",
+				 DECL_ATTRIBUTES (current_function_decl)))
+	fprintf (dump_file, "Function is OpenACC kernels offload\n");
+      else
+	fprintf (dump_file, "Function is OpenACC parallel offload\n");
+    }
 
   unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
   unsigned used_mask = oacc_loop_partition (loops, outer_mask);
diff --git gcc/testsuite/ChangeLog gcc/testsuite/ChangeLog
index 5ed40a5..52865d3 100644
--- gcc/testsuite/ChangeLog
+++ gcc/testsuite/ChangeLog
@@ -1,5 +1,14 @@
 2017-05-12  Thomas Schwinge  <thomas@codesourcery.com>
 
+	* c-c++-common/goacc/classify-kernels-unparallelized.c: Adjust.
+	* c-c++-common/goacc/classify-kernels.c: Likewise.
+	* c-c++-common/goacc/classify-parallel.c: Likewise.
+	* c-c++-common/goacc/classify-routine.c: Likewise.
+	* gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
+	* gfortran.dg/goacc/classify-kernels.f95: Likewise.
+	* gfortran.dg/goacc/classify-parallel.f95: Likewise.
+	* gfortran.dg/goacc/classify-routine.f95: Likewise.
+
 	* c-c++-common/goacc/classify-kernels-unparallelized.c: New file.
 	* c-c++-common/goacc/classify-kernels.c: Likewise.
 	* c-c++-common/goacc/classify-parallel.c: Likewise.
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
index a76351c..70ff428 100644
--- gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
@@ -24,16 +24,16 @@ void KERNELS ()
 }
 
 /* Check the offloaded function's attributes.
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */
 
 /* Check that exactly one OpenACC kernels construct is analyzed, and that it
    can't be parallelized.
    { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
    { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } } */
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels.c gcc/testsuite/c-c++-common/goacc/classify-kernels.c
index 199a73e..c8b0fda 100644
--- gcc/testsuite/c-c++-common/goacc/classify-kernels.c
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels.c
@@ -20,16 +20,16 @@ void KERNELS ()
 }
 
 /* Check the offloaded function's attributes.
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */
 
 /* Check that exactly one OpenACC kernels construct is analyzed, and that it
    can be parallelized.
    { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
    { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-parallel.c gcc/testsuite/c-c++-common/goacc/classify-parallel.c
index 9d48c1b..4f97301 100644
--- gcc/testsuite/c-c++-common/goacc/classify-parallel.c
+++ gcc/testsuite/c-c++-common/goacc/classify-parallel.c
@@ -23,6 +23,6 @@ void PARALLEL ()
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC parallel offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-routine.c gcc/testsuite/c-c++-common/goacc/classify-routine.c
index 72b02c2..fd89fc1 100644
--- gcc/testsuite/c-c++-common/goacc/classify-routine.c
+++ gcc/testsuite/c-c++-common/goacc/classify-routine.c
@@ -25,6 +25,6 @@ void ROUTINE ()
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC routine level 1" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 1, 1 1\\), omp declare target, oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
index fd46d0d..9887d35 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
@@ -26,16 +26,16 @@ program main
 end program main
 
 ! Check the offloaded function's attributes.
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } }
 
 ! Check that exactly one OpenACC kernels construct is analyzed, and that it
 ! can't be parallelized.
 ! { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } }
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
index 053d27c..69c89a9 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
@@ -22,16 +22,16 @@ program main
 end program main
 
 ! Check the offloaded function's attributes.
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } }
 
 ! Check that exactly one OpenACC kernels construct is analyzed, and that it
 ! can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95 gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
index 087ff48..e215c79 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
@@ -25,6 +25,6 @@ end program main
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC parallel offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-routine.f95 gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
index 319d767..4ca4067 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
@@ -24,6 +24,6 @@ end subroutine ROUTINE
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC routine level 1" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 1, 1 1\\), omp declare target, oacc function \\(0 0, 1 0, 1 0\\)\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/tree-parloops.c gcc/tree-parloops.c
index 7393011..6ce9d84 100644
--- gcc/tree-parloops.c
+++ gcc/tree-parloops.c
@@ -2043,10 +2043,13 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
   /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
   if (oacc_kernels_p)
     {
+      gcc_checking_assert (lookup_attribute ("oacc kernels",
+					     DECL_ATTRIBUTES (cfun->decl)));
+
       tree clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
       OMP_CLAUSE_NUM_GANGS_EXPR (clause)
 	= build_int_cst (integer_type_node, n_threads);
-      oacc_set_fn_attrib (cfun->decl, clause, true, NULL);
+      oacc_set_fn_attrib (cfun->decl, clause, NULL);
     }
   else
     {
diff --git gcc/tree-ssa-loop.c gcc/tree-ssa-loop.c
index 8b25b41..10c43f3 100644
--- gcc/tree-ssa-loop.c
+++ gcc/tree-ssa-loop.c
@@ -152,10 +152,7 @@ gate_oacc_kernels (function *fn)
   if (!flag_openacc)
     return false;
 
-  tree oacc_function_attr = oacc_get_fn_attrib (fn->decl);
-  if (oacc_function_attr == NULL_TREE)
-    return false;
-  if (!oacc_fn_attrib_kernels_p (oacc_function_attr))
+  if (!lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn->decl)))
     return false;
 
   struct loop *loop;

Committed to gomp-4_0-branch in r247956:

commit 01a98ef0ceb7afc15b7a97054bbff1db0ccf9fb4
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Fri May 12 09:03:37 2017 +0000

    Use "oacc kernels" attribute for OpenACC kernels
    
            gcc/
            * omp-low.c (expand_omp_target) <GF_OMP_TARGET_KIND_OACC_KERNELS>:
            Set "oacc kernels" attribute.
            (set_oacc_fn_attrib): Remove is_kernel formal parameter.  Adjust
            all users.
            (oacc_fn_attrib_kernels_p): Remove function.
            (oacc_validate_dims, execute_oacc_device_lower): Look for "oacc
            kernels" attribute instead of calling oacc_fn_attrib_kernels_p.
            * tree-ssa-loop.c (gate_oacc_kernels): Likewise.
            * config/nvptx/nvptx.c (nvptx_goacc_validate_dims): Likewise.
            * tree-parloops.c (create_parallel_loop): If oacc_kernels_p,
            assert "oacc kernels" attribute is set.
            gcc/testsuite/
            * c-c++-common/goacc/oaccdevlow-kernels.c: Adjust.
            * c-c++-common/goacc/oaccdevlow-parallel.c: Likewise.
            * c-c++-common/goacc/oaccdevlow-routine.c: Likewise.
            * gfortran.dg/goacc/oaccdevlow-kernels.f95: Likewise.
            * gfortran.dg/goacc/oaccdevlow-parallel.f95: Likewise.
            * gfortran.dg/goacc/oaccdevlow-routine.f95: Likewise.
    
    trunk r247955
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@247956 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.gomp                                 | 14 +++++
 gcc/config/nvptx/nvptx.c                           |  4 +-
 gcc/omp-low.c                                      | 65 ++++++++++------------
 gcc/omp-low.h                                      |  3 +-
 gcc/testsuite/ChangeLog.gomp                       |  7 +++
 .../goacc/classify-kernels-unparallelized.c        |  8 +--
 .../c-c++-common/goacc/classify-kernels.c          |  8 +--
 .../c-c++-common/goacc/classify-parallel.c         |  2 +-
 .../c-c++-common/goacc/classify-routine.c          |  2 +-
 .../goacc/classify-kernels-unparallelized.f95      |  8 +--
 .../gfortran.dg/goacc/classify-kernels.f95         |  8 +--
 .../gfortran.dg/goacc/classify-parallel.f95        |  2 +-
 .../gfortran.dg/goacc/classify-routine.f95         |  2 +-
 gcc/tree-parloops.c                                |  5 +-
 gcc/tree-ssa-loop.c                                |  5 +-
 15 files changed, 76 insertions(+), 67 deletions(-)

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index 5139796..8cd46c3 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,3 +1,17 @@
+2017-05-12  Thomas Schwinge  <thomas@codesourcery.com>
+
+	* omp-low.c (expand_omp_target) <GF_OMP_TARGET_KIND_OACC_KERNELS>:
+	Set "oacc kernels" attribute.
+	(set_oacc_fn_attrib): Remove is_kernel formal parameter.  Adjust
+	all users.
+	(oacc_fn_attrib_kernels_p): Remove function.
+	(oacc_validate_dims, execute_oacc_device_lower): Look for "oacc
+	kernels" attribute instead of calling oacc_fn_attrib_kernels_p.
+	* tree-ssa-loop.c (gate_oacc_kernels): Likewise.
+	* config/nvptx/nvptx.c (nvptx_goacc_validate_dims): Likewise.
+	* tree-parloops.c (create_parallel_loop): If oacc_kernels_p,
+	assert "oacc kernels" attribute is set.
+
 2017-05-11  Cesar Philippidis  <cesar@codesourcery.com>
 
 	* gimplify.c (gomp_needs_data_present): Ensure that the
diff --git gcc/config/nvptx/nvptx.c gcc/config/nvptx/nvptx.c
index f790728..f3c1525 100644
--- gcc/config/nvptx/nvptx.c
+++ gcc/config/nvptx/nvptx.c
@@ -4284,10 +4284,8 @@ nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level)
 	 construct could not be parallelized, but only do that for -O2 and
 	 higher, as otherwise we're not expecting any parallelization to
 	 happen.  */
-      tree oacc_function_attr = get_oacc_fn_attrib (decl);
       if (optimize >= 2
-	  && oacc_function_attr
-	  && oacc_fn_attrib_kernels_p (oacc_function_attr))
+	  && lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (decl)))
 	{
 	  bool avoid_offloading_p = true;
 	  for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
diff --git gcc/omp-low.c gcc/omp-low.c
index a681800..18872f9 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -13136,11 +13136,10 @@ replace_oacc_fn_attrib (tree fn, tree dims)
 
 /* Scan CLAUSES for launch dimensions and attach them to the oacc
    function attribute.  Push any that are non-constant onto the ARGS
-   list, along with an appropriate GOMP_LAUNCH_DIM tag.  IS_KERNEL is
-   true, if these are for a kernels region offload function.  */
+   list, along with an appropriate GOMP_LAUNCH_DIM tag.  */
 
 void
-set_oacc_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
+set_oacc_fn_attrib (tree fn, tree clauses, vec<tree> *args)
 {
   /* Must match GOMP_DIM ordering.  */
   static const omp_clause_code ids[]
@@ -13165,9 +13164,6 @@ set_oacc_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
 	  non_const |= GOMP_DIM_MASK (ix);
 	}
       attr = tree_cons (NULL_TREE, dim, attr);
-      /* Note kernelness with TREE_PUBLIC.  */
-      if (is_kernel)
-	TREE_PUBLIC (attr) = 1;
     }
 
   replace_oacc_fn_attrib (fn, attr);
@@ -13423,16 +13419,6 @@ get_oacc_fn_attrib (tree fn)
   return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
 }
 
-/* Return true if this oacc fn attrib is for a kernels offload
-   region.  We use the TREE_PUBLIC flag of each dimension -- only
-   need to check the first one.  */
-
-bool
-oacc_fn_attrib_kernels_p (tree attr)
-{
-  return TREE_PUBLIC (TREE_VALUE (attr));
-}
-
 /* Return level at which oacc routine may spawn a partitioned loop, or
    -1 if it is not a routine (i.e. is an offload fn).  */
 
@@ -13815,7 +13801,16 @@ expand_omp_target (struct omp_region *region)
   exit_bb = region->exit;
 
   if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
-    mark_loops_in_oacc_kernels_region (region->entry, region->exit);
+    {
+      mark_loops_in_oacc_kernels_region (region->entry, region->exit);
+
+      /* Further down, both OpenACC kernels and OpenACC parallel constructs
+	 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
+	 two, there is an "oacc kernels" attribute set for OpenACC kernels.  */
+      DECL_ATTRIBUTES (child_fn)
+	= tree_cons (get_identifier ("oacc kernels"),
+		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
+    }
 
   if (offloaded)
     {
@@ -13995,7 +13990,6 @@ expand_omp_target (struct omp_region *region)
   enum built_in_function start_ix;
   location_t clause_loc;
   unsigned int flags_i = 0;
-  bool oacc_kernels_p = false;
 
   switch (gimple_omp_target_kind (entry_stmt))
     {
@@ -14016,8 +14010,6 @@ expand_omp_target (struct omp_region *region)
       flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
       break;
     case GF_OMP_TARGET_KIND_OACC_KERNELS:
-      oacc_kernels_p = true;
-      /* FALLTHROUGH */
     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
       start_ix = BUILT_IN_GOACC_PARALLEL;
       break;
@@ -14179,10 +14171,8 @@ expand_omp_target (struct omp_region *region)
 	args.quick_push (get_target_arguments (&gsi, entry_stmt));
       break;
     case BUILT_IN_GOACC_PARALLEL:
-      {
-	set_oacc_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
-	tagging = true;
-      }
+      set_oacc_fn_attrib (child_fn, clauses, &args);
+      tagging = true;
       /* FALLTHRU */
     case BUILT_IN_GOACC_ENTER_EXIT_DATA:
     case BUILT_IN_GOACC_UPDATE:
@@ -20249,7 +20239,6 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
   tree purpose[GOMP_DIM_MAX];
   unsigned ix;
   tree pos = TREE_VALUE (attrs);
-  bool is_kernel = oacc_fn_attrib_kernels_p (attrs);
 
   /* Make sure the attribute creator attached the dimension
      information.  */
@@ -20270,7 +20259,8 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
      and/or dimensions.  */
   check = false;
 #endif
-  if (!is_kernel && check)
+  if (check
+      && !lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn)))
     {
       static char const *const axes[] =
       /* Must be kept in sync with GOMP_DIM enumeration.  */
@@ -20326,13 +20316,8 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
       /* Replace the attribute with new values.  */
       pos = NULL_TREE;
       for (ix = GOMP_DIM_MAX; ix--;)
-	{
-	  pos = tree_cons (purpose[ix],
-			   build_int_cst (integer_type_node, dims[ix]),
-			   pos);
-	  if (is_kernel)
-	    TREE_PUBLIC (pos) = 1;
-	}
+	pos = tree_cons (purpose[ix],
+			 build_int_cst (integer_type_node, dims[ix]), pos);
       replace_oacc_fn_attrib (fn, pos);
     }
 }
@@ -21191,10 +21176,16 @@ execute_oacc_device_lower ()
   int fn_level = oacc_fn_attrib_level (attrs);
 
   if (dump_file)
-    fprintf (dump_file, oacc_fn_attrib_kernels_p (attrs)
-	     ? "Function is kernels offload\n"
-	     : fn_level < 0 ? "Function is parallel offload\n"
-	     : "Function is routine level %d\n", fn_level);
+    {
+      if (fn_level >= 0)
+	fprintf (dump_file, "Function is OpenACC routine level %d\n",
+		 fn_level);
+      else if (lookup_attribute ("oacc kernels",
+				 DECL_ATTRIBUTES (current_function_decl)))
+	fprintf (dump_file, "Function is OpenACC kernels offload\n");
+      else
+	fprintf (dump_file, "Function is OpenACC parallel offload\n");
+    }
 
   unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
   unsigned used_mask = oacc_loop_partition (loops, outer_mask);
diff --git gcc/omp-low.h gcc/omp-low.h
index 2602a12..0ea5308 100644
--- gcc/omp-low.h
+++ gcc/omp-low.h
@@ -34,8 +34,7 @@ extern void replace_oacc_fn_attrib (tree, tree);
 extern int verify_oacc_routine_clauses (tree, tree *, location_t, const char *);
 extern tree build_oacc_routine_dims (tree);
 extern tree get_oacc_fn_attrib (tree);
-extern void set_oacc_fn_attrib (tree, tree, bool, vec<tree> *);
-extern bool oacc_fn_attrib_kernels_p (tree);
+extern void set_oacc_fn_attrib (tree, tree, vec<tree> *);
 extern int get_oacc_ifn_dim_arg (const gimple *);
 extern int get_oacc_fn_dim_size (tree, int);
 
diff --git gcc/testsuite/ChangeLog.gomp gcc/testsuite/ChangeLog.gomp
index b5dd1a4..9f2b7be 100644
--- gcc/testsuite/ChangeLog.gomp
+++ gcc/testsuite/ChangeLog.gomp
@@ -1,5 +1,12 @@
 2017-05-12  Thomas Schwinge  <thomas@codesourcery.com>
 
+	* c-c++-common/goacc/oaccdevlow-kernels.c: Adjust.
+	* c-c++-common/goacc/oaccdevlow-parallel.c: Likewise.
+	* c-c++-common/goacc/oaccdevlow-routine.c: Likewise.
+	* gfortran.dg/goacc/oaccdevlow-kernels.f95: Likewise.
+	* gfortran.dg/goacc/oaccdevlow-parallel.f95: Likewise.
+	* gfortran.dg/goacc/oaccdevlow-routine.f95: Likewise.
+
 	* c-c++-common/goacc/classify-kernels-unparallelized.c: New file.
 	* c-c++-common/goacc/classify-kernels.c: Likewise.
 	* c-c++-common/goacc/classify-parallel.c: Likewise.
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
index a76351c..70ff428 100644
--- gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
@@ -24,16 +24,16 @@ void KERNELS ()
 }
 
 /* Check the offloaded function's attributes.
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */
 
 /* Check that exactly one OpenACC kernels construct is analyzed, and that it
    can't be parallelized.
    { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
    { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } } */
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels.c gcc/testsuite/c-c++-common/goacc/classify-kernels.c
index 199a73e..c8b0fda 100644
--- gcc/testsuite/c-c++-common/goacc/classify-kernels.c
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels.c
@@ -20,16 +20,16 @@ void KERNELS ()
 }
 
 /* Check the offloaded function's attributes.
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */
 
 /* Check that exactly one OpenACC kernels construct is analyzed, and that it
    can be parallelized.
    { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
    { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-parallel.c gcc/testsuite/c-c++-common/goacc/classify-parallel.c
index 9d48c1b..4f97301 100644
--- gcc/testsuite/c-c++-common/goacc/classify-parallel.c
+++ gcc/testsuite/c-c++-common/goacc/classify-parallel.c
@@ -23,6 +23,6 @@ void PARALLEL ()
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC parallel offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-routine.c gcc/testsuite/c-c++-common/goacc/classify-routine.c
index d37fb4a..f54c394 100644
--- gcc/testsuite/c-c++-common/goacc/classify-routine.c
+++ gcc/testsuite/c-c++-common/goacc/classify-routine.c
@@ -25,6 +25,6 @@ void ROUTINE ()
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC routine level 1" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 1, 1 1\\), omp declare target \\(worker\\), oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
index fd46d0d..9887d35 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
@@ -26,16 +26,16 @@ program main
 end program main
 
 ! Check the offloaded function's attributes.
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } }
 
 ! Check that exactly one OpenACC kernels construct is analyzed, and that it
 ! can't be parallelized.
 ! { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } }
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
index 053d27c..69c89a9 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
@@ -22,16 +22,16 @@ program main
 end program main
 
 ! Check the offloaded function's attributes.
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } }
 
 ! Check that exactly one OpenACC kernels construct is analyzed, and that it
 ! can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95 gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
index 087ff48..e215c79 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
@@ -25,6 +25,6 @@ end program main
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC parallel offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-routine.f95 gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
index dd71a84..445ff9a 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
@@ -24,6 +24,6 @@ end subroutine ROUTINE
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is routine level 1" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC routine level 1" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0 1, 1 1, 1 1\\), omp declare target, oacc function \\(0 1, 1 0, 1 0\\)\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/tree-parloops.c gcc/tree-parloops.c
index 2e55b79..a94f7a2 100644
--- gcc/tree-parloops.c
+++ gcc/tree-parloops.c
@@ -2041,10 +2041,13 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
   /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
   if (oacc_kernels_p)
     {
+      gcc_checking_assert (lookup_attribute ("oacc kernels",
+					     DECL_ATTRIBUTES (cfun->decl)));
+
       tree clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
       OMP_CLAUSE_NUM_GANGS_EXPR (clause)
 	= build_int_cst (integer_type_node, n_threads);
-      set_oacc_fn_attrib (cfun->decl, clause, true, NULL);
+      set_oacc_fn_attrib (cfun->decl, clause, NULL);
     }
   else
     {
diff --git gcc/tree-ssa-loop.c gcc/tree-ssa-loop.c
index 06354e3..da9a806 100644
--- gcc/tree-ssa-loop.c
+++ gcc/tree-ssa-loop.c
@@ -151,10 +151,7 @@ gate_oacc_kernels (function *fn)
   if (!flag_openacc)
     return false;
 
-  tree oacc_function_attr = get_oacc_fn_attrib (fn->decl);
-  if (oacc_function_attr == NULL_TREE)
-    return false;
-  if (!oacc_fn_attrib_kernels_p (oacc_function_attr))
+  if (!lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn->decl)))
     return false;
 
   struct loop *loop;


Grüße
 Thomas

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels
  2017-05-10 16:32             ` Jakub Jelinek
@ 2017-05-12  9:24               ` Thomas Schwinge
  0 siblings, 0 replies; 20+ messages in thread
From: Thomas Schwinge @ 2017-05-12  9:24 UTC (permalink / raw)
  To: Jakub Jelinek, GCC Patches; +Cc: Tom de Vries

Hi!

On Wed, 10 May 2017 18:30:54 +0200, Jakub Jelinek <jakub@redhat.com> wrote:
> On Tue, May 09, 2017 at 10:57:34PM +0200, Thomas Schwinge wrote:
> >     Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels

> Ok.

Thanks.  Committed to trunk in r247957:

commit 5dd0c4e81e7a79afccfc936407affbdda2e3b737
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Fri May 12 09:18:34 2017 +0000

    [PR middle-end/69921] Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels
    
            gcc/
            PR middle-end/69921
            * tree-parloops.c (create_parallel_loop): Set "oacc kernels
            parallelized" attribute for parallelized OpenACC kernels.
            * omp-offload.c (execute_oacc_device_lower): Use it.
            gcc/testsuite/
            * c-c++-common/goacc/classify-kernels-unparallelized.c: Adjust.
            * c-c++-common/goacc/classify-kernels.c: Likewise.
            * c-c++-common/goacc/kernels-counter-vars-function-scope.c:
            Likewise.
            * c-c++-common/goacc/kernels-double-reduction-n.c: Likewise.
            * c-c++-common/goacc/kernels-double-reduction.c: Likewise.
            * c-c++-common/goacc/kernels-loop-2.c: Likewise.
            * c-c++-common/goacc/kernels-loop-3.c: Likewise.
            * c-c++-common/goacc/kernels-loop-g.c: Likewise.
            * c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise.
            * c-c++-common/goacc/kernels-loop-n.c: Likewise.
            * c-c++-common/goacc/kernels-loop-nest.c: Likewise.
            * c-c++-common/goacc/kernels-loop.c: Likewise.
            * c-c++-common/goacc/kernels-one-counter-var.c: Likewise.
            * c-c++-common/goacc/kernels-reduction.c: Likewise.
            * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
            * gfortran.dg/goacc/classify-kernels.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-2.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-n.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop.f95: Likewise.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@247957 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog                                      |  5 +++++
 gcc/omp-offload.c                                  | 24 ++++++++++++++++----
 gcc/testsuite/ChangeLog                            | 26 ++++++++++++++++++++++
 .../goacc/classify-kernels-unparallelized.c        |  2 +-
 .../c-c++-common/goacc/classify-kernels.c          |  6 ++---
 .../goacc/kernels-counter-vars-function-scope.c    |  3 +--
 .../goacc/kernels-double-reduction-n.c             |  3 +--
 .../c-c++-common/goacc/kernels-double-reduction.c  |  3 +--
 gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c  |  3 +--
 gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c  |  3 +--
 gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c  |  3 +--
 .../c-c++-common/goacc/kernels-loop-mod-not-zero.c |  3 +--
 gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c  |  3 +--
 .../c-c++-common/goacc/kernels-loop-nest.c         |  3 +--
 gcc/testsuite/c-c++-common/goacc/kernels-loop.c    |  3 +--
 .../c-c++-common/goacc/kernels-one-counter-var.c   |  3 +--
 .../c-c++-common/goacc/kernels-reduction.c         |  3 +--
 .../goacc/classify-kernels-unparallelized.f95      |  2 +-
 .../gfortran.dg/goacc/classify-kernels.f95         |  6 ++---
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 |  3 +--
 .../gfortran.dg/goacc/kernels-loop-data-2.f95      |  3 +--
 .../goacc/kernels-loop-data-enter-exit-2.f95       |  3 +--
 .../goacc/kernels-loop-data-enter-exit.f95         |  3 +--
 .../gfortran.dg/goacc/kernels-loop-data-update.f95 |  3 +--
 .../gfortran.dg/goacc/kernels-loop-data.f95        |  3 +--
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95 |  5 ++---
 gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95   |  3 +--
 gcc/tree-parloops.c                                | 16 +++++++------
 28 files changed, 89 insertions(+), 60 deletions(-)

diff --git gcc/ChangeLog gcc/ChangeLog
index aeb22df..580a3db 100644
--- gcc/ChangeLog
+++ gcc/ChangeLog
@@ -1,5 +1,10 @@
 2017-05-12  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/69921
+	* tree-parloops.c (create_parallel_loop): Set "oacc kernels
+	parallelized" attribute for parallelized OpenACC kernels.
+	* omp-offload.c (execute_oacc_device_lower): Use it.
+
 	* omp-expand.c (expand_omp_target) <GF_OMP_TARGET_KIND_OACC_KERNELS>:
 	Set "oacc kernels" attribute.
 	* omp-general.c (oacc_set_fn_attrib): Remove is_kernel formal
diff --git gcc/omp-offload.c gcc/omp-offload.c
index d24f131..9372f9e 100644
--- gcc/omp-offload.c
+++ gcc/omp-offload.c
@@ -1444,6 +1444,13 @@ execute_oacc_device_lower ()
       flag_openacc_dims = (char *)&flag_openacc_dims;
     }
 
+  bool is_oacc_kernels
+    = (lookup_attribute ("oacc kernels",
+			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
+  bool is_oacc_kernels_parallelized
+    = (lookup_attribute ("oacc kernels parallelized",
+			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
+
   /* Discover, partition and process the loops.  */
   oacc_loop *loops = oacc_loop_discovery ();
   int fn_level = oacc_fn_attrib_level (attrs);
@@ -1453,17 +1460,26 @@ execute_oacc_device_lower ()
       if (fn_level >= 0)
 	fprintf (dump_file, "Function is OpenACC routine level %d\n",
 		 fn_level);
-      else if (lookup_attribute ("oacc kernels",
-				 DECL_ATTRIBUTES (current_function_decl)))
-	fprintf (dump_file, "Function is OpenACC kernels offload\n");
+      else if (is_oacc_kernels)
+	fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
+		 (is_oacc_kernels_parallelized
+		  ? "parallelized" : "unparallelized"));
       else
 	fprintf (dump_file, "Function is OpenACC parallel offload\n");
     }
 
   unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
   unsigned used_mask = oacc_loop_partition (loops, outer_mask);
+  /* OpenACC kernels constructs are special: they currently don't use the
+     generic oacc_loop infrastructure and attribute/dimension processing.  */
+  if (is_oacc_kernels && is_oacc_kernels_parallelized)
+    {
+      /* Parallelized OpenACC kernels constructs use gang parallelism.  See
+	 also tree-parloops.c:create_parallel_loop.  */
+      used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
+    }
+
   int dims[GOMP_DIM_MAX];
-
   oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
 
   if (dump_file)
diff --git gcc/testsuite/ChangeLog gcc/testsuite/ChangeLog
index 52865d3..e1e2641 100644
--- gcc/testsuite/ChangeLog
+++ gcc/testsuite/ChangeLog
@@ -2,6 +2,32 @@
 
 	* c-c++-common/goacc/classify-kernels-unparallelized.c: Adjust.
 	* c-c++-common/goacc/classify-kernels.c: Likewise.
+	* c-c++-common/goacc/kernels-counter-vars-function-scope.c:
+	Likewise.
+	* c-c++-common/goacc/kernels-double-reduction-n.c: Likewise.
+	* c-c++-common/goacc/kernels-double-reduction.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-2.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-3.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-g.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-n.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-nest.c: Likewise.
+	* c-c++-common/goacc/kernels-loop.c: Likewise.
+	* c-c++-common/goacc/kernels-one-counter-var.c: Likewise.
+	* c-c++-common/goacc/kernels-reduction.c: Likewise.
+	* gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
+	* gfortran.dg/goacc/classify-kernels.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-2.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-n.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop.f95: Likewise.
+
+	* c-c++-common/goacc/classify-kernels-unparallelized.c: Adjust.
+	* c-c++-common/goacc/classify-kernels.c: Likewise.
 	* c-c++-common/goacc/classify-parallel.c: Likewise.
 	* c-c++-common/goacc/classify-routine.c: Likewise.
 	* gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
index 70ff428..626f6b4 100644
--- gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
@@ -34,6 +34,6 @@ void KERNELS ()
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels.c gcc/testsuite/c-c++-common/goacc/classify-kernels.c
index c8b0fda..95037e6 100644
--- gcc/testsuite/c-c++-common/goacc/classify-kernels.c
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels.c
@@ -25,11 +25,11 @@ void KERNELS ()
 /* Check that exactly one OpenACC kernels construct is analyzed, and that it
    can be parallelized.
    { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
    { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
index 17f240e..c475333 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
@@ -45,9 +45,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c
index 750f576..27ea2e9 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c
@@ -27,10 +27,9 @@ foo (unsigned int n)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
index df60d6a..0841e90 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
@@ -27,10 +27,9 @@ foo (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
index 913d91f..acef6a1 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
@@ -59,11 +59,10 @@ main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
index 1822d2a..75e2bb7 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
@@ -39,9 +39,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
index e946319..73b469d 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
@@ -7,9 +7,8 @@
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
index 9b63b45..5592623 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
@@ -43,9 +43,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
index 279f797..e86be1b 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
@@ -46,9 +46,8 @@ foo (COUNTERTYPE n)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
index db1071f..2b0e186 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
@@ -30,9 +30,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop.c gcc/testsuite/c-c++-common/goacc/kernels-loop.c
index abf7a3c..9619d53 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop.c
@@ -46,9 +46,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
index 95f4817..69539b2 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
@@ -44,9 +44,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-reduction.c gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
index 6f5a418..4a18272 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
@@ -26,9 +26,8 @@ foo (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
index 9887d35..4b282ca 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
@@ -36,6 +36,6 @@ end program main
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
index 69c89a9..da025c1 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
@@ -27,11 +27,11 @@ end program main
 ! Check that exactly one OpenACC kernels construct is analyzed, and that it
 ! can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
index 865f7a6..516aede 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
@@ -34,11 +34,10 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
index c9f3a62..ff3788a 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
@@ -40,11 +40,10 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
index 3361607..60a5c96 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
@@ -40,11 +40,10 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
index 5ba56fb..ce04749 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
@@ -38,11 +38,10 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95
index a622a96..d2de138 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95
@@ -38,10 +38,9 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 2 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95
index 4ec2ac3..92872b2 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95
@@ -38,11 +38,10 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95
index 409fe6f..079712f2 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95
@@ -32,10 +32,9 @@ end module test
 
 ! Check that only one loop is analyzed, and that it can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+! TODO, PR70545.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function __test_MOD_foo._omp_fn.0 " 1 "optimized" } }
-
-! TODO, PR70545.
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" { xfail *-*-* } } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
index ae2cac6..cc9a3a9 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
@@ -30,9 +30,8 @@ end program main
 
 ! Check that only one loop is analyzed, and that it can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } }
diff --git gcc/tree-parloops.c gcc/tree-parloops.c
index 6ce9d84..f826154 100644
--- gcc/tree-parloops.c
+++ gcc/tree-parloops.c
@@ -2040,19 +2040,20 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
   tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
   edge exit, nexit, guard, end, e;
 
-  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
   if (oacc_kernels_p)
     {
       gcc_checking_assert (lookup_attribute ("oacc kernels",
 					     DECL_ATTRIBUTES (cfun->decl)));
-
-      tree clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
-      OMP_CLAUSE_NUM_GANGS_EXPR (clause)
-	= build_int_cst (integer_type_node, n_threads);
-      oacc_set_fn_attrib (cfun->decl, clause, NULL);
+      /* Indicate to later processing that this is a parallelized OpenACC
+	 kernels construct.  */
+      DECL_ATTRIBUTES (cfun->decl)
+	= tree_cons (get_identifier ("oacc kernels parallelized"),
+		     NULL_TREE, DECL_ATTRIBUTES (cfun->decl));
     }
   else
     {
+      /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
+
       basic_block bb = loop_preheader_edge (loop)->src;
       basic_block paral_bb = single_pred (bb);
       gsi = gsi_last_bb (paral_bb);
@@ -2154,7 +2155,8 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
 
   /* Emit GIMPLE_OMP_FOR.  */
   if (oacc_kernels_p)
-    /* In combination with the NUM_GANGS on the parallel.  */
+    /* Parallelized OpenACC kernels constructs use gang parallelism.  See also
+       omp-offload.c:execute_oacc_device_lower.  */
     t = build_omp_clause (loc, OMP_CLAUSE_GANG);
   else
     {

Committed to gomp-4_0-branch in r247958:

commit 2630763958847ab6841dc1164d29ead4ac90fe00
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Fri May 12 09:20:35 2017 +0000

    [PR middle-end/69921] Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels
    
            gcc/
            PR middle-end/69921
            * tree-parloops.c (create_parallel_loop): Set "oacc kernels
            parallelized" attribute for parallelized OpenACC kernels.
            * omp-low.c (execute_oacc_device_lower): Use it.
            * config/nvptx/nvptx.c (nvptx_goacc_validate_dims): Likewise.
            * omp-low.c (set_oacc_fn_attrib): Make it "static".
            * omp-low.h (set_oacc_fn_attrib): Remove prototype.
            gcc/testsuite/
            * c-c++-common/goacc/classify-kernels-unparallelized.c: Adjust.
            * c-c++-common/goacc/classify-kernels.c: Likewise.
            * c-c++-common/goacc/kernels-acc-loop-reduction.c: Likewise.
            * c-c++-common/goacc/kernels-acc-loop-smaller-equal.c: Likewise.
            * c-c++-common/goacc/kernels-counter-vars-function-scope.c:
            Likewise.
            * c-c++-common/goacc/kernels-double-reduction-n.c: Likewise.
            * c-c++-common/goacc/kernels-double-reduction.c: Likewise.
            * c-c++-common/goacc/kernels-loop-2-acc-loop.c: Likewise.
            * c-c++-common/goacc/kernels-loop-2.c: Likewise.
            * c-c++-common/goacc/kernels-loop-3-acc-loop.c: Likewise.
            * c-c++-common/goacc/kernels-loop-3.c: Likewise.
            * c-c++-common/goacc/kernels-loop-acc-loop.c: Likewise.
            * c-c++-common/goacc/kernels-loop-data-2.c: Likewise.
            * c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Likewise.
            * c-c++-common/goacc/kernels-loop-data-enter-exit.c: Likewise.
            * c-c++-common/goacc/kernels-loop-data-update.c: Likewise.
            * c-c++-common/goacc/kernels-loop-data.c: Likewise.
            * c-c++-common/goacc/kernels-loop-g.c: Likewise.
            * c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise.
            * c-c++-common/goacc/kernels-loop-n-acc-loop.c: Likewise.
            * c-c++-common/goacc/kernels-loop-n.c: Likewise.
            * c-c++-common/goacc/kernels-loop-nest.c: Likewise.
            * c-c++-common/goacc/kernels-loop.c: Likewise.
            * c-c++-common/goacc/kernels-one-counter-var.c: Likewise.
            * c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c:
            Likewise.
            * c-c++-common/goacc/kernels-reduction.c: Likewise.
            * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
            * gfortran.dg/goacc/classify-kernels.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-2.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-data.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop-n.f95: Likewise.
            * gfortran.dg/goacc/kernels-loop.f95: Likewise.
            * gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95:
            Likewise.
    
    trunk r247957
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@247958 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.gomp                                 |  8 +++++
 gcc/config/nvptx/nvptx.c                           | 31 ++++++----------
 gcc/omp-low.c                                      | 26 +++++++++++---
 gcc/omp-low.h                                      |  1 -
 gcc/testsuite/ChangeLog.gomp                       | 41 ++++++++++++++++++++++
 .../goacc/classify-kernels-unparallelized.c        |  2 +-
 .../c-c++-common/goacc/classify-kernels.c          |  6 ++--
 .../goacc/kernels-acc-loop-reduction.c             |  3 +-
 .../goacc/kernels-acc-loop-smaller-equal.c         |  3 +-
 .../goacc/kernels-counter-vars-function-scope.c    |  3 +-
 .../goacc/kernels-double-reduction-n.c             |  3 +-
 .../c-c++-common/goacc/kernels-double-reduction.c  |  3 +-
 .../c-c++-common/goacc/kernels-loop-2-acc-loop.c   |  3 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c  |  3 +-
 .../c-c++-common/goacc/kernels-loop-3-acc-loop.c   |  3 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c  |  3 +-
 .../c-c++-common/goacc/kernels-loop-acc-loop.c     |  3 +-
 .../c-c++-common/goacc/kernels-loop-data-2.c       |  3 +-
 .../goacc/kernels-loop-data-enter-exit-2.c         |  3 +-
 .../goacc/kernels-loop-data-enter-exit.c           |  3 +-
 .../c-c++-common/goacc/kernels-loop-data-update.c  |  3 +-
 .../c-c++-common/goacc/kernels-loop-data.c         |  3 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c  |  3 +-
 .../c-c++-common/goacc/kernels-loop-mod-not-zero.c |  3 +-
 .../c-c++-common/goacc/kernels-loop-n-acc-loop.c   |  3 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c  |  3 +-
 .../c-c++-common/goacc/kernels-loop-nest.c         |  3 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop.c    |  3 +-
 .../c-c++-common/goacc/kernels-one-counter-var.c   |  3 +-
 .../goacc/kernels-parallel-loop-data-enter-exit.c  |  3 +-
 .../c-c++-common/goacc/kernels-reduction.c         |  3 +-
 .../goacc/classify-kernels-unparallelized.f95      |  2 +-
 .../gfortran.dg/goacc/classify-kernels.f95         |  6 ++--
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 |  3 +-
 .../gfortran.dg/goacc/kernels-loop-data-2.f95      |  3 +-
 .../goacc/kernels-loop-data-enter-exit-2.f95       |  3 +-
 .../goacc/kernels-loop-data-enter-exit.f95         |  3 +-
 .../gfortran.dg/goacc/kernels-loop-data-update.f95 |  3 +-
 .../gfortran.dg/goacc/kernels-loop-data.f95        |  3 +-
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95 |  7 ++--
 gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95   |  3 +-
 .../kernels-parallel-loop-data-enter-exit.f95      |  3 +-
 gcc/tree-parloops.c                                | 16 +++++----
 43 files changed, 132 insertions(+), 110 deletions(-)

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index 8cd46c3..b34e987 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,5 +1,13 @@
 2017-05-12  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/69921
+	* tree-parloops.c (create_parallel_loop): Set "oacc kernels
+	parallelized" attribute for parallelized OpenACC kernels.
+	* omp-low.c (execute_oacc_device_lower): Use it.
+	* config/nvptx/nvptx.c (nvptx_goacc_validate_dims): Likewise.
+	* omp-low.c (set_oacc_fn_attrib): Make it "static".
+	* omp-low.h (set_oacc_fn_attrib): Remove prototype.
+
 	* omp-low.c (expand_omp_target) <GF_OMP_TARGET_KIND_OACC_KERNELS>:
 	Set "oacc kernels" attribute.
 	(set_oacc_fn_attrib): Remove is_kernel formal parameter.  Adjust
diff --git gcc/config/nvptx/nvptx.c gcc/config/nvptx/nvptx.c
index f3c1525..fd8da8d 100644
--- gcc/config/nvptx/nvptx.c
+++ gcc/config/nvptx/nvptx.c
@@ -4285,28 +4285,17 @@ nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level)
 	 higher, as otherwise we're not expecting any parallelization to
 	 happen.  */
       if (optimize >= 2
-	  && lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (decl)))
+	  && lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (decl))
+	  && !lookup_attribute ("oacc kernels parallelized",
+				DECL_ATTRIBUTES (decl)))
 	{
-	  bool avoid_offloading_p = true;
-	  for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
-	    {
-	      if (dims[ix] == 0 || dims[ix] > 1)
-		{
-		  avoid_offloading_p = false;
-		  break;
-		}
-	    }
-	  if (avoid_offloading_p)
-	    {
-	      warning_at (DECL_SOURCE_LOCATION (decl), 0,
-			  "OpenACC kernels construct will be executed"
-			  " sequentially; will by default avoid offloading to"
-			  " prevent data copy penalty");
-	      DECL_ATTRIBUTES (decl)
-		= tree_cons (get_identifier ("omp avoid offloading"),
-			     NULL_TREE, DECL_ATTRIBUTES (decl));
-
-	    }
+	  warning_at (DECL_SOURCE_LOCATION (decl), 0,
+		      "OpenACC kernels construct will be executed"
+		      " sequentially; will by default avoid offloading to"
+		      " prevent data copy penalty");
+	  DECL_ATTRIBUTES (decl)
+	    = tree_cons (get_identifier ("omp avoid offloading"),
+			 NULL_TREE, DECL_ATTRIBUTES (decl));
 	}
     }
 
diff --git gcc/omp-low.c gcc/omp-low.c
index 18872f9..0fbc3ff 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -13138,7 +13138,7 @@ replace_oacc_fn_attrib (tree fn, tree dims)
    function attribute.  Push any that are non-constant onto the ARGS
    list, along with an appropriate GOMP_LAUNCH_DIM tag.  */
 
-void
+static void
 set_oacc_fn_attrib (tree fn, tree clauses, vec<tree> *args)
 {
   /* Must match GOMP_DIM ordering.  */
@@ -21171,6 +21171,13 @@ execute_oacc_device_lower ()
       flag_openacc_dims = (char *)&flag_openacc_dims;
     } 
 
+  bool is_oacc_kernels
+    = (lookup_attribute ("oacc kernels",
+			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
+  bool is_oacc_kernels_parallelized
+    = (lookup_attribute ("oacc kernels parallelized",
+			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
+
   /* Discover, partition and process the loops.  */
   oacc_loop *loops = oacc_loop_discovery ();
   int fn_level = oacc_fn_attrib_level (attrs);
@@ -21180,17 +21187,26 @@ execute_oacc_device_lower ()
       if (fn_level >= 0)
 	fprintf (dump_file, "Function is OpenACC routine level %d\n",
 		 fn_level);
-      else if (lookup_attribute ("oacc kernels",
-				 DECL_ATTRIBUTES (current_function_decl)))
-	fprintf (dump_file, "Function is OpenACC kernels offload\n");
+      else if (is_oacc_kernels)
+	fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
+		 (is_oacc_kernels_parallelized
+		  ? "parallelized" : "unparallelized"));
       else
 	fprintf (dump_file, "Function is OpenACC parallel offload\n");
     }
 
   unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
   unsigned used_mask = oacc_loop_partition (loops, outer_mask);
+  /* OpenACC kernels constructs are special: they currently don't use the
+     generic oacc_loop infrastructure and attribute/dimension processing.  */
+  if (is_oacc_kernels && is_oacc_kernels_parallelized)
+    {
+      /* Parallelized OpenACC kernels constructs use gang parallelism.  See
+	 also tree-parloops.c:create_parallel_loop.  */
+      used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
+    }
+
   int dims[GOMP_DIM_MAX];
-
   oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
 
   if (dump_file)
diff --git gcc/omp-low.h gcc/omp-low.h
index 0ea5308..65b2433 100644
--- gcc/omp-low.h
+++ gcc/omp-low.h
@@ -34,7 +34,6 @@ extern void replace_oacc_fn_attrib (tree, tree);
 extern int verify_oacc_routine_clauses (tree, tree *, location_t, const char *);
 extern tree build_oacc_routine_dims (tree);
 extern tree get_oacc_fn_attrib (tree);
-extern void set_oacc_fn_attrib (tree, tree, vec<tree> *);
 extern int get_oacc_ifn_dim_arg (const gimple *);
 extern int get_oacc_fn_dim_size (tree, int);
 
diff --git gcc/testsuite/ChangeLog.gomp gcc/testsuite/ChangeLog.gomp
index 9f2b7be..dadff1a 100644
--- gcc/testsuite/ChangeLog.gomp
+++ gcc/testsuite/ChangeLog.gomp
@@ -1,5 +1,46 @@
 2017-05-12  Thomas Schwinge  <thomas@codesourcery.com>
 
+	* c-c++-common/goacc/classify-kernels-unparallelized.c: Adjust.
+	* c-c++-common/goacc/classify-kernels.c: Likewise.
+	* c-c++-common/goacc/kernels-acc-loop-reduction.c: Likewise.
+	* c-c++-common/goacc/kernels-acc-loop-smaller-equal.c: Likewise.
+	* c-c++-common/goacc/kernels-counter-vars-function-scope.c:
+	Likewise.
+	* c-c++-common/goacc/kernels-double-reduction-n.c: Likewise.
+	* c-c++-common/goacc/kernels-double-reduction.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-2-acc-loop.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-2.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-3-acc-loop.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-3.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-acc-loop.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-data-2.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-data-enter-exit.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-data-update.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-data.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-g.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-n-acc-loop.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-n.c: Likewise.
+	* c-c++-common/goacc/kernels-loop-nest.c: Likewise.
+	* c-c++-common/goacc/kernels-loop.c: Likewise.
+	* c-c++-common/goacc/kernels-one-counter-var.c: Likewise.
+	* c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c:
+	Likewise.
+	* c-c++-common/goacc/kernels-reduction.c: Likewise.
+	* gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
+	* gfortran.dg/goacc/classify-kernels.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-2.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-data.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop-n.f95: Likewise.
+	* gfortran.dg/goacc/kernels-loop.f95: Likewise.
+	* gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95:
+	Likewise.
+
 	* c-c++-common/goacc/oaccdevlow-kernels.c: Adjust.
 	* c-c++-common/goacc/oaccdevlow-parallel.c: Likewise.
 	* c-c++-common/goacc/oaccdevlow-routine.c: Likewise.
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
index 70ff428..626f6b4 100644
--- gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
@@ -34,6 +34,6 @@ void KERNELS ()
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/classify-kernels.c gcc/testsuite/c-c++-common/goacc/classify-kernels.c
index c8b0fda..95037e6 100644
--- gcc/testsuite/c-c++-common/goacc/classify-kernels.c
+++ gcc/testsuite/c-c++-common/goacc/classify-kernels.c
@@ -25,11 +25,11 @@ void KERNELS ()
 /* Check that exactly one OpenACC kernels construct is analyzed, and that it
    can be parallelized.
    { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
    { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
-   { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+   { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccdevlow" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-reduction.c gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-reduction.c
index b52f280..4824e53 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-reduction.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-reduction.c
@@ -16,9 +16,8 @@ foo (int n, unsigned int *a)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*\\._omp_fn\\.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-smaller-equal.c gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-smaller-equal.c
index dd10c46..d70afb0 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-smaller-equal.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-smaller-equal.c
@@ -16,9 +16,8 @@ foo (int n)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*\\._omp_fn\\.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
index 17f240e..c475333 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
@@ -45,9 +45,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c
index 750f576..27ea2e9 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-double-reduction-n.c
@@ -27,10 +27,9 @@ foo (unsigned int n)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
index df60d6a..0841e90 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c
@@ -27,10 +27,9 @@ foo (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-2-acc-loop.c gcc/testsuite/c-c++-common/goacc/kernels-loop-2-acc-loop.c
index 21b2a70..7b127cb 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-2-acc-loop.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-2-acc-loop.c
@@ -9,11 +9,10 @@
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
index 913d91f..acef6a1 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
@@ -59,11 +59,10 @@ main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-3-acc-loop.c gcc/testsuite/c-c++-common/goacc/kernels-loop-3-acc-loop.c
index d82debc..a040e09 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-3-acc-loop.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-3-acc-loop.c
@@ -8,9 +8,8 @@
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
index 1822d2a..75e2bb7 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
@@ -39,9 +39,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-acc-loop.c gcc/testsuite/c-c++-common/goacc/kernels-loop-acc-loop.c
index dc3bb43..070a5b5 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-acc-loop.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-acc-loop.c
@@ -8,9 +8,8 @@
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c
index 8046ae9..7180021 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c
@@ -59,11 +59,10 @@ main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c
index ac977d2..0c9f833 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c
@@ -57,11 +57,10 @@ main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c
index 72e18a7..0bd21b6 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c
@@ -54,11 +54,10 @@ main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c
index 94d5702..dd5a841 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c
@@ -55,10 +55,9 @@ main (void)
 /* Check that only two loops are analyzed, and that both can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 2 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c
index cc49699..a658182 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c
@@ -53,11 +53,10 @@ main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
index e946319..73b469d 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
@@ -7,9 +7,8 @@
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
index 9b63b45..5592623 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
@@ -43,9 +43,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-n-acc-loop.c gcc/testsuite/c-c++-common/goacc/kernels-loop-n-acc-loop.c
index 685cb7f..1f25e63 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-n-acc-loop.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-n-acc-loop.c
@@ -8,9 +8,8 @@
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
index 279f797..e86be1b 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
@@ -46,9 +46,8 @@ foo (COUNTERTYPE n)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
index db1071f..2b0e186 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
@@ -30,9 +30,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop.c gcc/testsuite/c-c++-common/goacc/kernels-loop.c
index abf7a3c..9619d53 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-loop.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-loop.c
@@ -46,9 +46,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
index 95f4817..69539b2 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
@@ -44,9 +44,8 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c gcc/testsuite/c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c
index 70c5469..58c9416 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c
@@ -55,11 +55,10 @@ main (void)
 /* Check that only two loops are analyzed, and that both can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" { xfail *-*-* } } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" { xfail *-*-* } } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 2 "parloops1" { xfail *-*-* } } } */
diff --git gcc/testsuite/c-c++-common/goacc/kernels-reduction.c gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
index 6f5a418..4a18272 100644
--- gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
+++ gcc/testsuite/c-c++-common/goacc/kernels-reduction.c
@@ -26,9 +26,8 @@ foo (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
-
-/* { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } } */
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
index 9887d35..4b282ca 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
@@ -36,6 +36,6 @@ end program main
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95 gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
index 69c89a9..da025c1 100644
--- gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
+++ gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
@@ -27,11 +27,11 @@ end program main
 ! Check that exactly one OpenACC kernels construct is analyzed, and that it
 ! can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(0, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
-! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC kernels offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccdevlow" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
index 865f7a6..516aede 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
@@ -34,11 +34,10 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
index c9f3a62..ff3788a 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
@@ -40,11 +40,10 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
index 3361607..60a5c96 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
@@ -40,11 +40,10 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
index 5ba56fb..ce04749 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
@@ -38,11 +38,10 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95
index a622a96..d2de138 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95
@@ -38,10 +38,9 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 2 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95
index 4ec2ac3..92872b2 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95
@@ -38,11 +38,10 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 3 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95
index bdfebde..7c56897 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95
@@ -32,11 +32,10 @@ end module test
 
 ! Check that only one loop is analyzed, and that it can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+! This failure was introduced with the GOMP_MAP_POINTER ->
+! GOMP_MAP_FIRSTPRIVATE_POINTER conversion.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function __test_MOD_foo._omp_fn.0 " 1 "optimized" } }
-
-! This failure was introduced with the GOMP_MAP_POINTER ->
-! GOMP_MAP_FIRSTPRIVATE_POINTER conversion.
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" { xfail *-*-* } } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
index ae2cac6..cc9a3a9 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
@@ -30,9 +30,8 @@ end program main
 
 ! Check that only one loop is analyzed, and that it can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" } }
diff --git gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95 gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95
index 49b767c..16c9b80 100644
--- gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95
+++ gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95
@@ -39,11 +39,10 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
-
-! { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 2 "parloops1" } }
diff --git gcc/tree-parloops.c gcc/tree-parloops.c
index a94f7a2..02736e0 100644
--- gcc/tree-parloops.c
+++ gcc/tree-parloops.c
@@ -2038,19 +2038,20 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
   tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
   edge exit, nexit, guard, end, e;
 
-  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
   if (oacc_kernels_p)
     {
       gcc_checking_assert (lookup_attribute ("oacc kernels",
 					     DECL_ATTRIBUTES (cfun->decl)));
-
-      tree clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
-      OMP_CLAUSE_NUM_GANGS_EXPR (clause)
-	= build_int_cst (integer_type_node, n_threads);
-      set_oacc_fn_attrib (cfun->decl, clause, NULL);
+      /* Indicate to later processing that this is a parallelized OpenACC
+	 kernels construct.  */
+      DECL_ATTRIBUTES (cfun->decl)
+	= tree_cons (get_identifier ("oacc kernels parallelized"),
+		     NULL_TREE, DECL_ATTRIBUTES (cfun->decl));
     }
   else
     {
+      /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
+
       basic_block bb = loop_preheader_edge (loop)->src;
       basic_block paral_bb = single_pred (bb);
       gsi = gsi_last_bb (paral_bb);
@@ -2152,7 +2153,8 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
 
   /* Emit GIMPLE_OMP_FOR.  */
   if (oacc_kernels_p)
-    /* In combination with the NUM_GANGS on the parallel.  */
+    /* Parallelized OpenACC kernels constructs use gang parallelism.  See also
+       omp-low.c:execute_oacc_device_lower.  */
     t = build_omp_clause (loc, OMP_CLAUSE_GANG);
   else
     {


Grüße
 Thomas

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Update 'c-c++-common/goacc/classify-*', 'gfortran.dg/goacc/classify-*'
  2017-05-12  9:09             ` Thomas Schwinge
@ 2022-03-04 13:19               ` Thomas Schwinge
  0 siblings, 0 replies; 20+ messages in thread
From: Thomas Schwinge @ 2022-03-04 13:19 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 466 bytes --]

Hi!

Pushed to master branch commit fda0b0eb4f744f012f21c6976c2e42df87c313bb
"Update 'c-c++-common/goacc/classify-*', 'gfortran.dg/goacc/classify-*'",
see attached.


Grüße
 Thomas


-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Update-c-c-common-goacc-classify-gfortran.dg-goacc-c.patch --]
[-- Type: text/x-diff, Size: 16635 bytes --]

From fda0b0eb4f744f012f21c6976c2e42df87c313bb Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Tue, 1 Mar 2022 14:57:38 +0100
Subject: [PATCH] Update 'c-c++-common/goacc/classify-*',
 'gfortran.dg/goacc/classify-*'

... to use 'dg-line', simplifying later changes.  Also some minor miscellaneous
diagnostics scanning maintenance.

	gcc/testsuite/
	* c-c++-common/goacc/classify-kernels-parloops.c: Update.
	* c-c++-common/goacc/classify-kernels-unparallelized-parloops.c:
	Likewise.
	* c-c++-common/goacc/classify-kernels-unparallelized.c: Likewise.
	* c-c++-common/goacc/classify-kernels.c: Likewise.
	* c-c++-common/goacc/classify-parallel.c: Likewise.
	* c-c++-common/goacc/classify-routine-nohost.c: Likewise.
	* c-c++-common/goacc/classify-routine.c: Likewise.
	* c-c++-common/goacc/classify-serial.c: Likewise.
	* gfortran.dg/goacc/classify-kernels-parloops.f95: Likewise.
	* gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95:
	Likewise.
	* gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
	* gfortran.dg/goacc/classify-kernels.f95: Likewise.
	* gfortran.dg/goacc/classify-parallel.f95: Likewise.
	* gfortran.dg/goacc/classify-routine-nohost.f95: Likewise.
	* gfortran.dg/goacc/classify-routine.f95: Likewise.
	* gfortran.dg/goacc/classify-serial.f95: Likewise.
---
 .../c-c++-common/goacc/classify-kernels-parloops.c       | 3 ++-
 .../goacc/classify-kernels-unparallelized-parloops.c     | 3 ++-
 .../c-c++-common/goacc/classify-kernels-unparallelized.c | 3 ++-
 gcc/testsuite/c-c++-common/goacc/classify-kernels.c      | 3 ++-
 gcc/testsuite/c-c++-common/goacc/classify-parallel.c     | 3 ++-
 .../c-c++-common/goacc/classify-routine-nohost.c         | 3 ++-
 gcc/testsuite/c-c++-common/goacc/classify-routine.c      | 3 ++-
 gcc/testsuite/c-c++-common/goacc/classify-serial.c       | 9 +++++----
 .../gfortran.dg/goacc/classify-kernels-parloops.f95      | 3 ++-
 .../goacc/classify-kernels-unparallelized-parloops.f95   | 3 ++-
 .../goacc/classify-kernels-unparallelized.f95            | 3 ++-
 gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95     | 3 ++-
 gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95    | 3 ++-
 .../gfortran.dg/goacc/classify-routine-nohost.f95        | 3 ++-
 gcc/testsuite/gfortran.dg/goacc/classify-routine.f95     | 3 ++-
 gcc/testsuite/gfortran.dg/goacc/classify-serial.f95      | 9 +++++----
 16 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/goacc/classify-kernels-parloops.c b/gcc/testsuite/c-c++-common/goacc/classify-kernels-parloops.c
index f3685f2e8c5..5f470eb86bc 100644
--- a/gcc/testsuite/c-c++-common/goacc/classify-kernels-parloops.c
+++ b/gcc/testsuite/c-c++-common/goacc/classify-kernels-parloops.c
@@ -20,7 +20,8 @@ extern unsigned int *__restrict c;
 
 void KERNELS ()
 {
-#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC gang loop parallelism" } */
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-line l_compute1 } */
+  /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_compute1 } */
   for (unsigned int i = 0; i < N; i++)
     c[i] = a[i] + b[i];
 }
diff --git a/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized-parloops.c b/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized-parloops.c
index 6522caf9135..06c70fb9d9f 100644
--- a/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized-parloops.c
+++ b/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized-parloops.c
@@ -24,7 +24,8 @@ extern unsigned int f (unsigned int);
 
 void KERNELS ()
 {
-#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC seq loop parallelism" } */
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-line l_compute1 } */
+  /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute1 } */
   for (unsigned int i = 0; i < N; i++)
     c[i] = a[f (i)] + b[f (i)];
 }
diff --git a/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c b/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
index daa8fcb7662..4ee8e9d5f39 100644
--- a/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
+++ b/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
@@ -24,7 +24,8 @@ extern unsigned int f (unsigned int);
 
 void KERNELS ()
 {
-#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC seq loop parallelism" } */
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-line l_compute1 } */
+  /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute1 } */
   /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
   for (unsigned int i = 0; i < N; i++)
     c[i] = a[f (i)] + b[f (i)];
diff --git a/gcc/testsuite/c-c++-common/goacc/classify-kernels.c b/gcc/testsuite/c-c++-common/goacc/classify-kernels.c
index b54a71e788a..74acca8b4a6 100644
--- a/gcc/testsuite/c-c++-common/goacc/classify-kernels.c
+++ b/gcc/testsuite/c-c++-common/goacc/classify-kernels.c
@@ -20,7 +20,8 @@ extern unsigned int *__restrict c;
 
 void KERNELS ()
 {
-#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC gang loop parallelism" } */
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-line l_compute1 } */
+  /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_compute1 } */
   /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
   for (unsigned int i = 0; i < N; i++)
     c[i] = a[i] + b[i];
diff --git a/gcc/testsuite/c-c++-common/goacc/classify-parallel.c b/gcc/testsuite/c-c++-common/goacc/classify-parallel.c
index 9056aa69dad..61d03c0a5c4 100644
--- a/gcc/testsuite/c-c++-common/goacc/classify-parallel.c
+++ b/gcc/testsuite/c-c++-common/goacc/classify-parallel.c
@@ -17,7 +17,8 @@ extern unsigned int *__restrict c;
 
 void PARALLEL ()
 {
-#pragma acc parallel loop copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC gang vector loop parallelism" } */
+#pragma acc parallel loop copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-line l_compute_loop_i1 } */
+  /* { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l_compute_loop_i1 } */
   for (unsigned int i = 0; i < N; i++)
     c[i] = a[i] + b[i];
 }
diff --git a/gcc/testsuite/c-c++-common/goacc/classify-routine-nohost.c b/gcc/testsuite/c-c++-common/goacc/classify-routine-nohost.c
index 99855822011..24dc1b271d7 100644
--- a/gcc/testsuite/c-c++-common/goacc/classify-routine-nohost.c
+++ b/gcc/testsuite/c-c++-common/goacc/classify-routine-nohost.c
@@ -19,7 +19,8 @@ extern unsigned int *__restrict c;
 #pragma acc routine nohost worker
 void ROUTINE ()
 {
-#pragma acc loop /* { dg-bogus "assigned OpenACC .* loop parallelism" } */
+#pragma acc loop /* { dg-line l_loop_i1 } */
+  /* { dg-bogus {optimized: assigned OpenACC [^\n\r]+ loop parallelism} {} { target *-*-* } l_loop_i1 } */
   for (unsigned int i = 0; i < N; i++)
     c[i] = a[i] + b[i];
 }
diff --git a/gcc/testsuite/c-c++-common/goacc/classify-routine.c b/gcc/testsuite/c-c++-common/goacc/classify-routine.c
index f7f0454009b..075822e81b6 100644
--- a/gcc/testsuite/c-c++-common/goacc/classify-routine.c
+++ b/gcc/testsuite/c-c++-common/goacc/classify-routine.c
@@ -19,7 +19,8 @@ extern unsigned int *__restrict c;
 #pragma acc routine worker
 void ROUTINE ()
 {
-#pragma acc loop /* { dg-message "optimized: assigned OpenACC worker vector loop parallelism" } */
+#pragma acc loop /* { dg-line l_loop_i1 } */
+  /* { dg-optimized {assigned OpenACC worker vector loop parallelism} {} { target *-*-* } l_loop_i1 } */
   for (unsigned int i = 0; i < N; i++)
     c[i] = a[i] + b[i];
 }
diff --git a/gcc/testsuite/c-c++-common/goacc/classify-serial.c b/gcc/testsuite/c-c++-common/goacc/classify-serial.c
index f41c141bcd5..71b8c727cdf 100644
--- a/gcc/testsuite/c-c++-common/goacc/classify-serial.c
+++ b/gcc/testsuite/c-c++-common/goacc/classify-serial.c
@@ -17,12 +17,13 @@ extern unsigned int *__restrict c;
 
 void SERIAL ()
 {
-#pragma acc serial loop copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC gang vector loop parallelism" } */
-  /* { dg-bogus "warning: region contains gang partitioned code but is not gang partitioned" "TODO 'serial'" { xfail *-*-* } .-1 }
-     { dg-bogus "warning: region contains worker partitioned code but is not worker partitioned" "" { target *-*-* } .-2 }
-     { dg-bogus "warning: region contains vector partitioned code but is not vector partitioned" "TODO 'serial'" { xfail *-*-* } .-3 }
+#pragma acc serial loop copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-line l_compute_loop_i1 } */
+  /* { dg-bogus "warning: region contains gang partitioned code but is not gang partitioned" "TODO 'serial'" { xfail *-*-* } l_compute_loop_i1 }
+     { dg-bogus "warning: region contains worker partitioned code but is not worker partitioned" "" { target *-*-* } l_compute_loop_i1 }
+     { dg-bogus "warning: region contains vector partitioned code but is not vector partitioned" "TODO 'serial'" { xfail *-*-* } l_compute_loop_i1 }
      TODO Should we really diagnose this if the user explicitly requested 'serial'?
      TODO Should we instead diagnose ('-Wextra' category?) that the user may enable use of parallelism if replacing 'serial' with 'parallel', if applicable?  */
+  /* { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l_compute_loop_i1 } */
   for (unsigned int i = 0; i < N; i++)
     c[i] = a[i] + b[i];
 }
diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-kernels-parloops.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-parloops.f95
index b8c2d99a8a4..96814a1697d 100644
--- a/gcc/testsuite/gfortran.dg/goacc/classify-kernels-parloops.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-parloops.f95
@@ -20,7 +20,8 @@ program main
 
   call setup(a, b)
 
-  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC gang loop parallelism" }
+  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-line l_compute1 }
+  ! { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_compute1 }
   do i = 0, n - 1
      c(i) = a(i) + b(i)
   end do
diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95
index 37733273440..e858617bbc6 100644
--- a/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95
@@ -24,7 +24,8 @@ program main
 
   call setup(a, b)
 
-  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC seq loop parallelism" }
+  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-line l_compute1 }
+  ! { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute1 }
   do i = 0, n - 1
      c(i) = a(f (i)) + b(f (i))
   end do
diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
index ee8e2899a8a..f8897fc5b34 100644
--- a/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
@@ -24,7 +24,8 @@ program main
 
   call setup(a, b)
 
-  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC seq loop parallelism" }
+  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-line l_compute1 }
+  ! { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute1 }
   ! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 }
   do i = 0, n - 1
      c(i) = a(f (i)) + b(f (i))
diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
index a4bcca05cc1..3fe9b34c9c8 100644
--- a/gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
@@ -20,7 +20,8 @@ program main
 
   call setup(a, b)
 
-  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC gang loop parallelism" }
+  !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-line l_compute1 }
+  ! { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_compute1 }
   ! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 }
   do i = 0, n - 1
      c(i) = a(i) + b(i)
diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
index ce4c08ff219..adc38465d52 100644
--- a/gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
@@ -17,7 +17,8 @@ program main
 
   call setup(a, b)
 
-  !$acc parallel loop copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC gang vector loop parallelism" }
+  !$acc parallel loop copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-line l_compute_loop_i1 }
+  ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l_compute_loop_i1 }
   do i = 0, n - 1
      c(i) = a(i) + b(i)
   end do
diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-routine-nohost.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-routine-nohost.f95
index 07e2063551f..b02fbec89c0 100644
--- a/gcc/testsuite/gfortran.dg/goacc/classify-routine-nohost.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/classify-routine-nohost.f95
@@ -17,7 +17,8 @@ subroutine ROUTINE
 
   call setup(a, b)
 
-  !$acc loop ! { dg-bogus "assigned OpenACC .* loop parallelism" }
+  !$acc loop ! { dg-line l_loop_i1 }
+  ! { dg-bogus {optimized: assigned OpenACC [^\n\r]+ loop parallelism} {} { target *-*-* } l_loop_i1 }
   do i = 0, n - 1
      c(i) = a(i) + b(i)
   end do
diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-routine.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
index b065ccadacd..e991783da1e 100644
--- a/gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/classify-routine.f95
@@ -17,7 +17,8 @@ subroutine ROUTINE
 
   call setup(a, b)
 
-  !$acc loop ! { dg-message "optimized: assigned OpenACC worker vector loop parallelism" }
+  !$acc loop ! { dg-line l_loop_i1 }
+  ! { dg-optimized {assigned OpenACC worker vector loop parallelism} {} { target *-*-* } l_loop_i1 }
   do i = 0, n - 1
      c(i) = a(i) + b(i)
   end do
diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-serial.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-serial.f95
index f5cb3fe50c5..21015879703 100644
--- a/gcc/testsuite/gfortran.dg/goacc/classify-serial.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/classify-serial.f95
@@ -17,10 +17,11 @@ program main
 
   call setup(a, b)
 
-  !$acc serial loop copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC gang vector loop parallelism" }
-  ! { dg-bogus "\[Ww\]arning: region contains gang partitioned code but is not gang partitioned" "TODO 'serial'" { xfail *-*-* } .-1 }
-  ! { dg-bogus "\[Ww\]arning: region contains worker partitioned code but is not worker partitioned" "" { target *-*-* } .-2 }
-  ! { dg-bogus "\[Ww\]arning: region contains vector partitioned code but is not vector partitioned" "TODO 'serial'" { xfail *-*-* } .-3 }
+  !$acc serial loop copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-line l_compute_loop_i1 }
+  ! { dg-bogus "\[Ww\]arning: region contains gang partitioned code but is not gang partitioned" "TODO 'serial'" { xfail *-*-* } l_compute_loop_i1 }
+  ! { dg-bogus "\[Ww\]arning: region contains worker partitioned code but is not worker partitioned" "" { target *-*-* } l_compute_loop_i1 }
+  ! { dg-bogus "\[Ww\]arning: region contains vector partitioned code but is not vector partitioned" "TODO 'serial'" { xfail *-*-* } l_compute_loop_i1 }
+  ! { dg-optimized {assigned OpenACC gang vector loop parallelism} {} { target *-*-* } l_compute_loop_i1 }
   do i = 0, n - 1
      c(i) = a(i) + b(i)
   end do
-- 
2.34.1


^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2022-03-04 13:19 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-01-04 15:39 Mark oacc kernels fns Nathan Sidwell
2016-01-25 15:06 ` Nathan Sidwell
2015-12-29 14:19   ` [gomp4] kernels offload fns Nathan Sidwell
2016-01-07  0:11     ` Cesar Philippidis
2016-01-07 13:35       ` Nathan Sidwell
2016-07-27  8:59     ` Test cases to check OpenACC offloaded function's attributes and classification Thomas Schwinge
2016-08-04 14:06       ` Thomas Schwinge
2017-05-08 17:05         ` Thomas Schwinge
2017-05-10 15:53           ` Jakub Jelinek
2017-05-12  9:09             ` Thomas Schwinge
2022-03-04 13:19               ` Update 'c-c++-common/goacc/classify-*', 'gfortran.dg/goacc/classify-*' Thomas Schwinge
2016-01-25 15:09   ` Mark oacc kernels fns Jakub Jelinek
2016-07-27 10:07     ` Use "oacc kernels" attribute for OpenACC kernels (was: Mark oacc kernels fns) Thomas Schwinge
2016-08-04 14:07       ` Use "oacc kernels" attribute for OpenACC kernels Thomas Schwinge
2017-05-08 19:29         ` Thomas Schwinge
2017-05-09 20:59           ` Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels (was: Use "oacc kernels" attribute for OpenACC kernels) Thomas Schwinge
2017-05-10 16:32             ` Jakub Jelinek
2017-05-12  9:24               ` Use "oacc kernels parallelized" attribute for parallelized OpenACC kernels Thomas Schwinge
2017-05-10 16:30           ` Use "oacc kernels" attribute for " Jakub Jelinek
2017-05-12  9:22             ` Thomas Schwinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).