public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [gomp4] privatize internal array variables introduced by the fortran FE
@ 2015-10-13 20:12 Cesar Philippidis
  2015-10-13 20:29 ` Jakub Jelinek
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Cesar Philippidis @ 2015-10-13 20:12 UTC (permalink / raw)
  To: Fortran List, gcc-patches; +Cc: Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 1113 bytes --]

Arrays in fortran have a couple of internal variables associated with
them, e.g. stride, lbound, ubound, size, etc. Depending on how and where
the array was declared, these internal variables may be packed inside an
array descriptor represented by a struct or defined individually. The
major problem with this is that kernels and parallel regions with
default(none) will generate errors if those internal variables are
defined individually since the user has no way to add clauses to them. I
suspect this is also true for arrays inside omp target regions.

My fix for this involves two parts. First, I reinitialize those private
array variables which aren't associated with array descriptors at the
beginning of the parallel/kernels region they are used in. Second, I
added OMP_CLAUSE_PRIVATE for those internal variables.

I'll apply this patch to gomp-4_0-branch shortly.

Is there any reason why only certain arrays have array descriptors? The
arrays with descriptors don't have this problem. It's only the ones
without descriptors that leak new internal variables that cause errors
with default(none).

Cesar

[-- Attachment #2: gfc_array_privatization.diff --]
[-- Type: text/x-patch, Size: 11924 bytes --]

2015-10-13  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/fortran/
	* trans-array.c (gfc_trans_array_bounds): Add an INIT_VLA argument
	to control whether VLAs should be initialized.  Don't mark this
	function as static.
	(gfc_trans_auto_array_allocation): Update call to
	gfc_trans_array_bounds.
	(gfc_trans_g77_array): Likewise.
	* trans-array.h: Declare gfc_trans_array_bounds.
	* trans-openmp.c (gfc_scan_nodesc_arrays): New function.
	(gfc_privatize_nodesc_arrays_1): New function.
	(gfc_privatize_nodesc_arrays): New function.
	(gfc_init_nodesc_arrays): New function.
	(gfc_trans_oacc_construct): Initialize any internal variables for
	arrays without array descriptors inside the offloaded parallel and
	kernels region.
	(gfc_trans_oacc_combined_directive): Likewise.

	gcc/testsuite/
	* gfortran.dg/goacc/default_none.f95: New test.

diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c
index a6b761b..86f983a 100644
--- a/gcc/fortran/trans-array.c
+++ b/gcc/fortran/trans-array.c
@@ -5709,9 +5709,9 @@ gfc_trans_array_cobounds (tree type, stmtblock_t * pblock,
 /* Generate code to evaluate non-constant array bounds.  Sets *poffset and
    returns the size (in elements) of the array.  */
 
-static tree
+tree
 gfc_trans_array_bounds (tree type, gfc_symbol * sym, tree * poffset,
-                        stmtblock_t * pblock)
+                        stmtblock_t * pblock, bool init_vla)
 {
   gfc_array_spec *as;
   tree size;
@@ -5788,7 +5788,9 @@ gfc_trans_array_bounds (tree type, gfc_symbol * sym, tree * poffset,
     }
 
   gfc_trans_array_cobounds (type, pblock, sym);
-  gfc_trans_vla_type_sizes (sym, pblock);
+
+  if (init_vla)
+    gfc_trans_vla_type_sizes (sym, pblock);
 
   *poffset = offset;
   return size;
@@ -5852,7 +5854,7 @@ gfc_trans_auto_array_allocation (tree decl, gfc_symbol * sym,
       && !INTEGER_CST_P (sym->ts.u.cl->backend_decl))
     gfc_conv_string_length (sym->ts.u.cl, NULL, &init);
 
-  size = gfc_trans_array_bounds (type, sym, &offset, &init);
+  size = gfc_trans_array_bounds (type, sym, &offset, &init, true);
 
   /* Don't actually allocate space for Cray Pointees.  */
   if (sym->attr.cray_pointee)
@@ -5947,7 +5949,7 @@ gfc_trans_g77_array (gfc_symbol * sym, gfc_wrapped_block * block)
     gfc_conv_string_length (sym->ts.u.cl, NULL, &init);
 
   /* Evaluate the bounds of the array.  */
-  gfc_trans_array_bounds (type, sym, &offset, &init);
+  gfc_trans_array_bounds (type, sym, &offset, &init, true);
 
   /* Set the offset.  */
   if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL)
diff --git a/gcc/fortran/trans-array.h b/gcc/fortran/trans-array.h
index 52f1c9a..8dbafb9 100644
--- a/gcc/fortran/trans-array.h
+++ b/gcc/fortran/trans-array.h
@@ -44,6 +44,8 @@ void gfc_trans_g77_array (gfc_symbol *, gfc_wrapped_block *);
 /* Generate code to deallocate an array, if it is allocated.  */
 tree gfc_trans_dealloc_allocated (tree, bool, gfc_expr *);
 
+tree gfc_trans_array_bounds (tree, gfc_symbol *, tree *, stmtblock_t *, bool);
+
 tree gfc_full_array_size (stmtblock_t *, tree, int);
 
 tree gfc_duplicate_allocatable (tree, tree, tree, int, tree);
diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index 8c1e897..f2e9803 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -39,6 +39,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "arith.h"
 #include "omp-low.h"
 #include "gomp-constants.h"
+#include "hash-set.h"
+#include "tree-iterator.h"
 
 int ompws_flags;
 
@@ -2716,22 +2718,157 @@ gfc_trans_omp_code (gfc_code *code, bool force_empty)
   return stmt;
 }
 
+void gfc_debug_expr (gfc_expr *);
+
+/* Add any array that does not have an array descriptor to the hash_set
+   pointed to by DATA.  */
+
+static int
+gfc_scan_nodesc_arrays (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED,
+		void *data)
+{
+  hash_set<gfc_symbol *> *arrays = (hash_set<gfc_symbol *> *)data;
+
+  if ((*e)->expr_type == EXPR_VARIABLE)
+    {
+      gfc_symbol *sym = (*e)->symtree->n.sym;
+
+      if (sym->attr.dimension && gfc_is_nodesc_array (sym))
+	arrays->add (sym);
+    }
+
+  return 0;
+}
+
+/* Build a set of internal array variables (lbound, ubound, stride, etc.)
+   that need privatization.  */
+
+static tree
+gfc_privatize_nodesc_arrays_1 (tree *tp, int *walk_subtrees, void *data)
+{
+  hash_set<tree> *decls = (hash_set<tree> *)data;
+
+  if (TREE_CODE (*tp) == MODIFY_EXPR)
+    {
+      tree lhs = TREE_OPERAND (*tp, 0);
+      if (DECL_P (lhs))
+	decls->add (lhs);
+    }
+
+  if (IS_TYPE_OR_DECL_P (*tp))
+    *walk_subtrees = false;
+
+  return NULL;
+}
+
+/* Reinitialize all of the arrays inside ARRAY_SET in BLOCK.  Append private
+   clauses for those arrays in CLAUSES.  */
+
+static tree
+gfc_privatize_nodesc_arrays (hash_set<gfc_symbol *> *array_set,
+			     stmtblock_t *block, tree clauses)
+{
+  hash_set<gfc_symbol *>::iterator its = array_set->begin ();
+  hash_set<tree> *private_decls = new hash_set<tree>;
+
+  for (; its != array_set->end (); ++its)
+    {
+      gfc_symbol *sym = *its;
+      tree parm = sym->backend_decl;
+      tree type = TREE_TYPE (parm);
+      tree offset, tmp;
+
+      /* Evaluate the bounds of the array.  */
+      gfc_trans_array_bounds (type, sym, &offset, block, false);
+
+      /* Set the offset.  */
+      if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL)
+	gfc_add_modify (block, GFC_TYPE_ARRAY_OFFSET (type), offset);
+
+      /* Set the pointer itself if we aren't using the parameter
+	 directly.  */
+      if (TREE_CODE (parm) != PARM_DECL && DECL_LANG_SPECIFIC (parm)
+	  && GFC_DECL_SAVED_DESCRIPTOR (parm))
+	{
+	  tmp = convert (TREE_TYPE (parm),
+			 GFC_DECL_SAVED_DESCRIPTOR (parm));
+	  gfc_add_modify (block, parm, tmp);
+	}
+    }
+
+  /* Add private clauses for any variables that are used by
+     gfc_trans_array_bounds.  */
+  walk_tree_without_duplicates (&block->head, gfc_privatize_nodesc_arrays_1,
+				private_decls);
+
+  hash_set<tree>::iterator itt = private_decls->begin ();
+
+  for (; itt != private_decls->end (); ++itt)
+    {
+      tree nc = build_omp_clause (input_location, OMP_CLAUSE_PRIVATE);
+      OMP_CLAUSE_DECL (nc) = *itt;
+      OMP_CLAUSE_CHAIN (nc) = clauses;
+      clauses = nc;
+    }
+
+  delete private_decls;
+
+  return clauses;
+}
+
+/* Reinitialize any arrays in CLAUSES used inside CODE which do not contain
+   array descriptors if SCAN_NODESC_ARRAYS is TRUE.  Place the initialization
+   sequences in CODE.  Update CLAUSES to contain OMP_CLAUSE_PRIVATE for any
+   arrays which were initialized.  */
+
+static hash_set<gfc_symbol *> *
+gfc_init_nodesc_arrays (stmtblock_t *inner, tree *clauses, gfc_code *code,
+			bool scan_nodesc_arrays)
+{
+  hash_set<gfc_symbol *> *array_set = NULL;
+
+  if (!scan_nodesc_arrays)
+    return NULL;
+
+  array_set = new hash_set<gfc_symbol *>;
+  gfc_code_walker (&code, gfc_dummy_code_callback, gfc_scan_nodesc_arrays,
+		   array_set);
+
+  if (array_set->elements ())
+    {
+      gfc_start_block (inner);
+      pushlevel ();
+      *clauses = gfc_privatize_nodesc_arrays (array_set, inner, *clauses);
+    }
+  else
+    {
+      delete array_set;
+      array_set = NULL;
+    }
+
+  return array_set;
+}
+
 /* Trans OpenACC directives. */
 /* parallel, kernels, data and host_data. */
 static tree
 gfc_trans_oacc_construct (gfc_code *code)
 {
-  stmtblock_t block;
+  stmtblock_t block, inner;
   tree stmt, oacc_clauses;
   enum tree_code construct_code;
+  bool scan_nodesc_arrays = false;
+  hash_set<gfc_symbol *> *array_set = NULL;
 
   switch (code->op)
     {
       case EXEC_OACC_PARALLEL:
 	construct_code = OACC_PARALLEL;
+	scan_nodesc_arrays = true;
 	break;
       case EXEC_OACC_KERNELS:
 	construct_code = OACC_KERNELS;
+	scan_nodesc_arrays = true;
 	break;
       case EXEC_OACC_DATA:
 	construct_code = OACC_DATA;
@@ -2746,10 +2883,25 @@ gfc_trans_oacc_construct (gfc_code *code)
   gfc_start_block (&block);
   oacc_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses,
 					code->loc);
+
+  array_set = gfc_init_nodesc_arrays (&inner, &oacc_clauses, code,
+				      scan_nodesc_arrays);
+
   stmt = gfc_trans_omp_code (code->block->next, true);
+
+  if (array_set && array_set->elements ())
+    {
+      gfc_add_expr_to_block (&inner, stmt);
+      stmt = gfc_finish_block (&inner);
+      stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0));
+      delete array_set;
+    }
+
   stmt = build2_loc (input_location, construct_code, void_type_node, stmt,
 		     oacc_clauses);
+
   gfc_add_expr_to_block (&block, stmt);
+
   return gfc_finish_block (&block);
 }
 
@@ -3483,18 +3635,22 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
 static tree
 gfc_trans_oacc_combined_directive (gfc_code *code)
 {
-  stmtblock_t block, *pblock = NULL;
+  stmtblock_t block, inner, *pblock = NULL;
   gfc_omp_clauses construct_clauses, loop_clauses;
   tree stmt, oacc_clauses = NULL_TREE;
   enum tree_code construct_code;
+  bool scan_nodesc_arrays = false;
+  hash_set<gfc_symbol *> *array_set = NULL;
 
   switch (code->op)
     {
       case EXEC_OACC_PARALLEL_LOOP:
 	construct_code = OACC_PARALLEL;
+	scan_nodesc_arrays = true;
 	break;
       case EXEC_OACC_KERNELS_LOOP:
 	construct_code = OACC_KERNELS;
+	scan_nodesc_arrays = true;
 	break;
       default:
 	gcc_unreachable ();
@@ -3526,18 +3682,35 @@ gfc_trans_oacc_combined_directive (gfc_code *code)
       oacc_clauses = gfc_trans_omp_clauses (&block, &construct_clauses,
 					    code->loc);
     }
+
+  array_set = gfc_init_nodesc_arrays (&inner, &oacc_clauses, code,
+				      scan_nodesc_arrays);
+
   if (!loop_clauses.seq)
-    pblock = &block;
+    pblock = (array_set && array_set->elements ()) ? &inner : &block;
   else
     pushlevel ();
   stmt = gfc_trans_omp_do (code, EXEC_OACC_LOOP, pblock, &loop_clauses, NULL);
+
+  if (array_set && array_set->elements ())
+    gfc_add_expr_to_block (&inner, stmt);
+
   if (TREE_CODE (stmt) != BIND_EXPR)
     stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0));
   else
     poplevel (0, 0);
+
+  if (array_set && array_set->elements ())
+    {
+      stmt = gfc_finish_block (&inner);
+      stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0));
+      delete array_set;
+    }
+
   stmt = build2_loc (input_location, construct_code, void_type_node, stmt,
 		     oacc_clauses);
   gfc_add_expr_to_block (&block, stmt);
+
   return gfc_finish_block (&block);
 }
 
diff --git a/gcc/testsuite/gfortran.dg/goacc/default_none.f95 b/gcc/testsuite/gfortran.dg/goacc/default_none.f95
new file mode 100644
index 0000000..5ce66ae
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/default_none.f95
@@ -0,0 +1,59 @@
+! Ensure that the internal array variables, offset, lbound, etc., don't
+! trigger errors with default(none).
+
+! { dg-do compile }
+
+program main
+  implicit none
+  integer i
+  integer,parameter :: n = 100
+  integer,allocatable :: a1(:), a2(:,:)
+
+  allocate (a1 (n))
+  allocate (a2 (-n:n,-n:n))
+  a1 (:) = -1
+
+  !$acc parallel loop default(none) copy (a1(1:n))
+  do i = 1,n
+     a1(i) = i
+  end do
+  !$acc end parallel loop
+
+  call foo (a1)
+  call bar (a1, n)
+  call foobar (a2,n)
+
+contains
+
+  subroutine foo (da1)
+    integer :: da1(n)
+
+    !$acc parallel loop default(none) copy (da1(1:n))
+    do i = 1,n
+       da1(i) = i*2
+    end do
+    !$acc end parallel loop
+  end subroutine foo
+end program main
+
+subroutine bar (da2,n)
+  integer :: n, da2(n)
+  integer i
+
+  !$acc parallel loop default(none) copy (da2(1:n)) firstprivate(n)
+  do i = 1,n
+     da2(i) = i*3
+  end do
+  !$acc end parallel loop
+end subroutine bar
+
+subroutine foobar (da3,n)
+  integer :: n, da3(-n:n,-n:n)
+  integer i
+
+  !$acc parallel loop default(none) copy (da3(-n:n,-n:n)) firstprivate(n)
+  do i = 1,n
+     da3(i,0) = i*3
+  end do
+  !$acc end parallel loop
+end subroutine foobar

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [gomp4] privatize internal array variables introduced by the fortran FE
  2015-10-13 20:12 [gomp4] privatize internal array variables introduced by the fortran FE Cesar Philippidis
@ 2015-10-13 20:29 ` Jakub Jelinek
  2015-10-13 20:40   ` Cesar Philippidis
  2015-10-14 11:51 ` Paul Richard Thomas
  2016-01-13  1:50 ` [gomp4] arrays inside modules Cesar Philippidis
  2 siblings, 1 reply; 9+ messages in thread
From: Jakub Jelinek @ 2015-10-13 20:29 UTC (permalink / raw)
  To: Cesar Philippidis; +Cc: Fortran List, gcc-patches

On Tue, Oct 13, 2015 at 01:12:25PM -0700, Cesar Philippidis wrote:
> Arrays in fortran have a couple of internal variables associated with
> them, e.g. stride, lbound, ubound, size, etc. Depending on how and where
> the array was declared, these internal variables may be packed inside an
> array descriptor represented by a struct or defined individually. The
> major problem with this is that kernels and parallel regions with
> default(none) will generate errors if those internal variables are
> defined individually since the user has no way to add clauses to them. I
> suspect this is also true for arrays inside omp target regions.

I believe gfc_omp_predetermined_sharing is supposed to handle this,
returning predetermined shared for certain DECL_ARTIFICIAL decls.
If you are not using that hook, perhaps you should have similar one tuned
for OpenACC purposes?

	Jakub

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [gomp4] privatize internal array variables introduced by the fortran FE
  2015-10-13 20:29 ` Jakub Jelinek
@ 2015-10-13 20:40   ` Cesar Philippidis
  0 siblings, 0 replies; 9+ messages in thread
From: Cesar Philippidis @ 2015-10-13 20:40 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Fortran List, gcc-patches

On 10/13/2015 01:29 PM, Jakub Jelinek wrote:
> On Tue, Oct 13, 2015 at 01:12:25PM -0700, Cesar Philippidis wrote:
>> Arrays in fortran have a couple of internal variables associated with
>> them, e.g. stride, lbound, ubound, size, etc. Depending on how and where
>> the array was declared, these internal variables may be packed inside an
>> array descriptor represented by a struct or defined individually. The
>> major problem with this is that kernels and parallel regions with
>> default(none) will generate errors if those internal variables are
>> defined individually since the user has no way to add clauses to them. I
>> suspect this is also true for arrays inside omp target regions.
> 
> I believe gfc_omp_predetermined_sharing is supposed to handle this,
> returning predetermined shared for certain DECL_ARTIFICIAL decls.
> If you are not using that hook, perhaps you should have similar one tuned
> for OpenACC purposes?

We do have one for openacc. I thought it's job was to mark variables as
firstprivate or pcopy as necessary. Anyway, it might be too late to call
gfc_omp_predetermined_sharing from the gimplifier from a performance
standpoint. Consider something like this:

  !$acc data copy (array)
  do i = 1,n
    !$acc parallel loop
     do j = 1,n
       ...array...
     end do
  end do
  !$acc end data

The problem here is that all of those internal variables would end up
getting marked as firstprivate. And that would cause more data to be
transferred to the accelerator. This patch reinitialized those variables
on the accelerator so they don't have to be transferred at all.

Cesar

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [gomp4] privatize internal array variables introduced by the fortran FE
  2015-10-13 20:12 [gomp4] privatize internal array variables introduced by the fortran FE Cesar Philippidis
  2015-10-13 20:29 ` Jakub Jelinek
@ 2015-10-14 11:51 ` Paul Richard Thomas
  2016-01-13  1:50 ` [gomp4] arrays inside modules Cesar Philippidis
  2 siblings, 0 replies; 9+ messages in thread
From: Paul Richard Thomas @ 2015-10-14 11:51 UTC (permalink / raw)
  To: Cesar Philippidis; +Cc: Fortran List, gcc-patches, Jakub Jelinek

Dear Cesar,

>
> Is there any reason why only certain arrays have array descriptors? The
> arrays with descriptors don't have this problem. It's only the ones
> without descriptors that leak new internal variables that cause errors
> with default(none).
>

This is an obvious question to which there is no obvious answer. When
asked it of one of the originators of gfortran, I was told that they
tried but got into some unspecified mess.

I would add the question as to why characters and scalars do not have
descriptors as well? One day, the volunteer maintainers will have
sorted out enough of the PRs to turn to issues like this. However,
simplification of this kind is just not on the cards at present.

Cheers

Paul

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [gomp4] arrays inside modules
@ 2016-01-13  1:50 ` Cesar Philippidis
  2019-01-25 14:19   ` [PATCH, OpenACC] Rework OpenACC Fortran DO loop initialization Gergö Barany
  0 siblings, 1 reply; 9+ messages in thread
From: Cesar Philippidis @ 2016-01-13  1:50 UTC (permalink / raw)
  To: gcc-patches, Fortran List

[-- Attachment #1: Type: text/plain, Size: 575 bytes --]

I've applied this patch to gomp-4_0-branch which fixes an ICE when an
array declared inside a module is used inside an offloaded acc region.
Bad things happen when you try to use sym->backend_decl when it wasn't
defined.

This patch was part of an optimization that I implemented in gomp4 in an
attempt to move all of the non-array descriptor array variables into the
offloaded region. Applications, such as cloverleaf, sometimes have a lot
of small offloaded regions, and treating those supplementary array
variables as firstprivate caused a measurable I/O overhead.

Cesar

[-- Attachment #2: gfc_array_privatization.diff --]
[-- Type: text/x-patch, Size: 11924 bytes --]

2015-10-13  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/fortran/
	* trans-array.c (gfc_trans_array_bounds): Add an INIT_VLA argument
	to control whether VLAs should be initialized.  Don't mark this
	function as static.
	(gfc_trans_auto_array_allocation): Update call to
	gfc_trans_array_bounds.
	(gfc_trans_g77_array): Likewise.
	* trans-array.h: Declare gfc_trans_array_bounds.
	* trans-openmp.c (gfc_scan_nodesc_arrays): New function.
	(gfc_privatize_nodesc_arrays_1): New function.
	(gfc_privatize_nodesc_arrays): New function.
	(gfc_init_nodesc_arrays): New function.
	(gfc_trans_oacc_construct): Initialize any internal variables for
	arrays without array descriptors inside the offloaded parallel and
	kernels region.
	(gfc_trans_oacc_combined_directive): Likewise.

	gcc/testsuite/
	* gfortran.dg/goacc/default_none.f95: New test.

diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c
index a6b761b..86f983a 100644
--- a/gcc/fortran/trans-array.c
+++ b/gcc/fortran/trans-array.c
@@ -5709,9 +5709,9 @@ gfc_trans_array_cobounds (tree type, stmtblock_t * pblock,
 /* Generate code to evaluate non-constant array bounds.  Sets *poffset and
    returns the size (in elements) of the array.  */
 
-static tree
+tree
 gfc_trans_array_bounds (tree type, gfc_symbol * sym, tree * poffset,
-                        stmtblock_t * pblock)
+                        stmtblock_t * pblock, bool init_vla)
 {
   gfc_array_spec *as;
   tree size;
@@ -5788,7 +5788,9 @@ gfc_trans_array_bounds (tree type, gfc_symbol * sym, tree * poffset,
     }
 
   gfc_trans_array_cobounds (type, pblock, sym);
-  gfc_trans_vla_type_sizes (sym, pblock);
+
+  if (init_vla)
+    gfc_trans_vla_type_sizes (sym, pblock);
 
   *poffset = offset;
   return size;
@@ -5852,7 +5854,7 @@ gfc_trans_auto_array_allocation (tree decl, gfc_symbol * sym,
       && !INTEGER_CST_P (sym->ts.u.cl->backend_decl))
     gfc_conv_string_length (sym->ts.u.cl, NULL, &init);
 
-  size = gfc_trans_array_bounds (type, sym, &offset, &init);
+  size = gfc_trans_array_bounds (type, sym, &offset, &init, true);
 
   /* Don't actually allocate space for Cray Pointees.  */
   if (sym->attr.cray_pointee)
@@ -5947,7 +5949,7 @@ gfc_trans_g77_array (gfc_symbol * sym, gfc_wrapped_block * block)
     gfc_conv_string_length (sym->ts.u.cl, NULL, &init);
 
   /* Evaluate the bounds of the array.  */
-  gfc_trans_array_bounds (type, sym, &offset, &init);
+  gfc_trans_array_bounds (type, sym, &offset, &init, true);
 
   /* Set the offset.  */
   if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL)
diff --git a/gcc/fortran/trans-array.h b/gcc/fortran/trans-array.h
index 52f1c9a..8dbafb9 100644
--- a/gcc/fortran/trans-array.h
+++ b/gcc/fortran/trans-array.h
@@ -44,6 +44,8 @@ void gfc_trans_g77_array (gfc_symbol *, gfc_wrapped_block *);
 /* Generate code to deallocate an array, if it is allocated.  */
 tree gfc_trans_dealloc_allocated (tree, bool, gfc_expr *);
 
+tree gfc_trans_array_bounds (tree, gfc_symbol *, tree *, stmtblock_t *, bool);
+
 tree gfc_full_array_size (stmtblock_t *, tree, int);
 
 tree gfc_duplicate_allocatable (tree, tree, tree, int, tree);
diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index 8c1e897..f2e9803 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -39,6 +39,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "arith.h"
 #include "omp-low.h"
 #include "gomp-constants.h"
+#include "hash-set.h"
+#include "tree-iterator.h"
 
 int ompws_flags;
 
@@ -2716,22 +2718,157 @@ gfc_trans_omp_code (gfc_code *code, bool force_empty)
   return stmt;
 }
 
+void gfc_debug_expr (gfc_expr *);
+
+/* Add any array that does not have an array descriptor to the hash_set
+   pointed to by DATA.  */
+
+static int
+gfc_scan_nodesc_arrays (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED,
+		void *data)
+{
+  hash_set<gfc_symbol *> *arrays = (hash_set<gfc_symbol *> *)data;
+
+  if ((*e)->expr_type == EXPR_VARIABLE)
+    {
+      gfc_symbol *sym = (*e)->symtree->n.sym;
+
+      if (sym->attr.dimension && gfc_is_nodesc_array (sym))
+	arrays->add (sym);
+    }
+
+  return 0;
+}
+
+/* Build a set of internal array variables (lbound, ubound, stride, etc.)
+   that need privatization.  */
+
+static tree
+gfc_privatize_nodesc_arrays_1 (tree *tp, int *walk_subtrees, void *data)
+{
+  hash_set<tree> *decls = (hash_set<tree> *)data;
+
+  if (TREE_CODE (*tp) == MODIFY_EXPR)
+    {
+      tree lhs = TREE_OPERAND (*tp, 0);
+      if (DECL_P (lhs))
+	decls->add (lhs);
+    }
+
+  if (IS_TYPE_OR_DECL_P (*tp))
+    *walk_subtrees = false;
+
+  return NULL;
+}
+
+/* Reinitialize all of the arrays inside ARRAY_SET in BLOCK.  Append private
+   clauses for those arrays in CLAUSES.  */
+
+static tree
+gfc_privatize_nodesc_arrays (hash_set<gfc_symbol *> *array_set,
+			     stmtblock_t *block, tree clauses)
+{
+  hash_set<gfc_symbol *>::iterator its = array_set->begin ();
+  hash_set<tree> *private_decls = new hash_set<tree>;
+
+  for (; its != array_set->end (); ++its)
+    {
+      gfc_symbol *sym = *its;
+      tree parm = sym->backend_decl;
+      tree type = TREE_TYPE (parm);
+      tree offset, tmp;
+
+      /* Evaluate the bounds of the array.  */
+      gfc_trans_array_bounds (type, sym, &offset, block, false);
+
+      /* Set the offset.  */
+      if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL)
+	gfc_add_modify (block, GFC_TYPE_ARRAY_OFFSET (type), offset);
+
+      /* Set the pointer itself if we aren't using the parameter
+	 directly.  */
+      if (TREE_CODE (parm) != PARM_DECL && DECL_LANG_SPECIFIC (parm)
+	  && GFC_DECL_SAVED_DESCRIPTOR (parm))
+	{
+	  tmp = convert (TREE_TYPE (parm),
+			 GFC_DECL_SAVED_DESCRIPTOR (parm));
+	  gfc_add_modify (block, parm, tmp);
+	}
+    }
+
+  /* Add private clauses for any variables that are used by
+     gfc_trans_array_bounds.  */
+  walk_tree_without_duplicates (&block->head, gfc_privatize_nodesc_arrays_1,
+				private_decls);
+
+  hash_set<tree>::iterator itt = private_decls->begin ();
+
+  for (; itt != private_decls->end (); ++itt)
+    {
+      tree nc = build_omp_clause (input_location, OMP_CLAUSE_PRIVATE);
+      OMP_CLAUSE_DECL (nc) = *itt;
+      OMP_CLAUSE_CHAIN (nc) = clauses;
+      clauses = nc;
+    }
+
+  delete private_decls;
+
+  return clauses;
+}
+
+/* Reinitialize any arrays in CLAUSES used inside CODE which do not contain
+   array descriptors if SCAN_NODESC_ARRAYS is TRUE.  Place the initialization
+   sequences in CODE.  Update CLAUSES to contain OMP_CLAUSE_PRIVATE for any
+   arrays which were initialized.  */
+
+static hash_set<gfc_symbol *> *
+gfc_init_nodesc_arrays (stmtblock_t *inner, tree *clauses, gfc_code *code,
+			bool scan_nodesc_arrays)
+{
+  hash_set<gfc_symbol *> *array_set = NULL;
+
+  if (!scan_nodesc_arrays)
+    return NULL;
+
+  array_set = new hash_set<gfc_symbol *>;
+  gfc_code_walker (&code, gfc_dummy_code_callback, gfc_scan_nodesc_arrays,
+		   array_set);
+
+  if (array_set->elements ())
+    {
+      gfc_start_block (inner);
+      pushlevel ();
+      *clauses = gfc_privatize_nodesc_arrays (array_set, inner, *clauses);
+    }
+  else
+    {
+      delete array_set;
+      array_set = NULL;
+    }
+
+  return array_set;
+}
+
 /* Trans OpenACC directives. */
 /* parallel, kernels, data and host_data. */
 static tree
 gfc_trans_oacc_construct (gfc_code *code)
 {
-  stmtblock_t block;
+  stmtblock_t block, inner;
   tree stmt, oacc_clauses;
   enum tree_code construct_code;
+  bool scan_nodesc_arrays = false;
+  hash_set<gfc_symbol *> *array_set = NULL;
 
   switch (code->op)
     {
       case EXEC_OACC_PARALLEL:
 	construct_code = OACC_PARALLEL;
+	scan_nodesc_arrays = true;
 	break;
       case EXEC_OACC_KERNELS:
 	construct_code = OACC_KERNELS;
+	scan_nodesc_arrays = true;
 	break;
       case EXEC_OACC_DATA:
 	construct_code = OACC_DATA;
@@ -2746,10 +2883,25 @@ gfc_trans_oacc_construct (gfc_code *code)
   gfc_start_block (&block);
   oacc_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses,
 					code->loc);
+
+  array_set = gfc_init_nodesc_arrays (&inner, &oacc_clauses, code,
+				      scan_nodesc_arrays);
+
   stmt = gfc_trans_omp_code (code->block->next, true);
+
+  if (array_set && array_set->elements ())
+    {
+      gfc_add_expr_to_block (&inner, stmt);
+      stmt = gfc_finish_block (&inner);
+      stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0));
+      delete array_set;
+    }
+
   stmt = build2_loc (input_location, construct_code, void_type_node, stmt,
 		     oacc_clauses);
+
   gfc_add_expr_to_block (&block, stmt);
+
   return gfc_finish_block (&block);
 }
 
@@ -3483,18 +3635,22 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
 static tree
 gfc_trans_oacc_combined_directive (gfc_code *code)
 {
-  stmtblock_t block, *pblock = NULL;
+  stmtblock_t block, inner, *pblock = NULL;
   gfc_omp_clauses construct_clauses, loop_clauses;
   tree stmt, oacc_clauses = NULL_TREE;
   enum tree_code construct_code;
+  bool scan_nodesc_arrays = false;
+  hash_set<gfc_symbol *> *array_set = NULL;
 
   switch (code->op)
     {
       case EXEC_OACC_PARALLEL_LOOP:
 	construct_code = OACC_PARALLEL;
+	scan_nodesc_arrays = true;
 	break;
       case EXEC_OACC_KERNELS_LOOP:
 	construct_code = OACC_KERNELS;
+	scan_nodesc_arrays = true;
 	break;
       default:
 	gcc_unreachable ();
@@ -3526,18 +3682,35 @@ gfc_trans_oacc_combined_directive (gfc_code *code)
       oacc_clauses = gfc_trans_omp_clauses (&block, &construct_clauses,
 					    code->loc);
     }
+
+  array_set = gfc_init_nodesc_arrays (&inner, &oacc_clauses, code,
+				      scan_nodesc_arrays);
+
   if (!loop_clauses.seq)
-    pblock = &block;
+    pblock = (array_set && array_set->elements ()) ? &inner : &block;
   else
     pushlevel ();
   stmt = gfc_trans_omp_do (code, EXEC_OACC_LOOP, pblock, &loop_clauses, NULL);
+
+  if (array_set && array_set->elements ())
+    gfc_add_expr_to_block (&inner, stmt);
+
   if (TREE_CODE (stmt) != BIND_EXPR)
     stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0));
   else
     poplevel (0, 0);
+
+  if (array_set && array_set->elements ())
+    {
+      stmt = gfc_finish_block (&inner);
+      stmt = build3_v (BIND_EXPR, NULL, stmt, poplevel (1, 0));
+      delete array_set;
+    }
+
   stmt = build2_loc (input_location, construct_code, void_type_node, stmt,
 		     oacc_clauses);
   gfc_add_expr_to_block (&block, stmt);
+
   return gfc_finish_block (&block);
 }
 
diff --git a/gcc/testsuite/gfortran.dg/goacc/default_none.f95 b/gcc/testsuite/gfortran.dg/goacc/default_none.f95
new file mode 100644
index 0000000..5ce66ae
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/default_none.f95
@@ -0,0 +1,59 @@
+! Ensure that the internal array variables, offset, lbound, etc., don't
+! trigger errors with default(none).
+
+! { dg-do compile }
+
+program main
+  implicit none
+  integer i
+  integer,parameter :: n = 100
+  integer,allocatable :: a1(:), a2(:,:)
+
+  allocate (a1 (n))
+  allocate (a2 (-n:n,-n:n))
+  a1 (:) = -1
+
+  !$acc parallel loop default(none) copy (a1(1:n))
+  do i = 1,n
+     a1(i) = i
+  end do
+  !$acc end parallel loop
+
+  call foo (a1)
+  call bar (a1, n)
+  call foobar (a2,n)
+
+contains
+
+  subroutine foo (da1)
+    integer :: da1(n)
+
+    !$acc parallel loop default(none) copy (da1(1:n))
+    do i = 1,n
+       da1(i) = i*2
+    end do
+    !$acc end parallel loop
+  end subroutine foo
+end program main
+
+subroutine bar (da2,n)
+  integer :: n, da2(n)
+  integer i
+
+  !$acc parallel loop default(none) copy (da2(1:n)) firstprivate(n)
+  do i = 1,n
+     da2(i) = i*3
+  end do
+  !$acc end parallel loop
+end subroutine bar
+
+subroutine foobar (da3,n)
+  integer :: n, da3(-n:n,-n:n)
+  integer i
+
+  !$acc parallel loop default(none) copy (da3(-n:n,-n:n)) firstprivate(n)
+  do i = 1,n
+     da3(i,0) = i*3
+  end do
+  !$acc end parallel loop
+end subroutine foobar

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH, OpenACC] Rework OpenACC Fortran DO loop initialization
@ 2019-01-25 14:19   ` Gergö Barany
  2019-01-28 20:15     ` Thomas Schwinge
  2019-02-19 12:28     ` [PATCH, OpenACC, og8] Fix incorrect test case Gergö Barany
  0 siblings, 2 replies; 9+ messages in thread
From: Gergö Barany @ 2019-01-25 14:19 UTC (permalink / raw)
  To: gcc-patches; +Cc: Thomas Schwinge

[-- Attachment #1: Type: text/plain, Size: 642 bytes --]

This patch moves OpenACC Fortan DO loop setup code from the head of a 
region to just before each loop. This is in preparation for upcoming 
patches reworking the handling of OpenACC kernels regions.

OK for openacc-gcc-8-branch?

Thanks,
Gergö


     gcc/fortran/
     * trans-openmp.c (gfc_privatize_nodesc_array_clauses): Renamed from
     gfc_privatize_nodesc_arrays, initialization part factored out to...
     (gfc_reinitialize_privatized_arrays): ... this new function, called...
     (gfc_trans_omp_do): ... from here for OpenACC loops.

     libgomp/
     * testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90: New 
test.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Rework-OpenACC-Fortran-DO-loop-initialization.patch --]
[-- Type: text/x-patch; name="0001-Rework-OpenACC-Fortran-DO-loop-initialization.patch", Size: 8359 bytes --]

From f4768a88a4e2ab5dc80feb7bfb06cd273c849f72 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C3=B6=20Barany?= <gergo@codesourcery.com>
Date: Mon, 21 Jan 2019 03:08:57 -0800
Subject: [PATCH] Rework OpenACC Fortran DO loop initialization

Fortran DO loops on arrays with non-constant bounds (like a(lo:hi)) need
special setup code to compute the bounds and offsets for the iteration. In
an OpenACC region containing multiple loops, this used to be done in a block
of code at the start of the region for all of the loops. But the upcoming
kernels conversion expects this kind of setup code to immediately precede
the corresponding loop, and variables are not mapped correctly otherwise.
This patch separates out the initialization part for each loop and places it
immediately before the loop.

    gcc/fortran/
    * trans-openmp.c (gfc_privatize_nodesc_array_clauses): Renamed from
    gfc_privatize_nodesc_arrays, initialization part factored out to...
    (gfc_reinitialize_privatized_arrays): ... this new function, called...
    (gfc_trans_omp_do): ... from here for OpenACC loops.

    libgomp/
    * testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90: New test.
---
 gcc/fortran/ChangeLog.openacc                      |  7 ++
 gcc/fortran/trans-openmp.c                         | 86 +++++++++++++---------
 libgomp/ChangeLog.openacc                          |  4 +
 .../initialize_kernels_loops.f90                   | 31 ++++++++
 4 files changed, 92 insertions(+), 36 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90

diff --git a/gcc/fortran/ChangeLog.openacc b/gcc/fortran/ChangeLog.openacc
index 0f31f3e..450056d 100644
--- a/gcc/fortran/ChangeLog.openacc
+++ b/gcc/fortran/ChangeLog.openacc
@@ -1,3 +1,10 @@
+2019-01-24  Gergö Barany  <gergo@codesourcery.com>
+
+	* trans-openmp.c (gfc_privatize_nodesc_array_clauses): Renamed from
+	gfc_privatize_nodesc_arrays, initialization part factored out to...
+	(gfc_reinitialize_privatized_arrays): ... this new function, called...
+	(gfc_trans_omp_do): ... from here for OpenACC loops.
+
 2019-01-09  Julian Brown  <julian@codesourcery.com>
 
 	* cpp.c (cpp_define_builtins): Update _OPENACC define to 201711.
diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index d5dbf18..5a444c3 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -3198,6 +3198,44 @@ gfc_scan_nodesc_arrays (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED,
   return 0;
 }
 
+/* Reinitialize any arrays used inside CODE.  Place the initialization
+   sequences in CODE.  */
+
+static void
+gfc_reinitialize_privatized_arrays (gfc_code *code, stmtblock_t *block)
+{
+  hash_set <gfc_symbol *> *array_set = new hash_set <gfc_symbol *> ();
+  gfc_code_walker (&code, gfc_dummy_code_callback, gfc_scan_nodesc_arrays,
+		   array_set);
+
+  hash_set<gfc_symbol *>::iterator its = array_set->begin ();
+
+  for (; its != array_set->end (); ++its)
+    {
+      gfc_symbol *sym = *its;
+      tree parm = gfc_get_symbol_decl (sym);
+      tree type = TREE_TYPE (parm);
+      tree offset, tmp;
+
+      /* Evaluate the bounds of the array.  */
+      gfc_trans_array_bounds (type, sym, &offset, block, false);
+
+      /* Set the offset.  */
+      if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL)
+        gfc_add_modify (block, GFC_TYPE_ARRAY_OFFSET (type), offset);
+
+      /* Set the pointer itself if we aren't using the parameter
+         directly.  */
+      if (TREE_CODE (parm) != PARM_DECL && DECL_LANG_SPECIFIC (parm)
+          && GFC_DECL_SAVED_DESCRIPTOR (parm))
+        {
+          tmp = convert (TREE_TYPE (parm),
+                         GFC_DECL_SAVED_DESCRIPTOR (parm));
+          gfc_add_modify (block, parm, tmp);
+        }
+    }
+}
+
 /* Build a set of internal array variables (lbound, ubound, stride, etc.)
    that need privatization.  */
 
@@ -3219,41 +3257,12 @@ gfc_privatize_nodesc_arrays_1 (tree *tp, int *walk_subtrees, void *data)
   return NULL;
 }
 
-/* Reinitialize all of the arrays inside ARRAY_SET in BLOCK.  Append private
-   clauses for those arrays in CLAUSES.  */
+/* Append private clauses for the arrays in BLOCK to CLAUSES.  */
 
 static tree
-gfc_privatize_nodesc_arrays (hash_set<gfc_symbol *> *array_set,
-			     stmtblock_t *block, tree clauses)
+gfc_privatize_nodesc_array_clauses (stmtblock_t *block, tree clauses)
 {
-  hash_set<gfc_symbol *>::iterator its = array_set->begin ();
   hash_set<tree> *private_decls = new hash_set<tree>;
-
-  for (; its != array_set->end (); ++its)
-    {
-      gfc_symbol *sym = *its;
-      tree parm = gfc_get_symbol_decl (sym);
-      tree type = TREE_TYPE (parm);
-      tree offset, tmp;
-
-      /* Evaluate the bounds of the array.  */
-      gfc_trans_array_bounds (type, sym, &offset, block, false);
-
-      /* Set the offset.  */
-      if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL)
-	gfc_add_modify (block, GFC_TYPE_ARRAY_OFFSET (type), offset);
-
-      /* Set the pointer itself if we aren't using the parameter
-	 directly.  */
-      if (TREE_CODE (parm) != PARM_DECL && DECL_LANG_SPECIFIC (parm)
-	  && GFC_DECL_SAVED_DESCRIPTOR (parm))
-	{
-	  tmp = convert (TREE_TYPE (parm),
-			 GFC_DECL_SAVED_DESCRIPTOR (parm));
-	  gfc_add_modify (block, parm, tmp);
-	}
-    }
-
   /* Add private clauses for any variables that are used by
      gfc_trans_array_bounds.  */
   walk_tree_without_duplicates (&block->head, gfc_privatize_nodesc_arrays_1,
@@ -3274,10 +3283,9 @@ gfc_privatize_nodesc_arrays (hash_set<gfc_symbol *> *array_set,
   return clauses;
 }
 
-/* Reinitialize any arrays in CLAUSES used inside CODE which do not contain
-   array descriptors if SCAN_NODESC_ARRAYS is TRUE.  Place the initialization
-   sequences in CODE.  Update CLAUSES to contain OMP_CLAUSE_PRIVATE for any
-   arrays which were initialized.  */
+/* Collect any arrays in CLAUSES used inside CODE which do not contain
+   array descriptors if SCAN_NODESC_ARRAYS is TRUE.  Update CLAUSES to
+   contain OMP_CLAUSE_PRIVATE for any arrays found.  */
 
 static hash_set<gfc_symbol *> *
 gfc_init_nodesc_arrays (stmtblock_t *inner, tree *clauses, gfc_code *code,
@@ -3296,7 +3304,7 @@ gfc_init_nodesc_arrays (stmtblock_t *inner, tree *clauses, gfc_code *code,
     {
       gfc_start_block (inner);
       pushlevel ();
-      *clauses = gfc_privatize_nodesc_arrays (array_set, inner, *clauses);
+      *clauses = gfc_privatize_nodesc_array_clauses (inner, *clauses);
     }
   else
     {
@@ -3856,6 +3864,12 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
 
   omp_clauses = gfc_trans_omp_clauses (pblock, do_clauses, code->loc);
 
+  /* Make sure that setup code reinitializing array bounds, offsets, and
+     strides immediately precedes the loop.  This is where the conversion of
+     OpenACC kernels to parallel regions expects it.  */
+  if (op == EXEC_OACC_LOOP)
+    gfc_reinitialize_privatized_arrays (code, pblock);
+
   for (i = 0; i < collapse; i++)
     {
       int simple = 0;
diff --git a/libgomp/ChangeLog.openacc b/libgomp/ChangeLog.openacc
index c0ee88b..2e23feb 100644
--- a/libgomp/ChangeLog.openacc
+++ b/libgomp/ChangeLog.openacc
@@ -1,3 +1,7 @@
+2019-01-24  Gergö Barany  <gergo@codesourcery.com>
+
+	* testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90: New test.
+
 2019-01-09  Julian Brown  <julian@codesourcery.com>
 
 	* acc_prof.h (_ACC_PROF_INFO_VERSION): Update to 201711.
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90 b/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90
new file mode 100644
index 0000000..fbae8cf
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90
@@ -0,0 +1,31 @@
+subroutine kernel(lo, hi, a, b, c)
+    implicit none
+    integer :: lo, hi, i
+    real, dimension(lo:hi) :: a, b, c
+
+!$acc kernels
+!$acc loop independent
+    do i = lo, hi
+      b(i) = a(i)
+    end do
+!$acc loop independent
+    do i = lo, hi
+      c(i) = b(i)
+    end do
+!$acc end kernels
+end subroutine kernel
+
+program main
+    integer :: n = 20
+    real, dimension(1:20) :: a, b, c
+
+    a(:) = 1
+    b(:) = 2
+    c(:) = 3
+
+    call kernel(1, n, a, b, c)
+
+    do i = 1, n
+        if (c(i) .ne. 1) call abort
+    end do
+end program main
-- 
2.8.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH, OpenACC] Rework OpenACC Fortran DO loop initialization
  2019-01-25 14:19   ` [PATCH, OpenACC] Rework OpenACC Fortran DO loop initialization Gergö Barany
@ 2019-01-28 20:15     ` Thomas Schwinge
  2019-02-19 12:28     ` [PATCH, OpenACC, og8] Fix incorrect test case Gergö Barany
  1 sibling, 0 replies; 9+ messages in thread
From: Thomas Schwinge @ 2019-01-28 20:15 UTC (permalink / raw)
  To: Gergö Barany; +Cc: gcc-patches, fortran

[-- Attachment #1: Type: text/plain, Size: 10471 bytes --]

Hi Gergő!

(We're asked for to CC the GCC Fortran developers, <fortran@gcc.gnu.org>,
for any Fortran related changes.)

On Fri, 25 Jan 2019 15:13:48 +0100, Gergö Barany <gergo_barany@mentor.com> wrote:
> This patch moves OpenACC Fortan DO loop setup code from the head of a 
> region to just before each loop.

The code that you're modifying: "gfc_privatize_nodesc_arrays" has been
added by Cesar, years ago, and is not yet in GCC trunk, and I'm not at
all familiar with that stuff.
<http://mid.mail-archive.com/561D65A9.3060903@codesourcery.com>,
<http://mid.mail-archive.com/5695AD6B.5060802@codesourcery.com>.  (We
shall re-visit that in next GCC development stage 1.)

That all said, your rationale of why/how you're changing the code seems
reasonable, and as I understand, you're mostly moving existing code
around, so...

> This is in preparation for upcoming 
> patches reworking the handling of OpenACC kernels regions.
> 
> OK for openacc-gcc-8-branch?

OK.

The test case (which is not really too useful right now, but I understand
will be later) needs to get a "dg-do run" directive added (to enable
"torture testing": cycling through optimization and other flags, which is
done (only) for Fortran, "for reasons"), and then needs to be "avoid
offloading" XFAILed, which we shall be able to remove as part of later
changes, yay.  I'm attaching a patch that you can squeeze in as is.


Grüße
 Thomas


> From f4768a88a4e2ab5dc80feb7bfb06cd273c849f72 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Gerg=C3=B6=20Barany?= <gergo@codesourcery.com>
> Date: Mon, 21 Jan 2019 03:08:57 -0800
> Subject: [PATCH] Rework OpenACC Fortran DO loop initialization
> 
> Fortran DO loops on arrays with non-constant bounds (like a(lo:hi)) need
> special setup code to compute the bounds and offsets for the iteration. In
> an OpenACC region containing multiple loops, this used to be done in a block
> of code at the start of the region for all of the loops. But the upcoming
> kernels conversion expects this kind of setup code to immediately precede
> the corresponding loop, and variables are not mapped correctly otherwise.
> This patch separates out the initialization part for each loop and places it
> immediately before the loop.
> 
>     gcc/fortran/
>     * trans-openmp.c (gfc_privatize_nodesc_array_clauses): Renamed from
>     gfc_privatize_nodesc_arrays, initialization part factored out to...
>     (gfc_reinitialize_privatized_arrays): ... this new function, called...
>     (gfc_trans_omp_do): ... from here for OpenACC loops.
> 
>     libgomp/
>     * testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90: New test.
> ---
>  gcc/fortran/ChangeLog.openacc                      |  7 ++
>  gcc/fortran/trans-openmp.c                         | 86 +++++++++++++---------
>  libgomp/ChangeLog.openacc                          |  4 +
>  .../initialize_kernels_loops.f90                   | 31 ++++++++
>  4 files changed, 92 insertions(+), 36 deletions(-)
>  create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90
> 
> diff --git a/gcc/fortran/ChangeLog.openacc b/gcc/fortran/ChangeLog.openacc
> index 0f31f3e..450056d 100644
> --- a/gcc/fortran/ChangeLog.openacc
> +++ b/gcc/fortran/ChangeLog.openacc
> @@ -1,3 +1,10 @@
> +2019-01-24  Gergö Barany  <gergo@codesourcery.com>
> +
> +	* trans-openmp.c (gfc_privatize_nodesc_array_clauses): Renamed from
> +	gfc_privatize_nodesc_arrays, initialization part factored out to...
> +	(gfc_reinitialize_privatized_arrays): ... this new function, called...
> +	(gfc_trans_omp_do): ... from here for OpenACC loops.
> +
>  2019-01-09  Julian Brown  <julian@codesourcery.com>
>  
>  	* cpp.c (cpp_define_builtins): Update _OPENACC define to 201711.
> diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
> index d5dbf18..5a444c3 100644
> --- a/gcc/fortran/trans-openmp.c
> +++ b/gcc/fortran/trans-openmp.c
> @@ -3198,6 +3198,44 @@ gfc_scan_nodesc_arrays (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED,
>    return 0;
>  }
>  
> +/* Reinitialize any arrays used inside CODE.  Place the initialization
> +   sequences in CODE.  */
> +
> +static void
> +gfc_reinitialize_privatized_arrays (gfc_code *code, stmtblock_t *block)
> +{
> +  hash_set <gfc_symbol *> *array_set = new hash_set <gfc_symbol *> ();
> +  gfc_code_walker (&code, gfc_dummy_code_callback, gfc_scan_nodesc_arrays,
> +		   array_set);
> +
> +  hash_set<gfc_symbol *>::iterator its = array_set->begin ();
> +
> +  for (; its != array_set->end (); ++its)
> +    {
> +      gfc_symbol *sym = *its;
> +      tree parm = gfc_get_symbol_decl (sym);
> +      tree type = TREE_TYPE (parm);
> +      tree offset, tmp;
> +
> +      /* Evaluate the bounds of the array.  */
> +      gfc_trans_array_bounds (type, sym, &offset, block, false);
> +
> +      /* Set the offset.  */
> +      if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL)
> +        gfc_add_modify (block, GFC_TYPE_ARRAY_OFFSET (type), offset);
> +
> +      /* Set the pointer itself if we aren't using the parameter
> +         directly.  */
> +      if (TREE_CODE (parm) != PARM_DECL && DECL_LANG_SPECIFIC (parm)
> +          && GFC_DECL_SAVED_DESCRIPTOR (parm))
> +        {
> +          tmp = convert (TREE_TYPE (parm),
> +                         GFC_DECL_SAVED_DESCRIPTOR (parm));
> +          gfc_add_modify (block, parm, tmp);
> +        }
> +    }
> +}
> +
>  /* Build a set of internal array variables (lbound, ubound, stride, etc.)
>     that need privatization.  */
>  
> @@ -3219,41 +3257,12 @@ gfc_privatize_nodesc_arrays_1 (tree *tp, int *walk_subtrees, void *data)
>    return NULL;
>  }
>  
> -/* Reinitialize all of the arrays inside ARRAY_SET in BLOCK.  Append private
> -   clauses for those arrays in CLAUSES.  */
> +/* Append private clauses for the arrays in BLOCK to CLAUSES.  */
>  
>  static tree
> -gfc_privatize_nodesc_arrays (hash_set<gfc_symbol *> *array_set,
> -			     stmtblock_t *block, tree clauses)
> +gfc_privatize_nodesc_array_clauses (stmtblock_t *block, tree clauses)
>  {
> -  hash_set<gfc_symbol *>::iterator its = array_set->begin ();
>    hash_set<tree> *private_decls = new hash_set<tree>;
> -
> -  for (; its != array_set->end (); ++its)
> -    {
> -      gfc_symbol *sym = *its;
> -      tree parm = gfc_get_symbol_decl (sym);
> -      tree type = TREE_TYPE (parm);
> -      tree offset, tmp;
> -
> -      /* Evaluate the bounds of the array.  */
> -      gfc_trans_array_bounds (type, sym, &offset, block, false);
> -
> -      /* Set the offset.  */
> -      if (TREE_CODE (GFC_TYPE_ARRAY_OFFSET (type)) == VAR_DECL)
> -	gfc_add_modify (block, GFC_TYPE_ARRAY_OFFSET (type), offset);
> -
> -      /* Set the pointer itself if we aren't using the parameter
> -	 directly.  */
> -      if (TREE_CODE (parm) != PARM_DECL && DECL_LANG_SPECIFIC (parm)
> -	  && GFC_DECL_SAVED_DESCRIPTOR (parm))
> -	{
> -	  tmp = convert (TREE_TYPE (parm),
> -			 GFC_DECL_SAVED_DESCRIPTOR (parm));
> -	  gfc_add_modify (block, parm, tmp);
> -	}
> -    }
> -
>    /* Add private clauses for any variables that are used by
>       gfc_trans_array_bounds.  */
>    walk_tree_without_duplicates (&block->head, gfc_privatize_nodesc_arrays_1,
> @@ -3274,10 +3283,9 @@ gfc_privatize_nodesc_arrays (hash_set<gfc_symbol *> *array_set,
>    return clauses;
>  }
>  
> -/* Reinitialize any arrays in CLAUSES used inside CODE which do not contain
> -   array descriptors if SCAN_NODESC_ARRAYS is TRUE.  Place the initialization
> -   sequences in CODE.  Update CLAUSES to contain OMP_CLAUSE_PRIVATE for any
> -   arrays which were initialized.  */
> +/* Collect any arrays in CLAUSES used inside CODE which do not contain
> +   array descriptors if SCAN_NODESC_ARRAYS is TRUE.  Update CLAUSES to
> +   contain OMP_CLAUSE_PRIVATE for any arrays found.  */
>  
>  static hash_set<gfc_symbol *> *
>  gfc_init_nodesc_arrays (stmtblock_t *inner, tree *clauses, gfc_code *code,
> @@ -3296,7 +3304,7 @@ gfc_init_nodesc_arrays (stmtblock_t *inner, tree *clauses, gfc_code *code,
>      {
>        gfc_start_block (inner);
>        pushlevel ();
> -      *clauses = gfc_privatize_nodesc_arrays (array_set, inner, *clauses);
> +      *clauses = gfc_privatize_nodesc_array_clauses (inner, *clauses);
>      }
>    else
>      {
> @@ -3856,6 +3864,12 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
>  
>    omp_clauses = gfc_trans_omp_clauses (pblock, do_clauses, code->loc);
>  
> +  /* Make sure that setup code reinitializing array bounds, offsets, and
> +     strides immediately precedes the loop.  This is where the conversion of
> +     OpenACC kernels to parallel regions expects it.  */
> +  if (op == EXEC_OACC_LOOP)
> +    gfc_reinitialize_privatized_arrays (code, pblock);
> +
>    for (i = 0; i < collapse; i++)
>      {
>        int simple = 0;
> diff --git a/libgomp/ChangeLog.openacc b/libgomp/ChangeLog.openacc
> index c0ee88b..2e23feb 100644
> --- a/libgomp/ChangeLog.openacc
> +++ b/libgomp/ChangeLog.openacc
> @@ -1,3 +1,7 @@
> +2019-01-24  Gergö Barany  <gergo@codesourcery.com>
> +
> +	* testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90: New test.
> +
>  2019-01-09  Julian Brown  <julian@codesourcery.com>
>  
>  	* acc_prof.h (_ACC_PROF_INFO_VERSION): Update to 201711.
> diff --git a/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90 b/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90
> new file mode 100644
> index 0000000..fbae8cf
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90
> @@ -0,0 +1,31 @@
> +subroutine kernel(lo, hi, a, b, c)
> +    implicit none
> +    integer :: lo, hi, i
> +    real, dimension(lo:hi) :: a, b, c
> +
> +!$acc kernels
> +!$acc loop independent
> +    do i = lo, hi
> +      b(i) = a(i)
> +    end do
> +!$acc loop independent
> +    do i = lo, hi
> +      c(i) = b(i)
> +    end do
> +!$acc end kernels
> +end subroutine kernel
> +
> +program main
> +    integer :: n = 20
> +    real, dimension(1:20) :: a, b, c
> +
> +    a(:) = 1
> +    b(:) = 2
> +    c(:) = 3
> +
> +    call kernel(1, n, a, b, c)
> +
> +    do i = 1, n
> +        if (c(i) .ne. 1) call abort
> +    end do
> +end program main



[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-into-Rework-OpenACC-Fortran-DO-loop-initialization.patch --]
[-- Type: text/x-diff, Size: 1023 bytes --]

From 26bf07eff5761acc361300bad365944afd7a4e9d Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Mon, 28 Jan 2019 21:00:25 +0100
Subject: [PATCH] into Rework OpenACC Fortran DO loop initialization

---
 .../libgomp.oacc-fortran/initialize_kernels_loops.f90        | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90 b/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90
index fbae8cf67cc..6d1713157b7 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90
@@ -1,3 +1,8 @@
+! { dg-do run }
+!TODO
+! warning: OpenACC kernels construct will be executed sequentially; will by default avoid offloading to prevent data copy penalty
+! { dg-xfail-if "TODO" { openacc_nvidia_accel_selected } { "*" } { "-O0" "-O1" } }
+
 subroutine kernel(lo, hi, a, b, c)
     implicit none
     integer :: lo, hi, i
-- 
2.17.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH, OpenACC, og8] Fix incorrect test case
@ 2019-02-19 12:28     ` Gergö Barany
  2020-11-13 22:06       ` Add 'libgomp.oacc-fortran/pr94358-1.f90' [PR94358] (was: [PATCH, OpenACC] Rework OpenACC Fortran DO loop initialization) Thomas Schwinge
  0 siblings, 1 reply; 9+ messages in thread
From: Gergö Barany @ 2019-02-19 12:28 UTC (permalink / raw)
  To: gcc-patches; +Cc: Thomas Schwinge

[-- Attachment #1: Type: text/plain, Size: 383 bytes --]

Hi,

This patch fixes a broken test case I added in a recent patch: 
https://gcc.gnu.org/ml/gcc-patches/2019-01/msg01504.html

The broken test can segfault due to writing to read-only memory, which 
this new version avoids. Will apply to the openacc-gcc-8-branch shortly.


Thanks,
Gergö


     libgomp/
     * testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90: Update.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-og8-Fix-incorrect-test-case.patch --]
[-- Type: text/x-patch; name="0001-og8-Fix-incorrect-test-case.patch", Size: 1752 bytes --]

From 65bf7b1656d4cffa2bd057c2e3a2129d449d04a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C3=B6=20Barany?= <gergo@codesourcery.com>
Date: Tue, 19 Feb 2019 04:17:26 -0800
Subject: [PATCH] [og8] Fix incorrect test case

OpenACC kernels regions implicitly copy variables to and from the device,
but in this test case a loop bound lives in read-only memory and the test
can segfault.

    libgomp/
    * testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90: Update.
---
 libgomp/ChangeLog.openacc                                           | 4 ++++
 libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90 | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/libgomp/ChangeLog.openacc b/libgomp/ChangeLog.openacc
index 96908d1..3485c9d 100644
--- a/libgomp/ChangeLog.openacc
+++ b/libgomp/ChangeLog.openacc
@@ -1,3 +1,7 @@
+2019-02-19  Gergö Barany  <gergo@codesourcery.com>
+
+	* testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90: Update.
+
 2019-01-31  Thomas Schwinge  <thomas@codesourcery.com>
 
 	* testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c:
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90 b/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90
index e8b4f3a..990a8ef 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90
@@ -6,7 +6,7 @@ subroutine kernel(lo, hi, a, b, c)
     integer :: lo, hi, i
     real, dimension(lo:hi) :: a, b, c
 
-!$acc kernels
+!$acc kernels copyin(lo, hi)
 !$acc loop independent ! { dg-warning "note: parallelized loop nest in OpenACC .kernels. construct" }
     do i = lo, hi
       b(i) = a(i)
-- 
2.8.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Add 'libgomp.oacc-fortran/pr94358-1.f90' [PR94358] (was: [PATCH, OpenACC] Rework OpenACC Fortran DO loop initialization)
  2019-02-19 12:28     ` [PATCH, OpenACC, og8] Fix incorrect test case Gergö Barany
@ 2020-11-13 22:06       ` Thomas Schwinge
  0 siblings, 0 replies; 9+ messages in thread
From: Thomas Schwinge @ 2020-11-13 22:06 UTC (permalink / raw)
  To: gcc-patches; +Cc: fortran

[-- Attachment #1: Type: text/plain, Size: 855 bytes --]

Hi!

The whole topic of GCC PR94358 "[OMP] Privatize internal array variables
introduced by the Fortran FE" is yet to be resolved, but we may already
now add Gergő's testcase:

On 2019-01-25T15:13:48+0100, Gergö Barany <gergo_barany@mentor.com> wrote:
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-fortran/initialize_kernels_loops.f90
> @@ -0,0 +1,31 @@
> +[...]

... to document the status quo, and so that it may help highlight any
behavioral changes later on.  I've pushed "Add
'libgomp.oacc-fortran/pr94358-1.f90' [PR94358]" to master branch in
commit d1ba078d9bcc3457d36ba12695cfef29eb3ca942, see attached.


Grüße
 Thomas


-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander Walter

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Add-libgomp.oacc-fortran-pr94358-1.f90-PR94358.patch --]
[-- Type: text/x-diff, Size: 1630 bytes --]

From d1ba078d9bcc3457d36ba12695cfef29eb3ca942 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gerg=C3=B6=20Barany?= <gergo@codesourcery.com>
Date: Mon, 21 Jan 2019 03:08:57 -0800
Subject: [PATCH] Add 'libgomp.oacc-fortran/pr94358-1.f90' [PR94358]

Document status quo re PR94358 "[OMP] Privatize internal array variables
introduced by the Fortran FE".

	libgomp/
	PR fortran/94358
	* testsuite/libgomp.oacc-fortran/pr94358-1.f90: New.

Co-authored-by: Thomas Schwinge <thomas@codesourcery.com>
---
 .../libgomp.oacc-fortran/pr94358-1.f90        | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/pr94358-1.f90

diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr94358-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr94358-1.f90
new file mode 100644
index 00000000000..5013c5ba04b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/pr94358-1.f90
@@ -0,0 +1,34 @@
+! { dg-do run }
+! { dg-additional-options "-fopt-info-omp-all" }
+
+subroutine kernel(lo, hi, a, b, c)
+  implicit none
+  integer :: lo, hi, i
+  real, dimension(lo:hi) :: a, b, c
+
+  !$acc kernels copyin(lo, hi) ! { dg-optimized "assigned OpenACC seq loop parallelism" }
+  !$acc loop independent
+  do i = lo, hi
+     b(i) = a(i)
+  end do
+  !$acc loop independent
+  do i = lo, hi
+     c(i) = b(i)
+  end do
+  !$acc end kernels
+end subroutine kernel
+
+program main
+  integer :: n = 20
+  real, dimension(1:20) :: a, b, c
+
+  a(:) = 1
+  b(:) = 2
+  c(:) = 3
+
+  call kernel(1, n, a, b, c)
+
+  do i = 1, n
+     if (c(i) .ne. 1) call abort
+  end do
+end program main
-- 
2.17.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-11-13 22:07 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-13 20:12 [gomp4] privatize internal array variables introduced by the fortran FE Cesar Philippidis
2015-10-13 20:29 ` Jakub Jelinek
2015-10-13 20:40   ` Cesar Philippidis
2015-10-14 11:51 ` Paul Richard Thomas
2016-01-13  1:50 ` [gomp4] arrays inside modules Cesar Philippidis
2019-01-25 14:19   ` [PATCH, OpenACC] Rework OpenACC Fortran DO loop initialization Gergö Barany
2019-01-28 20:15     ` Thomas Schwinge
2019-02-19 12:28     ` [PATCH, OpenACC, og8] Fix incorrect test case Gergö Barany
2020-11-13 22:06       ` Add 'libgomp.oacc-fortran/pr94358-1.f90' [PR94358] (was: [PATCH, OpenACC] Rework OpenACC Fortran DO loop initialization) Thomas Schwinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).