public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [patch,gomp-4_0-branch] acc nested function support
@ 2014-07-18 21:41 Cesar Philippidis
  2014-07-29  8:30 ` Thomas Schwinge
  2014-11-05  0:45 ` Cesar Philippidis
  0 siblings, 2 replies; 13+ messages in thread
From: Cesar Philippidis @ 2014-07-18 21:41 UTC (permalink / raw)
  To: Thomas Schwinge, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 364 bytes --]

Hi Thomas,

This patch enables acc constructs to be used inside nested functions. I
doubt nested functions will be used much in C, but some of the OpenACC
Fortran tutorials I've seen online make use of internal subroutines in
Fortran. Those internal subroutines are implemented as nested functions.

Does this look OK to commit to gomp-4_0-branch?

Thanks,
Cesar


[-- Attachment #2: nested-function-base.diff --]
[-- Type: text/x-patch, Size: 15966 bytes --]

2014-07-17  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/
	* gcc/gimple.h (gimple_statement_oacc_kernels,
	gimple_statement_oacc_parallel): Derive from
	gimple_statement_omp_taskreg instead of
	gimple_statement_omp_parallel_layout.
	(is_a_helper <gimple_statement_omp_taskreg *>::test): Permit gimple
	codes GIMPLE_OACC_PARALLEL and GIMPLE_OACC_KERNELS.
	(is_a_helper <const gimple_statement_omp_taskreg *>::test): Likewise.
	* tree-nested.c (walk_gimple_omp_for): Handle OpenACC kernels and
	parallel constructs.
	(convert_nonlocal_reference_stmt): Likewise.
	(convert_local_reference_stmt): Likewise.
	(convert_tramp_reference_stmt): Likewise.
	(convert_gimple_call): Likewise.

	gcc/testsuite/
	* c-c++-common/goacc/nested-function-1.c: New test.
	* gfortran.dg/goacc/cray-2.f95: New test.
	* gfortran.dg/goacc/loop-4.f95: New test.
	* gfortran.dg/goacc/loop-5.f95: New test.


diff --git a/gcc/gimple.h b/gcc/gimple.h
index 68d1745..d45010f 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -576,22 +576,6 @@ struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
   tree data_arg;
 };
 
-/* GIMPLE_OACC_KERNELS */
-struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
-  gimple_statement_oacc_kernels : public gimple_statement_omp_parallel_layout
-{
-    /* No extra fields; adds invariant:
-         stmt->code == GIMPLE_OACC_KERNELS.  */
-};
-
-/* GIMPLE_OACC_PARALLEL */
-struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
-  gimple_statement_oacc_parallel : public gimple_statement_omp_parallel_layout
-{
-    /* No extra fields; adds invariant:
-         stmt->code == GIMPLE_OACC_PARALLEL.  */
-};
-
 /* GIMPLE_OMP_PARALLEL or GIMPLE_TASK */
 struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
   gimple_statement_omp_taskreg : public gimple_statement_omp_parallel_layout
@@ -617,6 +601,22 @@ struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
          stmt->code == GIMPLE_OMP_TARGET.  */
 };
 
+/* GIMPLE_OACC_KERNELS */
+struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
+  gimple_statement_oacc_kernels : public gimple_statement_omp_taskreg
+{
+    /* No extra fields; adds invariant:
+         stmt->code == GIMPLE_OACC_KERNELS.  */
+};
+
+/* GIMPLE_OACC_PARALLEL */
+struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
+  gimple_statement_oacc_parallel : public gimple_statement_omp_taskreg
+{
+    /* No extra fields; adds invariant:
+         stmt->code == GIMPLE_OACC_PARALLEL.  */
+};
+
 /* GIMPLE_OMP_TASK */
 
 struct GTY((tag("GSS_OMP_TASK")))
@@ -927,7 +927,8 @@ template <>
 inline bool
 is_a_helper <gimple_statement_omp_taskreg *>::test (gimple gs)
 {
-  return gs->code == GIMPLE_OMP_PARALLEL || gs->code == GIMPLE_OMP_TASK;
+  return gs->code == GIMPLE_OMP_PARALLEL || gs->code == GIMPLE_OMP_TASK
+    || gs->code == GIMPLE_OACC_PARALLEL || gs->code == GIMPLE_OACC_KERNELS;
 }
 
 template <>
@@ -1135,7 +1136,8 @@ template <>
 inline bool
 is_a_helper <const gimple_statement_omp_taskreg *>::test (const_gimple gs)
 {
-  return gs->code == GIMPLE_OMP_PARALLEL || gs->code == GIMPLE_OMP_TASK;
+  return gs->code == GIMPLE_OMP_PARALLEL || gs->code == GIMPLE_OMP_TASK
+    || gs->code == GIMPLE_OACC_PARALLEL || gs->code == GIMPLE_OACC_KERNELS;
 }
 
 template <>
diff --git a/gcc/testsuite/c-c++-common/goacc/nested-function-1.c b/gcc/testsuite/c-c++-common/goacc/nested-function-1.c
new file mode 100644
index 0000000..51a0e9f
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/nested-function-1.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+
+extern void abort (void);
+
+int
+main (void)
+{
+  int j = 0, k = 6, l = 7, m = 8;
+  void simple (void)
+  {
+    int i;
+#pragma acc parallel
+    {
+#pragma acc loop
+      for (i = 0; i < m; i+= k)
+	j = (m + i - j) * l;
+    }
+  }
+  void collapse (void)
+  {
+    int x, y, z;
+#pragma acc parallel
+    {
+#pragma acc loop collapse (3)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  void reduction (void)
+  {
+    int x, y, z;
+#pragma acc parallel
+    {
+#pragma acc loop collapse (3) reduction (+:j)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  simple();
+  collapse();
+  reduction();
+  return 0;
+}
diff --git a/gcc/testsuite/gfortran.dg/goacc/cray-2.f95 b/gcc/testsuite/gfortran.dg/goacc/cray-2.f95
new file mode 100644
index 0000000..70f7cf6
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/cray-2.f95
@@ -0,0 +1,55 @@
+! { dg-do compile }
+! { dg-additional-options "-fcray-pointer" }
+
+program test
+  call oacc1
+contains
+  subroutine oacc1
+    implicit none
+    integer :: i
+    real :: pointee
+    pointer (ptr, pointee)
+    !$acc declare device_resident (pointee)
+    !$acc declare device_resident (ptr)
+    !$acc data copy (pointee) ! { dg-error "Cray pointee" }
+    !$acc end data
+    !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
+    !$acc end data
+    !$acc parallel private (pointee) ! { dg-error "Cray pointee" }
+    !$acc end parallel
+    !$acc host_data use_device (pointee) ! { dg-error "Cray pointee" }
+    !$acc end host_data
+    !$acc parallel loop reduction(+:pointee) ! { dg-error "Cray pointee" }
+    do i = 1,5
+    enddo
+    !$acc end parallel loop
+    !$acc parallel loop
+    do i = 1,5
+      ! Subarrays are not implemented yet
+      !$acc cache (pointee) ! TODO: This must fail, as in openacc-1_0-branch
+    enddo
+    !$acc end parallel loop
+    !$acc update host (pointee) ! { dg-error "Cray pointee" }
+    !$acc update device (pointee) ! { dg-error "Cray pointee" }
+    !$acc data copy (ptr)
+    !$acc end data
+    !$acc data deviceptr (ptr) ! { dg-error "Cray pointer" }
+    !$acc end data
+    !$acc parallel private (ptr)
+    !$acc end parallel
+    !$acc host_data use_device (ptr) ! { dg-error "Cray pointer" }
+    !$acc end host_data
+    !$acc parallel loop reduction(+:ptr) ! { dg-error "Cray pointer" }
+    do i = 1,5
+    enddo
+    !$acc end parallel loop
+    !$acc parallel loop
+    do i = 1,5
+      !$acc cache (ptr) ! TODO: This must fail, as in openacc-1_0-branch
+    enddo
+    !$acc end parallel loop
+    !$acc update host (ptr)
+    !$acc update device (ptr)
+  end subroutine oacc1
+end program test
+! { dg-prune-output "unimplemented" }
diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-4.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-4.f95
new file mode 100644
index 0000000..f876106
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/loop-4.f95
@@ -0,0 +1,170 @@
+! { dg-do compile }
+! { dg-additional-options "-fmax-errors=100" }
+program test
+  call test1
+contains
+subroutine test1
+  integer :: i, j, k, b(10)
+  integer, dimension (30) :: a
+  double precision :: d
+  real :: r
+  i = 0
+  !$acc loop
+  do 100 ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+    if (i .gt. 0) exit ! { dg-error "EXIT statement" }
+  100 i = i + 1
+  i = 0
+  !$acc loop
+  do ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+      if (i .gt. 0) exit ! { dg-error "EXIT statement" }
+       i = i + 1
+  end do
+  i = 0
+  !$acc loop
+  do 200 while (i .lt. 4) ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+  200 i = i + 1
+  !$acc loop
+  do while (i .lt. 8) ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+       i = i + 1
+  end do
+  !$acc loop
+  do 300 d = 1, 30, 6 ! { dg-error "integer" }
+      i = d
+  300 a(i) = 1
+  !$acc loop
+  do d = 1, 30, 5 ! { dg-error "integer" }
+       i = d
+      a(i) = 2
+  end do
+  !$acc loop
+  do i = 1, 30
+      if (i .eq. 16) exit ! { dg-error "EXIT statement" }
+  end do
+  !$acc loop
+  outer: do i = 1, 30
+      do j = 5, 10
+          if (i .eq. 6 .and. j .eq. 7) exit outer ! { dg-error "EXIT statement" }
+      end do
+  end do outer
+  last: do i = 1, 30
+   end do last
+
+  ! different types of loop are allowed
+  !$acc loop
+  do i = 1,10
+  end do
+  !$acc loop
+  do 400, i = 1,10
+400   a(i) = i
+
+  ! after loop directive must be loop
+  !$acc loop
+  a(1) = 1 ! { dg-error "Expected DO loop" }
+  do i = 1,10
+  enddo
+
+  ! combined directives may be used with/without end
+  !$acc parallel loop
+  do i = 1,10
+  enddo
+  !$acc parallel loop
+  do i = 1,10
+  enddo
+  !$acc end parallel loop
+  !$acc kernels loop
+  do i = 1,10
+  enddo
+  !$acc kernels loop
+  do i = 1,10
+  enddo
+  !$acc end kernels loop
+
+  !$acc kernels loop reduction(max:i)
+  do i = 1,10
+  enddo
+  !$acc kernels
+  !$acc loop reduction(max:i)
+  do i = 1,10
+  enddo
+  !$acc end kernels
+
+  !$acc parallel loop collapse(0) ! { dg-error "constant positive integer" }
+  do i = 1,10
+  enddo
+
+  !$acc parallel loop collapse(-1) ! { dg-error "constant positive integer" }
+  do i = 1,10
+  enddo
+
+  !$acc parallel loop collapse(i) ! { dg-error "Constant expression required" }
+  do i = 1,10
+  enddo
+
+  !$acc parallel loop collapse(4) ! { dg-error "not enough DO loops for collapsed" }
+    do i = 1, 3
+        do j = 4, 6
+          do k = 5, 7
+              a(i+j-k) = i + j + k
+          end do
+        end do
+    end do
+    !$acc parallel loop collapse(2)
+    do i = 1, 5, 2
+        do j = i + 1, 7, i  ! { dg-error "collapsed loops don.t form rectangular iteration space" }
+        end do
+    end do
+    !$acc parallel loop collapse(2)
+    do i = 1, 3
+        do j = 4, 6
+        end do
+    end do
+    !$acc parallel loop collapse(2)
+    do i = 1, 3
+        do j = 4, 6
+        end do
+        k = 4
+    end do
+    !$acc parallel loop collapse(3-1)
+    do i = 1, 3
+        do j = 4, 6
+        end do
+        k = 4
+    end do
+    !$acc parallel loop collapse(1+1)
+    do i = 1, 3
+        do j = 4, 6
+        end do
+        k = 4
+    end do
+    !$acc parallel loop collapse(2)
+    do i = 1, 3
+        do      ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+        end do
+    end do
+    !$acc parallel loop collapse(2)
+    do i = 1, 3
+        do r = 4, 6    ! { dg-error "integer" }
+        end do
+    end do
+
+    ! Both seq and independent are not allowed
+  !$acc loop independent seq ! { dg-error "SEQ conflicts with INDEPENDENT" }
+  do i = 1,10
+  enddo
+
+
+  !$acc cache (a) ! { dg-error "inside of loop" }
+
+  do i = 1,10
+    !$acc cache(a)
+  enddo
+
+  do i = 1,10
+    a(i) = i
+    !$acc cache(a)
+  enddo
+
+end subroutine test1
+end program test
+! { dg-prune-output "Deleted" }
+! { dg-prune-output "ACC cache unimplemented" }
diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-5.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-5.f95
new file mode 100644
index 0000000..448d2f5
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/loop-5.f95
@@ -0,0 +1,58 @@
+! { dg-do compile }
+! { dg-additional-options "-std=f2008" }
+
+program test
+  call test1
+contains
+subroutine test1
+  implicit none
+  integer :: i, j
+
+  ! !$acc end loop not required by spec
+  !$acc loop
+  do i = 1,5
+  enddo
+  !$acc end loop ! { dg-warning "Redundant" }
+
+  !$acc loop
+  do i = 1,5
+  enddo
+  j = 1
+  !$acc end loop ! { dg-error "Unexpected" }
+
+  !$acc parallel
+  !$acc loop
+  do i = 1,5
+  enddo
+  !$acc end parallel
+  !$acc end loop ! { dg-error "Unexpected" }
+
+  ! OpenACC supports Fortran 2008 do concurrent statement
+  !$acc loop
+  do concurrent (i = 1:5)
+  end do
+
+  !$acc loop
+  outer_loop: do i = 1, 5
+    inner_loop: do j = 1,5
+      if (i .eq. j) cycle outer_loop
+      if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
+    end do inner_loop
+  end do outer_loop
+
+  outer_loop1: do i = 1, 5
+    !$acc loop
+    inner_loop1: do j = 1,5
+      if (i .eq. j) cycle outer_loop1 ! { dg-error "CYCLE statement" }
+    end do inner_loop1
+  end do outer_loop1
+
+  !$acc loop collapse(2)
+  outer_loop2: do i = 1, 5
+    inner_loop2: do j = 1,5
+      if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
+      if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
+    end do inner_loop2
+  end do outer_loop2
+end subroutine test1
+end program test
diff --git a/gcc/tree-nested.c b/gcc/tree-nested.c
index 06f3589..3d0af52 100644
--- a/gcc/tree-nested.c
+++ b/gcc/tree-nested.c
@@ -622,8 +622,6 @@ walk_gimple_omp_for (gimple for_stmt,
     		     walk_stmt_fn callback_stmt, walk_tree_fn callback_op,
     		     struct nesting_info *info)
 {
-  gcc_assert (!is_gimple_omp_oacc_specifically (for_stmt));
-
   struct walk_stmt_info wi;
   gimple_seq seq;
   tree t;
@@ -1322,7 +1320,22 @@ convert_nonlocal_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
 
     case GIMPLE_OACC_KERNELS:
     case GIMPLE_OACC_PARALLEL:
-      gcc_unreachable ();
+      save_suppress = info->suppress_expansion;
+      convert_nonlocal_omp_clauses (gimple_omp_taskreg_clauses_ptr (stmt),
+				    wi);
+      save_local_var_chain = info->new_local_var_chain;
+      info->new_local_var_chain = NULL;
+
+      walk_body (convert_nonlocal_reference_stmt, convert_nonlocal_reference_op,
+	         info, gimple_omp_body_ptr (stmt));
+
+      if (info->new_local_var_chain)
+	declare_vars (info->new_local_var_chain,
+	              gimple_seq_first_stmt (gimple_omp_body (stmt)),
+		      false);
+      info->new_local_var_chain = save_local_var_chain;
+      info->suppress_expansion = save_suppress;
+      break;
 
     case GIMPLE_OMP_PARALLEL:
     case GIMPLE_OMP_TASK:
@@ -1354,7 +1367,6 @@ convert_nonlocal_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
       break;
 
     case GIMPLE_OMP_FOR:
-      gcc_assert (!is_gimple_omp_oacc_specifically (stmt));
       save_suppress = info->suppress_expansion;
       convert_nonlocal_omp_clauses (gimple_omp_for_clauses_ptr (stmt), wi);
       walk_gimple_omp_for (stmt, convert_nonlocal_reference_stmt,
@@ -1895,8 +1907,6 @@ convert_local_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
     {
     case GIMPLE_OACC_KERNELS:
     case GIMPLE_OACC_PARALLEL:
-      gcc_unreachable ();
-
     case GIMPLE_OMP_PARALLEL:
     case GIMPLE_OMP_TASK:
       save_suppress = info->suppress_expansion;
@@ -1926,7 +1936,6 @@ convert_local_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
       break;
 
     case GIMPLE_OMP_FOR:
-      gcc_assert (!is_gimple_omp_oacc_specifically (stmt));
       save_suppress = info->suppress_expansion;
       convert_local_omp_clauses (gimple_omp_for_clauses_ptr (stmt), wi);
       walk_gimple_omp_for (stmt, convert_local_reference_stmt,
@@ -2286,18 +2295,15 @@ convert_tramp_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
 	break;
       }
 
-    case GIMPLE_OACC_KERNELS:
-    case GIMPLE_OACC_PARALLEL:
-      gcc_unreachable ();
-
     case GIMPLE_OMP_TARGET:
-      gcc_assert (!is_gimple_omp_oacc_specifically (stmt));
       if (gimple_omp_target_kind (stmt) != GF_OMP_TARGET_KIND_REGION)
 	{
 	  *handled_ops_p = false;
 	  return NULL_TREE;
 	}
       /* FALLTHRU */
+    case GIMPLE_OACC_KERNELS:
+    case GIMPLE_OACC_PARALLEL:
     case GIMPLE_OMP_PARALLEL:
     case GIMPLE_OMP_TASK:
       {
@@ -2359,8 +2365,6 @@ convert_gimple_call (gimple_stmt_iterator *gsi, bool *handled_ops_p,
 
     case GIMPLE_OACC_KERNELS:
     case GIMPLE_OACC_PARALLEL:
-      gcc_unreachable ();
-
     case GIMPLE_OMP_PARALLEL:
     case GIMPLE_OMP_TASK:
       save_static_chain_added = info->static_chain_added;
@@ -2431,7 +2435,6 @@ convert_gimple_call (gimple_stmt_iterator *gsi, bool *handled_ops_p,
       break;
 
     case GIMPLE_OMP_FOR:
-      gcc_assert (!is_gimple_omp_oacc_specifically (stmt));
       walk_body (convert_gimple_call, NULL, info,
 	  	 gimple_omp_for_pre_body_ptr (stmt));
       /* FALLTHRU */
@@ -2443,7 +2446,6 @@ convert_gimple_call (gimple_stmt_iterator *gsi, bool *handled_ops_p,
     case GIMPLE_OMP_TASKGROUP:
     case GIMPLE_OMP_ORDERED:
     case GIMPLE_OMP_CRITICAL:
-      gcc_assert (!is_gimple_omp_oacc_specifically (stmt));
       walk_body (convert_gimple_call, NULL, info, gimple_omp_body_ptr (stmt));
       break;
 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch,gomp-4_0-branch] acc nested function support
  2014-07-18 21:41 [patch,gomp-4_0-branch] acc nested function support Cesar Philippidis
@ 2014-07-29  8:30 ` Thomas Schwinge
  2014-11-05  0:45 ` Cesar Philippidis
  1 sibling, 0 replies; 13+ messages in thread
From: Thomas Schwinge @ 2014-07-29  8:30 UTC (permalink / raw)
  To: Cesar Philippidis; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 4548 bytes --]

Hi Cesar!

On Fri, 18 Jul 2014 14:07:00 -0700, Cesar Philippidis <cesar@codesourcery.com> wrote:
> This patch enables acc constructs to be used inside nested functions.

Thanks!

> I
> doubt nested functions will be used much in c, but some of the openacc
> fortran tutorials I've seen online make use of internal subroutines in
> fortran. Those internal subroutines are implemented as nested functions.

> Does this look OK to commit to gomp-4_0-branch?

I think we first need to resolve the following issue:

> --- a/gcc/gimple.h
> +++ b/gcc/gimple.h
> @@ -576,22 +576,6 @@ struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
>    tree data_arg;
>  };
>  
> -/* GIMPLE_OACC_KERNELS */
> -struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
> -  gimple_statement_oacc_kernels : public gimple_statement_omp_parallel_layout
> -{
> -    /* No extra fields; adds invariant:
> -         stmt->code == GIMPLE_OACC_KERNELS.  */
> -};
> -
> -/* GIMPLE_OACC_PARALLEL */
> -struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
> -  gimple_statement_oacc_parallel : public gimple_statement_omp_parallel_layout
> -{
> -    /* No extra fields; adds invariant:
> -         stmt->code == GIMPLE_OACC_PARALLEL.  */
> -};
> -
>  /* GIMPLE_OMP_PARALLEL or GIMPLE_TASK */
>  struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
>    gimple_statement_omp_taskreg : public gimple_statement_omp_parallel_layout
> @@ -617,6 +601,22 @@ struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
>           stmt->code == GIMPLE_OMP_TARGET.  */
>  };
>  
> +/* GIMPLE_OACC_KERNELS */
> +struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
> +  gimple_statement_oacc_kernels : public gimple_statement_omp_taskreg
> +{
> +    /* No extra fields; adds invariant:
> +         stmt->code == GIMPLE_OACC_KERNELS.  */
> +};
> +
> +/* GIMPLE_OACC_PARALLEL */
> +struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
> +  gimple_statement_oacc_parallel : public gimple_statement_omp_taskreg
> +{
> +    /* No extra fields; adds invariant:
> +         stmt->code == GIMPLE_OACC_PARALLEL.  */
> +};
> +
>  /* GIMPLE_OMP_TASK */
>  
>  struct GTY((tag("GSS_OMP_TASK")))
> @@ -927,7 +927,8 @@ template <>
>  inline bool
>  is_a_helper <gimple_statement_omp_taskreg *>::test (gimple gs)
>  {
> -  return gs->code == GIMPLE_OMP_PARALLEL || gs->code == GIMPLE_OMP_TASK;
> +  return gs->code == GIMPLE_OMP_PARALLEL || gs->code == GIMPLE_OMP_TASK
> +    || gs->code == GIMPLE_OACC_PARALLEL || gs->code == GIMPLE_OACC_KERNELS;
>  }
>  
>  template <>
> @@ -1135,7 +1136,8 @@ template <>
>  inline bool
>  is_a_helper <const gimple_statement_omp_taskreg *>::test (const_gimple gs)
>  {
> -  return gs->code == GIMPLE_OMP_PARALLEL || gs->code == GIMPLE_OMP_TASK;
> +  return gs->code == GIMPLE_OMP_PARALLEL || gs->code == GIMPLE_OMP_TASK
> +    || gs->code == GIMPLE_OACC_PARALLEL || gs->code == GIMPLE_OACC_KERNELS;
>  }

Doing it this way has been "disapproved" before, and I raised the issue
in
<http://news.gmane.org/find-root.php?message_id=%3C87egxcjkmc.fsf%40kepler.schwinge.homeip.net%3E>.
If that makes sense to you, please continue this discussion until I'm
back next week.

> --- a/gcc/tree-nested.c
> +++ b/gcc/tree-nested.c
> @@ -1322,7 +1320,22 @@ convert_nonlocal_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
>  
>      case GIMPLE_OACC_KERNELS:
>      case GIMPLE_OACC_PARALLEL:
> -      gcc_unreachable ();
> +      save_suppress = info->suppress_expansion;
> +      convert_nonlocal_omp_clauses (gimple_omp_taskreg_clauses_ptr (stmt),
> +				    wi);
> +      save_local_var_chain = info->new_local_var_chain;
> +      info->new_local_var_chain = NULL;
> +
> +      walk_body (convert_nonlocal_reference_stmt, convert_nonlocal_reference_op,
> +	         info, gimple_omp_body_ptr (stmt));
> +
> +      if (info->new_local_var_chain)
> +	declare_vars (info->new_local_var_chain,
> +	              gimple_seq_first_stmt (gimple_omp_body (stmt)),
> +		      false);
> +      info->new_local_var_chain = save_local_var_chain;
> +      info->suppress_expansion = save_suppress;
> +      break;
>  
>      case GIMPLE_OMP_PARALLEL:
>      case GIMPLE_OMP_TASK:

Wouldn't we rather group the GIMPLE_OACC_* with GIMPLE_OMP_TARGET
(handling all these together)?  (Either way, please avoid the code
duplication for GIMPLE_OACC_KERNELS/GIMPLE_OACC_PARALLEL even if grouping
with GIMPLE_OMP_PARALLEL/GIMPLE_OMP_TASK; I think the latter's existing
code can basically be re-used as is?)


Grüße,
 Thomas

[-- Attachment #2: Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch,gomp-4_0-branch] acc nested function support
  2014-07-18 21:41 [patch,gomp-4_0-branch] acc nested function support Cesar Philippidis
  2014-07-29  8:30 ` Thomas Schwinge
@ 2014-11-05  0:45 ` Cesar Philippidis
  2014-11-05 15:24   ` David Malcolm
  1 sibling, 1 reply; 13+ messages in thread
From: Cesar Philippidis @ 2014-11-05  0:45 UTC (permalink / raw)
  To: Thomas Schwinge, gcc-patches, dmalcolm

[-- Attachment #1: Type: text/plain, Size: 1338 bytes --]

Here's an updated version of my nested function patch.

David, I tweaked the gimple class hierarchy a little bit. Here's what
the updated class diagram looks like:

     + gimple_statement_omp
     |   |    layout: GSS_OMP.  Used for code GIMPLE_OMP_SECTION
     |   |
     |   + gimple_statement_omp_parallel_layout
     |   |   |    layout: GSS_OMP_PARALLEL_LAYOUT
     |   |   |
     |   |   + gimple_statement_omp_targetreg
     |   |       |
     |   |       + gimple_statement_oacc_kernels
     |   |       |        code: GIMPLE_OACC_KERNELS
     |   |       |
     |   |       + gimple_statement_oacc_parallel
     |   |       |        code: GIMPLE_OACC_PARALLEL
     |   |       |
     |   |       + gimple_statement_omp_target
     |   |                code: GIMPLE_OMP_TARGET

Basically, I've introduced gimple_statement_omp_targetreg and made
GIMPLE_OACC_{PARALLEL,KERNELS} and GIMPLE_OMP_TARGET inherit it. This
seems to work out pretty well. It cleans up both
{lower,expand}_oacc_offload in omp-low.c and allows OpenACC kernel and
parallel regions to be treated as OpenMP target regions in
tree-nested.c. Are these changes to gimple.h OK?

Thomas, assuming these gimple changes are OK, should I commit this
change to gomp-4_0-branch, or do you want to include this patch with
your middle end trunk submission?

Thanks,
Cesar

[-- Attachment #2: nested-fns-20141104.diff --]
[-- Type: text/x-patch, Size: 49297 bytes --]

2014-11-04  Cesar Philippidis  <cesar@codesourcery.com>

	gcc/
	* doc/gimple.texi (gimple class hierarchy): Add
	gimple_statement_omp_targetreg, gimple_statement_oacc_kernels and
	gimple_statement_oacc_parallel. Make gimple_statement_omp_target
	inherit gimple_statement_omp_targetreg.
	* gcc/gimple.h (gimple_statement_omp_targetreg): Declare.
	(gimple_statement_oacc_kernels): Derive from
	gimple_statement_omp_targetreg.
	(gimple_statement_oacc_parallel): Likewise.
	(gimple_statement_omp_target): Likewise.
	(is_a_helper <gimple_statement_omp_targetreg *>): Define.
	(is_a_helper <const gimple_statement_omp_targetreg *>): Define.
	(gimple_omp_subcode): Use GIMPLE_OACC_KERNELS as the starting point
	for OpenACC/OpenMP subcodes.
	(gimple_omp_targetreg_clauses): Declare.
	(gimple_omp_targetreg_clauses_ptr): Declare.
	(gimple_omp_targetreg_set_clauses): Declare.
	(gimple_omp_targetreg_child_fn): Declare.
	(gimple_omp_targetreg_child_fn_ptr): Declare.
	(gimple_omp_targetreg_set_child_fn): Declare.
	(gimple_omp_targetreg_data_arg): Declare.
	(gimple_omp_targetreg_data_arg_ptr): Declare.
	(gimple_omp_targetreg_set_data_arg): Declare.
	(gimple_omp_targetreg_kind): Declare.
	(gimple_omp_targetreg_set_kind): Declare.
	* gcc/omp-low.c (expand_oacc_offload): Use
	gimple_omp_targetreg_child_fn and gimple_omp_targetreg_data_arg
	instead of the specific functions for OpenACC kernels and parallel
	regions.
	(lower_oacc_offload): Use gimple_omp_targetreg_clauses and
	gimple_omp_targetreg_set_data_arg for similar reasons.
	* tree-nested.c (walk_gimple_omp_for): Remove OpenACC assert.
	(convert_nonlocal_reference_stmt): Handle GIMPLE_OACC_KERNELS
	and GIMPLE_OACC_PARALLEL.
	(convert_local_reference_stmt): Remove OpenACC asserts.
	(convert_tramp_reference_stmt): Handle GIMPLE_OACC_KERNELS and
	GIMPLE_OACC_PARALLEL.
	(convert_gimple_call): Remove OpenACC asserts.

	gcc/testsuite/
	* gcc.dg/goacc/nested-function-1.c: New test.
	* gfortran.dg/goacc/cray-2.f95: New test.
	* gfortran.dg/goacc/loop-4.f95: New test.
	* gfortran.dg/goacc/loop-5.f95: New test.

	libgomp/
	* testsuite/libgomp.oacc-c/sub-collapse-1.c: New test.
	* testsuite/libgomp.oacc-c/sub-collapse-2.c: New test.
	* testsuite/libgomp.oacc-fortran/sub-collapse-1.f90: New test.
	* testsuite/libgomp.oacc-fortran/sub-collapse-2.f90: New test.
	* testsuite/libgomp.oacc-fortran/sub-collapse-3.f90: New test.

diff --git a/gcc/doc/gimple.texi b/gcc/doc/gimple.texi
index 4c59748..860cb2c 100644
--- a/gcc/doc/gimple.texi
+++ b/gcc/doc/gimple.texi
@@ -354,8 +354,16 @@ kinds, along with their relationships to @code{GSS_} values (layouts) and
      |   |   |   + gimple_statement_omp_task
      |   |   |            code: GIMPLE_OMP_TASK
      |   |   |
-     |   |   + gimple_statement_omp_target
-     |   |            code: GIMPLE_OMP_TARGET
+     |   |   + gimple_statement_omp_targetreg
+     |   |       |
+     |   |       + gimple_statement_oacc_kernels
+     |   |       |        code: GIMPLE_OACC_KERNELS
+     |   |       |
+     |   |       + gimple_statement_oacc_parallel
+     |   |       |        code: GIMPLE_OACC_PARALLEL
+     |   |       |
+     |   |       + gimple_statement_omp_target
+     |   |                code: GIMPLE_OMP_TARGET
      |   |
      |   + gimple_statement_omp_sections
      |   |        layout: GSS_OMP_SECTIONS, code: GIMPLE_OMP_SECTIONS
diff --git a/gcc/gimple.h b/gcc/gimple.h
index 7bc673a..76abfb7 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -579,22 +579,6 @@ struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
   tree data_arg;
 };
 
-/* GIMPLE_OACC_KERNELS */
-struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
-  gimple_statement_oacc_kernels : public gimple_statement_omp_parallel_layout
-{
-    /* No extra fields; adds invariant:
-         stmt->code == GIMPLE_OACC_KERNELS.  */
-};
-
-/* GIMPLE_OACC_PARALLEL */
-struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
-  gimple_statement_oacc_parallel : public gimple_statement_omp_parallel_layout
-{
-    /* No extra fields; adds invariant:
-         stmt->code == GIMPLE_OACC_PARALLEL.  */
-};
-
 /* GIMPLE_OMP_PARALLEL or GIMPLE_TASK */
 struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
   gimple_statement_omp_taskreg : public gimple_statement_omp_parallel_layout
@@ -612,12 +596,14 @@ struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
          stmt->code == GIMPLE_OMP_PARALLEL.  */
 };
 
-/* GIMPLE_OMP_TARGET */
+/* GIMPLE_OMP_TARGET or GIMPLE_OACC_PARALLEL or GIMPLE_OACC_KERNELS */
 struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
-  gimple_statement_omp_target : public gimple_statement_omp_parallel_layout
+  gimple_statement_omp_targetreg : public gimple_statement_omp_parallel_layout
 {
     /* No extra fields; adds invariant:
-         stmt->code == GIMPLE_OMP_TARGET.  */
+         stmt->code == GIMPLE_OMP_TARGET
+	 || stmt->code == GIMPLE_OACC_PARALLEL
+	 || stmt->code == GIMPLE_OACC_KERNELS.  */
 };
 
 /* GIMPLE_OMP_TASK */
@@ -637,6 +623,29 @@ struct GTY((tag("GSS_OMP_TASK")))
   tree arg_align;
 };
 
+/* GIMPLE_OACC_KERNELS */
+struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
+  gimple_statement_oacc_kernels : public gimple_statement_omp_targetreg
+{
+    /* No extra fields; adds invariant:
+         stmt->code == GIMPLE_OACC_KERNELS.  */
+};
+
+/* GIMPLE_OACC_PARALLEL */
+struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
+  gimple_statement_oacc_parallel : public gimple_statement_omp_targetreg
+{
+    /* No extra fields; adds invariant:
+         stmt->code == GIMPLE_OACC_PARALLEL.  */
+};
+
+/* GIMPLE_OMP_TARGET */
+struct GTY((tag("GSS_OMP_PARALLEL_LAYOUT")))
+  gimple_statement_omp_target : public gimple_statement_omp_targetreg
+{
+    /* No extra fields; adds invariant:
+         stmt->code == GIMPLE_OMP_TARGET.  */
+};
 
 /* GIMPLE_OMP_SECTION */
 /* Uses struct gimple_statement_omp.  */
@@ -944,6 +953,15 @@ is_a_helper <gimple_statement_omp_parallel *>::test (gimple gs)
 template <>
 template <>
 inline bool
+is_a_helper <gimple_statement_omp_targetreg *>::test (gimple gs)
+{
+  return gs->code == GIMPLE_OMP_TARGET || gs->code == GIMPLE_OACC_PARALLEL
+    || gs->code == GIMPLE_OACC_KERNELS;
+}
+
+template <>
+template <>
+inline bool
 is_a_helper <gimple_statement_omp_target *>::test (gimple gs)
 {
   return gs->code == GIMPLE_OMP_TARGET;
@@ -1152,6 +1170,15 @@ is_a_helper <const gimple_statement_omp_parallel *>::test (const_gimple gs)
 template <>
 template <>
 inline bool
+is_a_helper <const gimple_statement_omp_targetreg *>::test (const_gimple gs)
+{
+  return gs->code == GIMPLE_OMP_TARGET || gs->code == GIMPLE_OACC_PARALLEL
+    || gs->code == GIMPLE_OACC_KERNELS;
+}
+
+template <>
+template <>
+inline bool
 is_a_helper <const gimple_statement_omp_target *>::test (const_gimple gs)
 {
   return gs->code == GIMPLE_OMP_TARGET;
@@ -1933,7 +1960,7 @@ gimple_references_memory_p (gimple stmt)
 static inline unsigned
 gimple_omp_subcode (const_gimple s)
 {
-  gcc_gimple_checking_assert (gimple_code (s) >= GIMPLE_OMP_ATOMIC_LOAD
+  gcc_gimple_checking_assert (gimple_code (s) >= GIMPLE_OACC_KERNELS
 	      && gimple_code (s) <= GIMPLE_OMP_TEAMS);
   return s->subcode;
 }
@@ -5318,6 +5345,127 @@ gimple_omp_single_set_clauses (gimple gs, tree clauses)
 /* Return the clauses associated with OMP_TARGET GS.  */
 
 static inline tree
+gimple_omp_targetreg_clauses (const_gimple gs)
+{
+  const gimple_statement_omp_targetreg *omp_targetreg_stmt =
+    as_a <const gimple_statement_omp_targetreg *> (gs);
+  return omp_targetreg_stmt->clauses;
+}
+
+
+/* Return a pointer to the clauses associated with targetreg statement GS.  */
+
+static inline tree *
+gimple_omp_targetreg_clauses_ptr (gimple gs)
+{
+  gimple_statement_omp_targetreg *omp_targetreg_stmt =
+    as_a <gimple_statement_omp_targetreg *> (gs);
+  return &omp_targetreg_stmt->clauses;
+}
+
+
+/* Set CLAUSES to be the clauses associated with targetreg statement GS.  */
+
+static inline void
+gimple_omp_targetreg_set_clauses (gimple gs, tree clauses)
+{
+  gimple_statement_omp_targetreg *omp_targetreg_stmt =
+    as_a <gimple_statement_omp_targetreg *> (gs);
+  omp_targetreg_stmt->clauses = clauses;
+}
+
+
+/* Return the child function holding the body of targetreg statement GS.  */
+
+static inline tree
+gimple_omp_targetreg_child_fn (const_gimple gs)
+{
+  const gimple_statement_omp_targetreg *omp_targetreg_stmt =
+    as_a <const gimple_statement_omp_targetreg *> (gs);
+  return omp_targetreg_stmt->child_fn;
+}
+
+/* Return a pointer to the child function used to hold the body of
+   targetreg statement GS.  */
+
+static inline tree *
+gimple_omp_targetreg_child_fn_ptr (gimple gs)
+{
+  gimple_statement_omp_targetreg *omp_targetreg_stmt =
+    as_a <gimple_statement_omp_targetreg *> (gs);
+  return &omp_targetreg_stmt->child_fn;
+}
+
+
+/* Set CHILD_FN to be the child function for targetreg statement GS.  */
+
+static inline void
+gimple_omp_targetreg_set_child_fn (gimple gs, tree child_fn)
+{
+  gimple_statement_omp_targetreg *omp_targetreg_stmt =
+    as_a <gimple_statement_omp_targetreg *> (gs);
+  omp_targetreg_stmt->child_fn = child_fn;
+}
+
+
+/* Return the artificial argument used to send variables and values
+   from the parent to the children threads in targetreg statement GS.  */
+
+static inline tree
+gimple_omp_targetreg_data_arg (const_gimple gs)
+{
+  const gimple_statement_omp_targetreg *omp_targetreg_stmt =
+    as_a <const gimple_statement_omp_targetreg *> (gs);
+  return omp_targetreg_stmt->data_arg;
+}
+
+
+/* Return a pointer to the data argument for targetreg statement GS.  */
+
+static inline tree *
+gimple_omp_targetreg_data_arg_ptr (gimple gs)
+{
+  gimple_statement_omp_targetreg *omp_targetreg_stmt =
+    as_a <gimple_statement_omp_targetreg *> (gs);
+  return &omp_targetreg_stmt->data_arg;
+}
+
+
+/* Set DATA_ARG to be the data argument for targetreg statement GS.  */
+
+static inline void
+gimple_omp_targetreg_set_data_arg (gimple gs, tree data_arg)
+{
+  gimple_statement_omp_targetreg *omp_targetreg_stmt =
+    as_a <gimple_statement_omp_targetreg *> (gs);
+  omp_targetreg_stmt->data_arg = data_arg;
+}
+
+
+/* Return the kind of OMP targetreg statement G.  */
+
+static inline int
+gimple_omp_targetreg_kind (const_gimple g)
+{
+  /* G is an OMP target or an OpenACC parallel/kernels statement.  */
+  return (gimple_omp_subcode (g) & GF_OMP_TARGET_KIND_MASK);
+}
+
+
+/* Set the OMP targetreg kind.  */
+
+static inline void
+gimple_omp_targetreg_set_kind (gimple g, int kind)
+{
+  /* G is an OMP target or an OpenACC parallel/kernels statement.  */
+  g->subcode = (g->subcode & ~GF_OMP_TARGET_KIND_MASK)
+		      | (kind & GF_OMP_TARGET_KIND_MASK);
+}
+
+
+/* Return the clauses associated with OMP_TARGET GS.  */
+
+static inline tree
 gimple_omp_target_clauses (const_gimple gs)
 {
   const gimple_statement_omp_target *omp_target_stmt =
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index d735e86..5e304fe 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -5407,24 +5407,9 @@ expand_oacc_offload (struct omp_region *region)
   gimple_stmt_iterator gsi;
   gimple entry_stmt, stmt;
   edge e;
-  tree (*gimple_omp_child_fn) (const_gimple);
-  tree (*gimple_omp_data_arg) (const_gimple);
-  switch (region->type)
-    {
-    case GIMPLE_OACC_KERNELS:
-      gimple_omp_child_fn = gimple_oacc_kernels_child_fn;
-      gimple_omp_data_arg = gimple_oacc_kernels_data_arg;
-      break;
-    case GIMPLE_OACC_PARALLEL:
-      gimple_omp_child_fn = gimple_oacc_parallel_child_fn;
-      gimple_omp_data_arg = gimple_oacc_parallel_data_arg;
-      break;
-    default:
-      gcc_unreachable ();
-    }
 
   entry_stmt = last_stmt (region->entry);
-  child_fn = gimple_omp_child_fn (entry_stmt);
+  child_fn = gimple_omp_targetreg_child_fn (entry_stmt);
   child_cfun = DECL_STRUCT_FUNCTION (child_fn);
 
   /* Supported by expand_omp_taskreg, but not here.  */
@@ -5452,13 +5437,13 @@ expand_oacc_offload (struct omp_region *region)
 	 a function call that has been inlined, the original PARM_DECL
 	 .OMP_DATA_I may have been converted into a different local
 	 variable.  In which case, we need to keep the assignment.  */
-      if (gimple_omp_data_arg (entry_stmt))
+      if (gimple_omp_targetreg_data_arg (entry_stmt))
 	{
 	  basic_block entry_succ_bb = single_succ (entry_bb);
 	  gimple_stmt_iterator gsi;
 	  tree arg;
 	  gimple parcopy_stmt = NULL;
-	  tree sender = TREE_VEC_ELT (gimple_omp_data_arg (entry_stmt), 0);
+	  tree sender = TREE_VEC_ELT (gimple_omp_targetreg_data_arg (entry_stmt), 0);
 
 	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
 	    {
@@ -5725,7 +5710,7 @@ expand_oacc_offload (struct omp_region *region)
     }
 
   gsi = gsi_last_bb (new_bb);
-  t = gimple_omp_data_arg (entry_stmt);
+  t = gimple_omp_targetreg_data_arg (entry_stmt);
   if (t == NULL)
     {
       t1 = size_zero_node;
@@ -10319,23 +10304,8 @@ lower_oacc_offload (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   gimple_seq par_body, olist, ilist, orlist, irlist, new_body;
   location_t loc = gimple_location (stmt);
   unsigned int map_cnt = 0;
-  tree (*gimple_omp_clauses) (const_gimple);
-  void (*gimple_omp_set_data_arg) (gimple, tree);
-  switch (gimple_code (stmt))
-    {
-    case GIMPLE_OACC_KERNELS:
-      gimple_omp_clauses = gimple_oacc_kernels_clauses;
-      gimple_omp_set_data_arg = gimple_oacc_kernels_set_data_arg;
-      break;
-    case GIMPLE_OACC_PARALLEL:
-      gimple_omp_clauses = gimple_oacc_parallel_clauses;
-      gimple_omp_set_data_arg = gimple_oacc_parallel_set_data_arg;
-      break;
-    default:
-      gcc_unreachable ();
-    }
 
-  clauses = gimple_omp_clauses (stmt);
+  clauses = gimple_omp_targetreg_clauses (stmt);
   par_bind = gimple_seq_first_stmt (gimple_omp_body (stmt));
   par_body = gimple_bind_body (par_bind);
   child_fn = ctx->cb.dst_fn;
@@ -10428,7 +10398,7 @@ lower_oacc_offload (gimple_stmt_iterator *gsi_p, omp_context *ctx)
       DECL_NAMELESS (TREE_VEC_ELT (t, 2)) = 1;
       TREE_ADDRESSABLE (TREE_VEC_ELT (t, 2)) = 1;
       TREE_STATIC (TREE_VEC_ELT (t, 2)) = 1;
-      gimple_omp_set_data_arg (stmt, t);
+      gimple_omp_targetreg_set_data_arg (stmt, t);
 
       vec<constructor_elt, va_gc> *vsize;
       vec<constructor_elt, va_gc> *vkind;
diff --git a/gcc/testsuite/gcc.dg/goacc/nested-function-1.c b/gcc/testsuite/gcc.dg/goacc/nested-function-1.c
new file mode 100644
index 0000000..51a0e9f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/goacc/nested-function-1.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+
+extern void abort (void);
+
+int
+main (void)
+{
+  int j = 0, k = 6, l = 7, m = 8;
+  void simple (void)
+  {
+    int i;
+#pragma acc parallel
+    {
+#pragma acc loop
+      for (i = 0; i < m; i+= k)
+	j = (m + i - j) * l;
+    }
+  }
+  void collapse (void)
+  {
+    int x, y, z;
+#pragma acc parallel
+    {
+#pragma acc loop collapse (3)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  void reduction (void)
+  {
+    int x, y, z;
+#pragma acc parallel
+    {
+#pragma acc loop collapse (3) reduction (+:j)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  simple();
+  collapse();
+  reduction();
+  return 0;
+}
diff --git a/gcc/testsuite/gfortran.dg/goacc/cray-2.f95 b/gcc/testsuite/gfortran.dg/goacc/cray-2.f95
new file mode 100644
index 0000000..70f7cf6
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/cray-2.f95
@@ -0,0 +1,55 @@
+! { dg-do compile }
+! { dg-additional-options "-fcray-pointer" }
+
+program test
+  call oacc1
+contains
+  subroutine oacc1
+    implicit none
+    integer :: i
+    real :: pointee
+    pointer (ptr, pointee)
+    !$acc declare device_resident (pointee)
+    !$acc declare device_resident (ptr)
+    !$acc data copy (pointee) ! { dg-error "Cray pointee" }
+    !$acc end data
+    !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
+    !$acc end data
+    !$acc parallel private (pointee) ! { dg-error "Cray pointee" }
+    !$acc end parallel
+    !$acc host_data use_device (pointee) ! { dg-error "Cray pointee" }
+    !$acc end host_data
+    !$acc parallel loop reduction(+:pointee) ! { dg-error "Cray pointee" }
+    do i = 1,5
+    enddo
+    !$acc end parallel loop
+    !$acc parallel loop
+    do i = 1,5
+      ! Subarrays are not implemented yet
+      !$acc cache (pointee) ! TODO: This must fail, as in openacc-1_0-branch
+    enddo
+    !$acc end parallel loop
+    !$acc update host (pointee) ! { dg-error "Cray pointee" }
+    !$acc update device (pointee) ! { dg-error "Cray pointee" }
+    !$acc data copy (ptr)
+    !$acc end data
+    !$acc data deviceptr (ptr) ! { dg-error "Cray pointer" }
+    !$acc end data
+    !$acc parallel private (ptr)
+    !$acc end parallel
+    !$acc host_data use_device (ptr) ! { dg-error "Cray pointer" }
+    !$acc end host_data
+    !$acc parallel loop reduction(+:ptr) ! { dg-error "Cray pointer" }
+    do i = 1,5
+    enddo
+    !$acc end parallel loop
+    !$acc parallel loop
+    do i = 1,5
+      !$acc cache (ptr) ! TODO: This must fail, as in openacc-1_0-branch
+    enddo
+    !$acc end parallel loop
+    !$acc update host (ptr)
+    !$acc update device (ptr)
+  end subroutine oacc1
+end program test
+! { dg-prune-output "unimplemented" }
diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-4.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-4.f95
new file mode 100644
index 0000000..f876106
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/loop-4.f95
@@ -0,0 +1,170 @@
+! { dg-do compile }
+! { dg-additional-options "-fmax-errors=100" }
+program test
+  call test1
+contains
+subroutine test1
+  integer :: i, j, k, b(10)
+  integer, dimension (30) :: a
+  double precision :: d
+  real :: r
+  i = 0
+  !$acc loop
+  do 100 ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+    if (i .gt. 0) exit ! { dg-error "EXIT statement" }
+  100 i = i + 1
+  i = 0
+  !$acc loop
+  do ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+      if (i .gt. 0) exit ! { dg-error "EXIT statement" }
+       i = i + 1
+  end do
+  i = 0
+  !$acc loop
+  do 200 while (i .lt. 4) ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+  200 i = i + 1
+  !$acc loop
+  do while (i .lt. 8) ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+       i = i + 1
+  end do
+  !$acc loop
+  do 300 d = 1, 30, 6 ! { dg-error "integer" }
+      i = d
+  300 a(i) = 1
+  !$acc loop
+  do d = 1, 30, 5 ! { dg-error "integer" }
+       i = d
+      a(i) = 2
+  end do
+  !$acc loop
+  do i = 1, 30
+      if (i .eq. 16) exit ! { dg-error "EXIT statement" }
+  end do
+  !$acc loop
+  outer: do i = 1, 30
+      do j = 5, 10
+          if (i .eq. 6 .and. j .eq. 7) exit outer ! { dg-error "EXIT statement" }
+      end do
+  end do outer
+  last: do i = 1, 30
+   end do last
+
+  ! different types of loop are allowed
+  !$acc loop
+  do i = 1,10
+  end do
+  !$acc loop
+  do 400, i = 1,10
+400   a(i) = i
+
+  ! after loop directive must be loop
+  !$acc loop
+  a(1) = 1 ! { dg-error "Expected DO loop" }
+  do i = 1,10
+  enddo
+
+  ! combined directives may be used with/without end
+  !$acc parallel loop
+  do i = 1,10
+  enddo
+  !$acc parallel loop
+  do i = 1,10
+  enddo
+  !$acc end parallel loop
+  !$acc kernels loop
+  do i = 1,10
+  enddo
+  !$acc kernels loop
+  do i = 1,10
+  enddo
+  !$acc end kernels loop
+
+  !$acc kernels loop reduction(max:i)
+  do i = 1,10
+  enddo
+  !$acc kernels
+  !$acc loop reduction(max:i)
+  do i = 1,10
+  enddo
+  !$acc end kernels
+
+  !$acc parallel loop collapse(0) ! { dg-error "constant positive integer" }
+  do i = 1,10
+  enddo
+
+  !$acc parallel loop collapse(-1) ! { dg-error "constant positive integer" }
+  do i = 1,10
+  enddo
+
+  !$acc parallel loop collapse(i) ! { dg-error "Constant expression required" }
+  do i = 1,10
+  enddo
+
+  !$acc parallel loop collapse(4) ! { dg-error "not enough DO loops for collapsed" }
+    do i = 1, 3
+        do j = 4, 6
+          do k = 5, 7
+              a(i+j-k) = i + j + k
+          end do
+        end do
+    end do
+    !$acc parallel loop collapse(2)
+    do i = 1, 5, 2
+        do j = i + 1, 7, i  ! { dg-error "collapsed loops don.t form rectangular iteration space" }
+        end do
+    end do
+    !$acc parallel loop collapse(2)
+    do i = 1, 3
+        do j = 4, 6
+        end do
+    end do
+    !$acc parallel loop collapse(2)
+    do i = 1, 3
+        do j = 4, 6
+        end do
+        k = 4
+    end do
+    !$acc parallel loop collapse(3-1)
+    do i = 1, 3
+        do j = 4, 6
+        end do
+        k = 4
+    end do
+    !$acc parallel loop collapse(1+1)
+    do i = 1, 3
+        do j = 4, 6
+        end do
+        k = 4
+    end do
+    !$acc parallel loop collapse(2)
+    do i = 1, 3
+        do      ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+        end do
+    end do
+    !$acc parallel loop collapse(2)
+    do i = 1, 3
+        do r = 4, 6    ! { dg-error "integer" }
+        end do
+    end do
+
+    ! Both seq and independent are not allowed
+  !$acc loop independent seq ! { dg-error "SEQ conflicts with INDEPENDENT" }
+  do i = 1,10
+  enddo
+
+
+  !$acc cache (a) ! { dg-error "inside of loop" }
+
+  do i = 1,10
+    !$acc cache(a)
+  enddo
+
+  do i = 1,10
+    a(i) = i
+    !$acc cache(a)
+  enddo
+
+end subroutine test1
+end program test
+! { dg-prune-output "Deleted" }
+! { dg-prune-output "ACC cache unimplemented" }
diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-5.f95 b/gcc/testsuite/gfortran.dg/goacc/loop-5.f95
new file mode 100644
index 0000000..448d2f5
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/loop-5.f95
@@ -0,0 +1,58 @@
+! { dg-do compile }
+! { dg-additional-options "-std=f2008" }
+
+program test
+  call test1
+contains
+subroutine test1
+  implicit none
+  integer :: i, j
+
+  ! !$acc end loop not required by spec
+  !$acc loop
+  do i = 1,5
+  enddo
+  !$acc end loop ! { dg-warning "Redundant" }
+
+  !$acc loop
+  do i = 1,5
+  enddo
+  j = 1
+  !$acc end loop ! { dg-error "Unexpected" }
+
+  !$acc parallel
+  !$acc loop
+  do i = 1,5
+  enddo
+  !$acc end parallel
+  !$acc end loop ! { dg-error "Unexpected" }
+
+  ! OpenACC supports Fortran 2008 do concurrent statement
+  !$acc loop
+  do concurrent (i = 1:5)
+  end do
+
+  !$acc loop
+  outer_loop: do i = 1, 5
+    inner_loop: do j = 1,5
+      if (i .eq. j) cycle outer_loop
+      if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
+    end do inner_loop
+  end do outer_loop
+
+  outer_loop1: do i = 1, 5
+    !$acc loop
+    inner_loop1: do j = 1,5
+      if (i .eq. j) cycle outer_loop1 ! { dg-error "CYCLE statement" }
+    end do inner_loop1
+  end do outer_loop1
+
+  !$acc loop collapse(2)
+  outer_loop2: do i = 1, 5
+    inner_loop2: do j = 1,5
+      if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
+      if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
+    end do inner_loop2
+  end do outer_loop2
+end subroutine test1
+end program test
diff --git a/gcc/tree-nested.c b/gcc/tree-nested.c
index b5d6543..e8ece9c 100644
--- a/gcc/tree-nested.c
+++ b/gcc/tree-nested.c
@@ -627,8 +627,6 @@ walk_gimple_omp_for (gimple for_stmt,
     		     walk_stmt_fn callback_stmt, walk_tree_fn callback_op,
     		     struct nesting_info *info)
 {
-  gcc_assert (!is_gimple_omp_oacc_specifically (for_stmt));
-
   struct walk_stmt_info wi;
   gimple_seq seq;
   tree t;
@@ -1325,10 +1323,6 @@ convert_nonlocal_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
 	}
       break;
 
-    case GIMPLE_OACC_KERNELS:
-    case GIMPLE_OACC_PARALLEL:
-      gcc_unreachable ();
-
     case GIMPLE_OMP_PARALLEL:
     case GIMPLE_OMP_TASK:
       save_suppress = info->suppress_expansion;
@@ -1359,7 +1353,6 @@ convert_nonlocal_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
       break;
 
     case GIMPLE_OMP_FOR:
-      gcc_assert (!is_gimple_omp_oacc_specifically (stmt));
       save_suppress = info->suppress_expansion;
       convert_nonlocal_omp_clauses (gimple_omp_for_clauses_ptr (stmt), wi);
       walk_gimple_omp_for (stmt, convert_nonlocal_reference_stmt,
@@ -1385,12 +1378,14 @@ convert_nonlocal_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
       info->suppress_expansion = save_suppress;
       break;
 
+    case GIMPLE_OACC_KERNELS:
+    case GIMPLE_OACC_PARALLEL:
     case GIMPLE_OMP_TARGET:
-      gcc_assert (!is_gimple_omp_oacc_specifically (stmt));
-      if (gimple_omp_target_kind (stmt) != GF_OMP_TARGET_KIND_REGION)
+      if (gimple_omp_targetreg_kind (stmt) != GF_OMP_TARGET_KIND_REGION)
 	{
 	  save_suppress = info->suppress_expansion;
-	  convert_nonlocal_omp_clauses (gimple_omp_target_clauses_ptr (stmt),
+	  convert_nonlocal_omp_clauses (gimple_omp_targetreg_clauses_ptr
+					(stmt),
 					wi);
 	  info->suppress_expansion = save_suppress;
 	  walk_body (convert_nonlocal_reference_stmt,
@@ -1399,7 +1394,7 @@ convert_nonlocal_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
 	  break;
 	}
       save_suppress = info->suppress_expansion;
-      if (convert_nonlocal_omp_clauses (gimple_omp_target_clauses_ptr (stmt),
+      if (convert_nonlocal_omp_clauses (gimple_omp_targetreg_clauses_ptr (stmt),
 					wi))
 	{
 	  tree c, decl;
@@ -1408,8 +1403,8 @@ convert_nonlocal_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
 	  OMP_CLAUSE_DECL (c) = decl;
 	  OMP_CLAUSE_MAP_KIND (c) = OMP_CLAUSE_MAP_TO;
 	  OMP_CLAUSE_SIZE (c) = DECL_SIZE_UNIT (decl);
-	  OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (stmt);
-	  gimple_omp_target_set_clauses (stmt, c);
+	  OMP_CLAUSE_CHAIN (c) = gimple_omp_targetreg_clauses (stmt);
+	  gimple_omp_targetreg_set_clauses (stmt, c);
 	}
 
       save_local_var_chain = info->new_local_var_chain;
@@ -1898,10 +1893,6 @@ convert_local_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
 
   switch (gimple_code (stmt))
     {
-    case GIMPLE_OACC_KERNELS:
-    case GIMPLE_OACC_PARALLEL:
-      gcc_unreachable ();
-
     case GIMPLE_OMP_PARALLEL:
     case GIMPLE_OMP_TASK:
       save_suppress = info->suppress_expansion;
@@ -1931,7 +1922,6 @@ convert_local_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
       break;
 
     case GIMPLE_OMP_FOR:
-      gcc_assert (!is_gimple_omp_oacc_specifically (stmt));
       save_suppress = info->suppress_expansion;
       convert_local_omp_clauses (gimple_omp_for_clauses_ptr (stmt), wi);
       walk_gimple_omp_for (stmt, convert_local_reference_stmt,
@@ -1957,19 +1947,20 @@ convert_local_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
       info->suppress_expansion = save_suppress;
       break;
 
+    case GIMPLE_OACC_KERNELS:
+    case GIMPLE_OACC_PARALLEL:
     case GIMPLE_OMP_TARGET:
-      gcc_assert (!is_gimple_omp_oacc_specifically (stmt));
-      if (gimple_omp_target_kind (stmt) != GF_OMP_TARGET_KIND_REGION)
+      if (gimple_omp_targetreg_kind (stmt) != GF_OMP_TARGET_KIND_REGION)
 	{
 	  save_suppress = info->suppress_expansion;
-	  convert_local_omp_clauses (gimple_omp_target_clauses_ptr (stmt), wi);
+	  convert_local_omp_clauses (gimple_omp_targetreg_clauses_ptr (stmt), wi);
 	  info->suppress_expansion = save_suppress;
 	  walk_body (convert_local_reference_stmt, convert_local_reference_op,
 		     info, gimple_omp_body_ptr (stmt));
 	  break;
 	}
       save_suppress = info->suppress_expansion;
-      if (convert_local_omp_clauses (gimple_omp_target_clauses_ptr (stmt), wi))
+      if (convert_local_omp_clauses (gimple_omp_targetreg_clauses_ptr (stmt), wi))
 	{
 	  tree c;
 	  (void) get_frame_type (info);
@@ -1977,8 +1968,8 @@ convert_local_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
 	  OMP_CLAUSE_DECL (c) = info->frame_decl;
 	  OMP_CLAUSE_MAP_KIND (c) = OMP_CLAUSE_MAP_TOFROM;
 	  OMP_CLAUSE_SIZE (c) = DECL_SIZE_UNIT (info->frame_decl);
-	  OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (stmt);
-	  gimple_omp_target_set_clauses (stmt, c);
+	  OMP_CLAUSE_CHAIN (c) = gimple_omp_targetreg_clauses (stmt);
+	  gimple_omp_targetreg_set_clauses (stmt, c);
 	}
 
       save_local_var_chain = info->new_local_var_chain;
@@ -2291,11 +2282,8 @@ convert_tramp_reference_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
 
     case GIMPLE_OACC_KERNELS:
     case GIMPLE_OACC_PARALLEL:
-      gcc_unreachable ();
-
     case GIMPLE_OMP_TARGET:
-      gcc_assert (!is_gimple_omp_oacc_specifically (stmt));
-      if (gimple_omp_target_kind (stmt) != GF_OMP_TARGET_KIND_REGION)
+      if (gimple_omp_targetreg_kind (stmt) != GF_OMP_TARGET_KIND_REGION)
 	{
 	  *handled_ops_p = false;
 	  return NULL_TREE;
@@ -2360,10 +2348,6 @@ convert_gimple_call (gimple_stmt_iterator *gsi, bool *handled_ops_p,
 	}
       break;
 
-    case GIMPLE_OACC_KERNELS:
-    case GIMPLE_OACC_PARALLEL:
-      gcc_unreachable ();
-
     case GIMPLE_OMP_PARALLEL:
     case GIMPLE_OMP_TASK:
       save_static_chain_added = info->static_chain_added;
@@ -2396,9 +2380,10 @@ convert_gimple_call (gimple_stmt_iterator *gsi, bool *handled_ops_p,
       info->static_chain_added |= save_static_chain_added;
       break;
 
+    case GIMPLE_OACC_KERNELS:
+    case GIMPLE_OACC_PARALLEL:
     case GIMPLE_OMP_TARGET:
-      gcc_assert (!is_gimple_omp_oacc_specifically (stmt));
-      if (gimple_omp_target_kind (stmt) != GF_OMP_TARGET_KIND_REGION)
+      if (gimple_omp_targetreg_kind (stmt) != GF_OMP_TARGET_KIND_REGION)
 	{
 	  walk_body (convert_gimple_call, NULL, info, gimple_omp_body_ptr (stmt));
 	  break;
@@ -2413,7 +2398,7 @@ convert_gimple_call (gimple_stmt_iterator *gsi, bool *handled_ops_p,
 	    continue;
 	  decl = i ? get_chain_decl (info) : info->frame_decl;
 	  /* Don't add CHAIN.* or FRAME.* twice.  */
-	  for (c = gimple_omp_target_clauses (stmt);
+	  for (c = gimple_omp_targetreg_clauses (stmt);
 	       c;
 	       c = OMP_CLAUSE_CHAIN (c))
 	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
@@ -2426,15 +2411,14 @@ convert_gimple_call (gimple_stmt_iterator *gsi, bool *handled_ops_p,
 	      OMP_CLAUSE_MAP_KIND (c)
 		= i ? OMP_CLAUSE_MAP_TO : OMP_CLAUSE_MAP_TOFROM;
 	      OMP_CLAUSE_SIZE (c) = DECL_SIZE_UNIT (decl);
-	      OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (stmt);
-	      gimple_omp_target_set_clauses (stmt, c);
+	      OMP_CLAUSE_CHAIN (c) = gimple_omp_targetreg_clauses (stmt);
+	      gimple_omp_targetreg_set_clauses (stmt, c);
 	    }
 	}
       info->static_chain_added |= save_static_chain_added;
       break;
 
     case GIMPLE_OMP_FOR:
-      gcc_assert (!is_gimple_omp_oacc_specifically (stmt));
       walk_body (convert_gimple_call, NULL, info,
 	  	 gimple_omp_for_pre_body_ptr (stmt));
       /* FALLTHRU */
@@ -2446,7 +2430,6 @@ convert_gimple_call (gimple_stmt_iterator *gsi, bool *handled_ops_p,
     case GIMPLE_OMP_TASKGROUP:
     case GIMPLE_OMP_ORDERED:
     case GIMPLE_OMP_CRITICAL:
-      gcc_assert (!is_gimple_omp_oacc_specifically (stmt));
       walk_body (convert_gimple_call, NULL, info, gimple_omp_body_ptr (stmt));
       break;
 
diff --git a/libgomp/testsuite/libgomp.oacc-c/sub-collapse-1.c b/libgomp/testsuite/libgomp.oacc-c/sub-collapse-1.c
new file mode 100644
index 0000000..f28348a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/sub-collapse-1.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+int
+main (void)
+{
+  void test1 ()
+  {
+    int i, j, k;
+    int a[4][7][8];
+    int l = 0;
+
+    memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(4 - 1)
+    for (i = 1; i <= 3; i++)
+      for (j = 4; j <= 6; j++)
+	for (k = 5; k <= 7; k++)
+	  a[i][j][k] = i + j + k;
+#pragma acc end parallel
+
+    for (i = 1; i <= 3; i++)
+      for (j = 4; j <= 6; j++)
+	for (k = 5; k <= 7; k++)
+	  if (a[i][j][k] != i + j + k)
+	    abort();
+  }
+
+  void test2 ()
+  {
+    int i, j, k;
+    int a[4][4][4];
+
+    memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 1; k <= 3; k++)
+	  a[i][j][k] = 1;
+#pragma acc end parallel
+
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 1; k <= 3; k++)
+	  if (a[i][j][k] != 1)
+	    abort ();
+  }
+
+  test1 ();
+  test2 ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/sub-collapse-2.c b/libgomp/testsuite/libgomp.oacc-c/sub-collapse-2.c
new file mode 100644
index 0000000..00f8d4e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/sub-collapse-2.c
@@ -0,0 +1,163 @@
+/* { dg-do run } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+int
+main (void)
+{
+  int p1 = 2, p2 = 6, p3 = 0, p4 = 4, p5 = 13, p6 = 18, p7 = 1, p8 = 1, p9 = 1;
+
+  void test1 ()
+  {
+    int i, j, k;
+    int a[4][4][4];
+
+    memset (a, '\0', sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 2; k <= 3; k++)
+	  a[i][j][k] = 1;
+#pragma acc end parallel
+
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 2; k <= 3; k++)
+	  if (a[i][j][k] != 1)
+	    abort();
+  }
+
+  void test2 (int v1, int v2, int v3, int v4, int v5, int v6)
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+
+    memset (a, '\0', sizeof (a));
+    memset (b, '\0', sizeof (b));
+
+#pragma acc parallel
+#pragma acc loop collapse(3) reduction (||:l)
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+#pragma acc end parallel
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    abort ();
+  }
+
+  void test3 (int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8,
+      int v9)
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+
+    memset (a, '\0', sizeof (a));
+    memset (b, '\0', sizeof (b));
+
+#pragma acc parallel
+#pragma acc loop collapse(3) reduction (||:l)
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+#pragma acc end parallel
+
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    abort ();
+  }
+
+  void test4 ()
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+    int v1 = p1, v2 = p2, v3 = p3, v4 = p4, v5 = p5, v6 = p6, v7 = p7, v8 = p8,
+      v9 = p9;
+
+    memset (a, '\0', sizeof (a));
+    memset (b, '\0', sizeof (b));
+
+#pragma acc parallel
+#pragma acc loop collapse(3) reduction (||:l)
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+#pragma acc end parallel
+
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    abort ();
+  }
+
+  test1 ();
+  test2 (p1, p2, p3, p4, p5, p6);
+  test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9);
+  test4 ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/sub-collapse-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/sub-collapse-1.f90
new file mode 100644
index 0000000..169cd12
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/sub-collapse-1.f90
@@ -0,0 +1,56 @@
+! { dg-do run }
+
+program collapse2
+  call test1
+  call test2
+contains
+  subroutine test1
+    integer :: i, j, k, a(1:3, 4:6, 5:7)
+    logical :: l
+    l = .false.
+    a(:, :, :) = 0
+    !$acc parallel
+    !$acc loop collapse(4 - 1)
+      do 164 i = 1, 3
+        do 164 j = 4, 6
+          do 164 k = 5, 7
+            a(i, j, k) = i + j + k
+164      end do
+    !$acc loop collapse(2) reduction(.or.:l)
+firstdo: do i = 1, 3
+        do j = 4, 6
+          do k = 5, 7
+            if (a(i, j, k) .ne. (i + j + k)) l = .true.
+          end do
+        end do
+      end do firstdo
+    !$acc end parallel
+    if (l) call abort
+  end subroutine test1
+
+  subroutine test2
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    !$acc parallel
+    !$acc loop collapse(3)
+      do 115 k=1,3
+  dokk: do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    if (any(a(1:3,1:3,1:3).ne.1)) call abort
+
+    !$acc loop collapse(3)
+ dol: do 120 l=1,3
+  doll: do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    if (any(a(1:3,1:3,1:3).ne.2)) call abort
+    !$acc end parallel
+  end subroutine test2
+
+end program collapse2
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/sub-collapse-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/sub-collapse-2.f90
new file mode 100644
index 0000000..a86e522
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/sub-collapse-2.f90
@@ -0,0 +1,171 @@
+! { dg-do run }
+
+program collapse3
+  integer :: p1, p2, p3, p4, p5, p6, p7, p8, p9
+  p1 = 2
+  p2 = 6
+  p3 = -2
+  p4 = 4
+  p5 = 13
+  p6 = 18
+  p7 = 1
+  p8 = 1
+  p9 = 1
+  call test1
+  call test2 (p1, p2, p3, p4, p5, p6)
+  call test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9)
+  call test4
+contains
+  subroutine test1
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    !$acc parallel
+    !$acc loop collapse(3)
+      do 115 k=1,3
+dokk:   do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.1)) call abort
+    !$acc parallel
+    !$acc loop collapse(3)
+dol:  do 120 l=1,3
+doll:   do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.2)) call abort
+    end subroutine test1
+
+  subroutine test2(v1, v2, v3, v4, v5, v6)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel
+    !$acc loop collapse (3) reduction (.or.:l)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.l) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test2
+
+  subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel
+    !$acc loop collapse (3) reduction (.or.:l)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.l) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test3
+
+  subroutine test4
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    v1 = p1
+    v2 = p2
+    v3 = p3
+    v4 = p4
+    v5 = p5
+    v6 = p6
+    v7 = p7
+    v8 = p8
+    v9 = p9
+    !$acc parallel
+    !$acc loop collapse (3) reduction (.or.:l)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+         do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+         end do
+      end do
+    end do
+  end subroutine test4
+
+end program collapse3
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/sub-collapse-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/sub-collapse-3.f90
new file mode 100644
index 0000000..f91f0be
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/sub-collapse-3.f90
@@ -0,0 +1,242 @@
+! { dg-do run }
+
+program sub_collapse_3
+  call test1
+  call test2 (2, 6, -2, 4, 13, 18)
+  call test3 (2, 6, -2, 4, 13, 18, 1, 1, 1)
+  call test4
+  call test5 (2, 6, -2, 4, 13, 18)
+  call test6 (2, 6, -2, 4, 13, 18, 1, 1, 1)
+contains
+  subroutine test1
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    !$acc parallel
+    !$acc loop collapse(3)
+      do 115 k=1,3
+dokk:   do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.1)) call abort
+    !$acc parallel
+    !$acc loop collapse(3)
+dol:  do 120 l=1,3
+doll:   do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.2)) call abort
+  end subroutine test1
+
+  subroutine test2(v1, v2, v3, v4, v5, v6)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel copyin (v1, v2, v3, v4, v5, v6)
+    !$acc loop collapse (3) reduction (.or.:l)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.l) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test2
+
+  subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel  copyin (v1, v2, v3, v4, v5, v6, v7, v8, v9)
+    !$acc loop collapse (3) reduction (.or.:l)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.l) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test3
+
+  subroutine test4
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    v1 = 2
+    v2 = 6
+    v3 = -2
+    v4 = 4
+    v5 = 13
+    v6 = 18
+    v7 = 1
+    v8 = 1
+    v9 = 1
+    !$acc parallel  copyin (v1, v2, v3, v4, v5, v6, v7, v8, v9)
+    !$acc loop collapse (3) reduction (.or.:l)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+         do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+         end do
+      end do
+    end do
+  end subroutine test4
+
+  subroutine test5(v1, v2, v3, v4, v5, v6)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel copyin (v1, v2, v3, v4, v5, v6)
+    !$acc loop collapse (3) reduction (.or.:l)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test5
+
+  subroutine test6(v1, v2, v3, v4, v5, v6, v7, v8, v9)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel copyin (v1, v2, v3, v4, v5, v6, v7, v8, v9)
+    !$acc loop collapse (3) reduction (.or.:l)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+            m = i * 100 + j * 10 + k
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test6
+
+end program sub_collapse_3

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [patch,gomp-4_0-branch] acc nested function support
  2014-11-05  0:45 ` Cesar Philippidis
@ 2014-11-05 15:24   ` David Malcolm
  0 siblings, 0 replies; 13+ messages in thread
From: David Malcolm @ 2014-11-05 15:24 UTC (permalink / raw)
  To: Cesar Philippidis, jakub; +Cc: Thomas Schwinge, gcc-patches

On Tue, 2014-11-04 at 16:45 -0800, Cesar Philippidis wrote:
> Here's an updated version of my nested function patch.
> 
> David, I tweaked the gimple class hierarchy a little bit. Here's what
> the updated class diagram looks like:
> 
>      + gimple_statement_omp
>      |   |    layout: GSS_OMP.  Used for code GIMPLE_OMP_SECTION
>      |   |
>      |   + gimple_statement_omp_parallel_layout
>      |   |   |    layout: GSS_OMP_PARALLEL_LAYOUT
>      |   |   |
>      |   |   + gimple_statement_omp_targetreg
>      |   |       |
>      |   |       + gimple_statement_oacc_kernels
>      |   |       |        code: GIMPLE_OACC_KERNELS
>      |   |       |
>      |   |       + gimple_statement_oacc_parallel
>      |   |       |        code: GIMPLE_OACC_PARALLEL
>      |   |       |
>      |   |       + gimple_statement_omp_target
>      |   |                code: GIMPLE_OMP_TARGET
> 
> Basically, I've introduced gimple_statement_omp_targetreg and made
> GIMPLE_OACC_{PARALLEL,KERNELS} and GIMPLE_OMP_TARGET inherit from it. This
> seems to work out pretty well. It cleans up both
> {lower,expand}_oacc_offload in omp-low.c and allows OpenACC kernel and
> parallel regions to be treated as OpenMP target regions in
> tree-nested.c. Are these changes to gimple.h OK?

I'm not a reviewer, so it's not directly up to me, but if it simplifies
the code then it seems reasonable.  I'm interested in Jakub's opinion.

> Thomas, assuming these gimple changes are OK, should I commit this
> change to gomp-4_0-branch, or do you want to include this patch with
> your middle end trunk submission?


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition
@ 2016-06-01 15:07 Thomas Schwinge
  2016-06-01 15:12 ` Jakub Jelinek
  2016-06-13 14:43 ` [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c Thomas Schwinge
  0 siblings, 2 replies; 13+ messages in thread
From: Thomas Schwinge @ 2016-06-01 15:07 UTC (permalink / raw)
  To: Jakub Jelinek, gcc-patches, fortran

Hi!

Here are the OpenACC bits of <http://gcc.gnu.org/PR71373>.

As we're currently not paying attention to OpenACC tile clauses in the
middle end, and thus OMP_CLAUSE_TILE's arguments are not to be considered
stable, I opted to simply discard them early, simplifying their
gcc/tree-nested.c handling.  Everything else should be self-explanatory.

OK for trunk and gcc-6-branch?

commit e3a027408c82683d824003645dc3e4567a1435f7
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Wed Jun 1 17:01:35 2016 +0200

    [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition
    
    	gcc/
    	PR middle-end/71373
    	* gimplify.c (gimplify_adjust_omp_clauses): Discard
    	OMP_CLAUSE_TILE.
    	* omp-low.c (scan_sharing_clauses): Don't expect
    	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
    	* tree-nested.c (convert_nonlocal_omp_clauses)
    	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
    	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
    	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
    	gcc/testsuite/
    	PR middle-end/71373
    	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
    	for OpenACC tile clauses.
    	* gfortran.dg/goacc/combined-directives.f90: Likewise.
    	* gfortran.dg/goacc/subroutines.f90: Update.
---
 gcc/gimplify.c                                     |  6 +++
 gcc/omp-low.c                                      | 10 +----
 .../c-c++-common/goacc/combined-directives.c       |  3 +-
 .../gfortran.dg/goacc/combined-directives.f90      |  3 +-
 gcc/testsuite/gfortran.dg/goacc/subroutines.f90    | 47 +++++++++++++++-------
 gcc/tree-nested.c                                  | 30 ++++++++++++++
 6 files changed, 75 insertions(+), 24 deletions(-)

diff --git gcc/gimplify.c gcc/gimplify.c
index 131fa24..bd1cfe3 100644
--- gcc/gimplify.c
+++ gcc/gimplify.c
@@ -8280,7 +8280,13 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
 	case OMP_CLAUSE_VECTOR:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
+	  break;
+
 	case OMP_CLAUSE_TILE:
+	  /* We're not yet making use of the information provided by OpenACC
+	     tile clauses.  Discard these here, to simplify later middle end
+	     processing.  */
+	  remove = true;
 	  break;
 
 	default:
diff --git gcc/omp-low.c gcc/omp-low.c
index 77bdb18..c6ba31c 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -2187,7 +2187,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
@@ -2201,9 +2200,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	  break;
 
 	case OMP_CLAUSE__CACHE_:
-	  sorry ("Clause not supported yet");
-	  break;
-
+	case OMP_CLAUSE_TILE:
 	default:
 	  gcc_unreachable ();
 	}
@@ -2360,7 +2357,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
@@ -2368,9 +2364,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	  break;
 
 	case OMP_CLAUSE__CACHE_:
-	  sorry ("Clause not supported yet");
-	  break;
-
+	case OMP_CLAUSE_TILE:
 	default:
 	  gcc_unreachable ();
 	}
diff --git gcc/testsuite/c-c++-common/goacc/combined-directives.c gcc/testsuite/c-c++-common/goacc/combined-directives.c
index c2a3c57..3fa800d 100644
--- gcc/testsuite/c-c++-common/goacc/combined-directives.c
+++ gcc/testsuite/c-c++-common/goacc/combined-directives.c
@@ -111,6 +111,7 @@ test ()
 // { dg-final { scan-tree-dump-times "acc loop vector" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop seq" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop auto" 2 "gimple" } }
-// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" } }
+// XFAILed: OpenACC tile clauses are discarded during gimplification.
+// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 // { dg-final { scan-tree-dump-times "acc loop independent private.i" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
diff --git gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
index 42a447a..abb5e6b 100644
--- gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
+++ gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
@@ -143,7 +143,8 @@ end subroutine test
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 2 "gimple" } }
-! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" } }
+! XFAILed: OpenACC tile clauses are discarded during gimplification.
+! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "omp target oacc_\[^ \]+ map.force_tofrom:y" 2 "gimple" } }
diff --git gcc/testsuite/gfortran.dg/goacc/subroutines.f90 gcc/testsuite/gfortran.dg/goacc/subroutines.f90
index 6cab798..7c67e52 100644
--- gcc/testsuite/gfortran.dg/goacc/subroutines.f90
+++ gcc/testsuite/gfortran.dg/goacc/subroutines.f90
@@ -1,6 +1,4 @@
-! Exercise how tree-nested.c handles gang, worker vector and seq.
-
-! { dg-do compile } 
+! Exercise how tree-nested.c handles OpenACC clauses.
 
 program main
   integer, parameter :: N = 100
@@ -26,20 +24,31 @@ contains
     local_a (:) = 5
     local_arg = 5
 
-    !$acc kernels loop gang(num:local_arg) worker(local_arg) vector(local_arg)
+    !$acc kernels loop &
+    !$acc gang(num:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait async(local_arg)
     do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 5))
        local_a(local_i) = 100
-       !$acc loop seq
+       !$acc loop seq tile(*)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(1)
        do local_j = 1, N
        enddo
     enddo
     !$acc end kernels loop
 
-    !$acc kernels loop gang(static:local_arg) worker(local_arg) &
-    !$acc vector(local_arg)
+    !$acc kernels loop &
+    !$acc gang(static:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait(local_arg, local_arg + 1, local_arg + 2) async
     do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 4))
        local_a(local_i) = 100
-       !$acc loop seq
+       !$acc loop seq tile(1)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
        do local_j = 1, N
        enddo
     enddo
@@ -50,21 +59,31 @@ contains
     nonlocal_a (:) = 5
     nonlocal_arg = 5
   
-    !$acc kernels loop gang(num:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
+    !$acc kernels loop &
+    !$acc gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait async(nonlocal_arg)
     do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 3))
        nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
+       !$acc loop seq tile(2)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(3)
        do nonlocal_j = 1, N
        enddo
     enddo
     !$acc end kernels loop
 
-    !$acc kernels loop gang(static:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
+    !$acc kernels loop &
+    !$acc gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
     do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 2))
        nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
+       !$acc loop seq tile(*)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
        do nonlocal_j = 1, N
        enddo
     enddo
diff --git gcc/tree-nested.c gcc/tree-nested.c
index 25a92aa..97d3c52 100644
--- gcc/tree-nested.c
+++ gcc/tree-nested.c
@@ -1114,6 +1114,8 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1197,8 +1199,21 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
 	default:
 	  gcc_unreachable ();
 	}
@@ -1790,6 +1805,8 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1878,8 +1895,21 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
 	default:
 	  gcc_unreachable ();
 	}


Grüße
 Thomas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition
  2016-06-01 15:07 [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition Thomas Schwinge
@ 2016-06-01 15:12 ` Jakub Jelinek
  2016-06-02 16:21   ` Thomas Schwinge
  2016-06-13 14:43 ` [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c Thomas Schwinge
  1 sibling, 1 reply; 13+ messages in thread
From: Jakub Jelinek @ 2016-06-01 15:12 UTC (permalink / raw)
  To: Thomas Schwinge; +Cc: gcc-patches, fortran

On Wed, Jun 01, 2016 at 05:06:42PM +0200, Thomas Schwinge wrote:
> Here are the OpenACC bits of <http://gcc.gnu.org/PR71373>.
> 
> As we're currently not paying attention to OpenACC tile clauses in the
> middle end, and thus OMP_CLAUSE_TILE's arguments are not to be considered
> stable, I opted to simply discard them early, simplifying their
> gcc/tree-nested.c handling.  Everything else should be self-explanatory.
> 
> OK for trunk and gcc-6-branch?

LGTM for both, but as a follow-up please also work on a C testcase
with nested functions that covers all the clauses (both for
vars/expressions that are defined in the current function and used by a
nested function, and for vars/expressions that are defined in the parent
function and used in clauses inside of the nested function).

	Jakub

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition
  2016-06-01 15:12 ` Jakub Jelinek
@ 2016-06-02 16:21   ` Thomas Schwinge
  2016-06-02 16:25     ` Jakub Jelinek
  0 siblings, 1 reply; 13+ messages in thread
From: Thomas Schwinge @ 2016-06-02 16:21 UTC (permalink / raw)
  To: gcc-patches, Jakub Jelinek; +Cc: fortran

Hi!

On Wed, 1 Jun 2016 17:12:17 +0200, Jakub Jelinek <jakub@redhat.com> wrote:
> On Wed, Jun 01, 2016 at 05:06:42PM +0200, Thomas Schwinge wrote:
> > Here are the OpenACC bits of <http://gcc.gnu.org/PR71373>.
> > 
> > As we're currently not paying attention to OpenACC tile clauses in the
> > middle end, and thus OMP_CLAUSE_TILE's arguments are not to be considered
> > stable, I opted to simply discard them early, simplifying their
> > gcc/tree-nested.c handling.  Everything else should be self-explanatory.
> > 
> > OK for trunk and gcc-6-branch?
> 
> LGTM for both, but as a follow-up please also work on a C testcase
> with nested functions that covers all the clauses (both for
> vars/expressions that are defined in the current function and used by a
> nested function, and for vars/expressions that are defined in the parent
> function and used in clauses inside of the nested function).

OK, I translated gcc/testsuite/gfortran.dg/goacc/subroutines.f90 from
Fortran to C: gcc/testsuite/gcc.dg/goacc/nested.c.  For amusement ;-) I'm
also including the test case that originally made us aware of the
problem, gcc/testsuite/gcc.dg/goacc/pr71373.c.  Oh, and I just remembered
<http://news.gmane.org/find-root.php?message_id=%3C5459732B.1010101%40codesourcery.com%3E>,
so after re-testing, I'll also include these test cases, as far as still
relevant.  Nested function decomposition is not applicable to C++, so we
don't need any C++ test cases, right?

During the translation of gcc/testsuite/gfortran.dg/goacc/subroutines.f90
from Fortran to C, I stumbled upon <https://gcc.gnu.org/PR71381> "C/C++
OpenACC cache directive rejects valid syntax",
<http://news.gmane.org/find-root.php?message_id=%3C877fe7sthf.fsf%40kepler.schwinge.homeip.net%3E>,
so that one will need to go in first, before I'll then commit the
following:

commit 7eff9da0e8fe5eda7d76b9a27dbb1ec4e6183844
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Wed Jun 1 17:01:35 2016 +0200

    [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition
    
    	gcc/
    	* gimplify.c (gimplify_adjust_omp_clauses): Discard
    	OMP_CLAUSE_TILE.
    	* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
    	gcc/testsuite/
    	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
    	for OpenACC tile clauses.
    	* gfortran.dg/goacc/combined-directives.f90: Likewise.
    
    	gcc/
    	PR middle-end/71373
    	* tree-nested.c (convert_nonlocal_omp_clauses)
    	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
    	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
    	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
    	gcc/testsuite/
    	PR middle-end/71373
    	* gcc.dg/goacc/nested.c: New file.
    	* gcc.dg/goacc/pr71373.c: Likewise.
    	* gfortran.dg/goacc/subroutines.f90: Update.
---
 gcc/gimplify.c                                     |   6 ++
 gcc/omp-low.c                                      |   4 +-
 .../c-c++-common/goacc/combined-directives.c       |   3 +-
 gcc/testsuite/gcc.dg/goacc/nested.c                | 100 +++++++++++++++++++++
 gcc/testsuite/gcc.dg/goacc/pr71373.c               |  41 +++++++++
 .../gfortran.dg/goacc/combined-directives.f90      |   3 +-
 gcc/testsuite/gfortran.dg/goacc/subroutines.f90    |  56 ++++++++----
 gcc/tree-nested.c                                  |  30 +++++++
 8 files changed, 221 insertions(+), 22 deletions(-)

diff --git gcc/gimplify.c gcc/gimplify.c
index f12c6a1..7c19cf3 100644
--- gcc/gimplify.c
+++ gcc/gimplify.c
@@ -8280,7 +8280,13 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
 	case OMP_CLAUSE_VECTOR:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
+	  break;
+
 	case OMP_CLAUSE_TILE:
+	  /* We're not yet making use of the information provided by OpenACC
+	     tile clauses.  Discard these here, to simplify later middle end
+	     processing.  */
+	  remove = true;
 	  break;
 
 	default:
diff --git gcc/omp-low.c gcc/omp-low.c
index 91d5fcf..c6ba31c 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -2187,7 +2187,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
@@ -2201,6 +2200,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	  break;
 
 	case OMP_CLAUSE__CACHE_:
+	case OMP_CLAUSE_TILE:
 	default:
 	  gcc_unreachable ();
 	}
@@ -2357,7 +2357,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
@@ -2365,6 +2364,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	  break;
 
 	case OMP_CLAUSE__CACHE_:
+	case OMP_CLAUSE_TILE:
 	default:
 	  gcc_unreachable ();
 	}
diff --git gcc/testsuite/c-c++-common/goacc/combined-directives.c gcc/testsuite/c-c++-common/goacc/combined-directives.c
index c2a3c57..3fa800d 100644
--- gcc/testsuite/c-c++-common/goacc/combined-directives.c
+++ gcc/testsuite/c-c++-common/goacc/combined-directives.c
@@ -111,6 +111,7 @@ test ()
 // { dg-final { scan-tree-dump-times "acc loop vector" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop seq" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop auto" 2 "gimple" } }
-// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" } }
+// XFAILed: OpenACC tile clauses are discarded during gimplification.
+// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 // { dg-final { scan-tree-dump-times "acc loop independent private.i" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
diff --git gcc/testsuite/gcc.dg/goacc/nested.c gcc/testsuite/gcc.dg/goacc/nested.c
new file mode 100644
index 0000000..6e1f236
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/nested.c
@@ -0,0 +1,100 @@
+/* Exercise how tree-nested.c handles OpenACC clauses.  */
+/* See gcc/testsuite/gfortran.dg/goacc/subroutines.f90 for the Fortran
+   version.  */
+
+int main ()
+{
+#define N 100
+  int nonlocal_arg;
+  int nonlocal_a[N];
+  int nonlocal_i;
+  int nonlocal_j;
+
+  for (int i = 0; i < N; ++i)
+    nonlocal_a[i] = 5;
+  nonlocal_arg = 5;
+
+  void local ()
+  {
+    int local_i;
+    int local_arg;
+    int local_a[N];
+    int local_j;
+
+    for (int i = 0; i < N; ++i)
+      local_a[i] = 5;
+    local_arg = 5;
+
+#pragma acc kernels loop \
+  gang(num:local_arg) worker(local_arg) vector(local_arg) \
+  wait async(local_arg)
+    for (local_i = 0; local_i < N; ++local_i)
+      {
+#pragma acc cache (local_a[local_i:5])
+	local_a[local_i] = 100;
+#pragma acc loop seq tile(*)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+#pragma acc loop auto independent tile(1)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+      }
+
+#pragma acc kernels loop \
+  gang(static:local_arg) worker(local_arg) vector(local_arg) \
+  wait(local_arg, local_arg + 1, local_arg + 2) async
+    for (local_i = 0; local_i < N; ++local_i)
+      {
+#pragma acc cache (local_a[local_i:4])
+	local_a[local_i] = 100;
+#pragma acc loop seq tile(1)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+#pragma acc loop auto independent tile(*)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+      }
+  }
+
+  void nonlocal ()
+  {
+    for (int i = 0; i < N; ++i)
+      nonlocal_a[i] = 5;
+    nonlocal_arg = 5;
+
+#pragma acc kernels loop \
+  gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+  wait async(nonlocal_arg)
+    for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+      {
+#pragma acc cache (nonlocal_a[nonlocal_i:3])
+	nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(2)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+#pragma acc loop auto independent tile(3)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+      }
+
+#pragma acc kernels loop \
+  gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+  wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+    for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+      {
+#pragma acc cache (nonlocal_a[nonlocal_i:2])
+	nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(*)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+#pragma acc loop auto independent tile(*)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+      }
+  }
+
+  local ();
+  nonlocal ();
+
+  return 0;
+}
diff --git gcc/testsuite/gcc.dg/goacc/pr71373.c gcc/testsuite/gcc.dg/goacc/pr71373.c
new file mode 100644
index 0000000..9381752
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/pr71373.c
@@ -0,0 +1,41 @@
+/* Unintentional nested function usage.  */
+/* Due to missing right braces '}', the following functions are parsed as
+   nested functions.  This ran into an ICE.  */
+
+void foo (void)
+{
+  #pragma acc parallel
+  {
+    #pragma acc loop independent
+    for (int i = 0; i < 16; i++)
+      ;
+  // Note right brace '}' commented out here.
+  //}
+}
+void bar (void)
+{
+}
+
+// Adding right brace '}' here, to make this compile.
+}
+
+
+// ..., and the other way round:
+
+void BAR (void)
+{
+// Note right brace '}' commented out here.
+//}
+
+void FOO (void)
+{
+  #pragma acc parallel
+  {
+    #pragma acc loop independent
+    for (int i = 0; i < 16; i++)
+      ;
+  }
+}
+
+// Adding right brace '}' here, to make this compile.
+}
diff --git gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
index 42a447a..abb5e6b 100644
--- gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
+++ gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
@@ -143,7 +143,8 @@ end subroutine test
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 2 "gimple" } }
-! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" } }
+! XFAILed: OpenACC tile clauses are discarded during gimplification.
+! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "omp target oacc_\[^ \]+ map.force_tofrom:y" 2 "gimple" } }
diff --git gcc/testsuite/gfortran.dg/goacc/subroutines.f90 gcc/testsuite/gfortran.dg/goacc/subroutines.f90
index 6cab798..479ef4f 100644
--- gcc/testsuite/gfortran.dg/goacc/subroutines.f90
+++ gcc/testsuite/gfortran.dg/goacc/subroutines.f90
@@ -1,6 +1,5 @@
-! Exercise how tree-nested.c handles gang, worker vector and seq.
-
-! { dg-do compile } 
+! Exercise how tree-nested.c handles OpenACC clauses.
+! See gcc/testsuite/gcc.dg/goacc/nested.c for the C version.
 
 program main
   integer, parameter :: N = 100
@@ -8,10 +7,10 @@ program main
   integer :: nonlocal_a(N)
   integer :: nonlocal_i
   integer :: nonlocal_j
-  
+
   nonlocal_a (:) = 5
   nonlocal_arg = 5
-  
+
   call local ()
   call nonlocal ()
 
@@ -22,24 +21,35 @@ contains
     integer :: local_arg
     integer :: local_a(N)
     integer :: local_j
-    
+
     local_a (:) = 5
     local_arg = 5
 
-    !$acc kernels loop gang(num:local_arg) worker(local_arg) vector(local_arg)
+    !$acc kernels loop &
+    !$acc gang(num:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait async(local_arg)
     do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 5))
        local_a(local_i) = 100
-       !$acc loop seq
+       !$acc loop seq tile(*)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(1)
        do local_j = 1, N
        enddo
     enddo
     !$acc end kernels loop
 
-    !$acc kernels loop gang(static:local_arg) worker(local_arg) &
-    !$acc vector(local_arg)
+    !$acc kernels loop &
+    !$acc gang(static:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait(local_arg, local_arg + 1, local_arg + 2) async
     do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 4))
        local_a(local_i) = 100
-       !$acc loop seq
+       !$acc loop seq tile(1)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
        do local_j = 1, N
        enddo
     enddo
@@ -49,22 +59,32 @@ contains
   subroutine nonlocal ()
     nonlocal_a (:) = 5
     nonlocal_arg = 5
-  
-    !$acc kernels loop gang(num:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
+
+    !$acc kernels loop &
+    !$acc gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait async(nonlocal_arg)
     do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 3))
        nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
+       !$acc loop seq tile(2)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(3)
        do nonlocal_j = 1, N
        enddo
     enddo
     !$acc end kernels loop
 
-    !$acc kernels loop gang(static:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
+    !$acc kernels loop &
+    !$acc gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
     do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 2))
        nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
+       !$acc loop seq tile(*)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
        do nonlocal_j = 1, N
        enddo
     enddo
diff --git gcc/tree-nested.c gcc/tree-nested.c
index 25a92aa..97d3c52 100644
--- gcc/tree-nested.c
+++ gcc/tree-nested.c
@@ -1114,6 +1114,8 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1197,8 +1199,21 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
 	default:
 	  gcc_unreachable ();
 	}
@@ -1790,6 +1805,8 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1878,8 +1895,21 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
 	default:
 	  gcc_unreachable ();
 	}


Grüße
 Thomas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition
  2016-06-02 16:21   ` Thomas Schwinge
@ 2016-06-02 16:25     ` Jakub Jelinek
  2016-06-10 10:36       ` Thomas Schwinge
  0 siblings, 1 reply; 13+ messages in thread
From: Jakub Jelinek @ 2016-06-02 16:25 UTC (permalink / raw)
  To: Thomas Schwinge; +Cc: gcc-patches, fortran

On Thu, Jun 02, 2016 at 06:20:57PM +0200, Thomas Schwinge wrote:
> relevant.  Nested function decomposition is not applicable to C++, so we
> don't need any C++ test cases, right?

C++ has lambdas, but those are already lowered in the FE, so yes,
from the OpenMP/OpenACC FEs, tree-nested.c is only used by C and Fortran.

>     [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition
>     
>     	gcc/
>     	* gimplify.c (gimplify_adjust_omp_clauses): Discard
>     	OMP_CLAUSE_TILE.
>     	* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
>     	gcc/testsuite/
>     	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
>     	for OpenACC tile clauses.
>     	* gfortran.dg/goacc/combined-directives.f90: Likewise.
>     
>     	gcc/
>     	PR middle-end/71373
>     	* tree-nested.c (convert_nonlocal_omp_clauses)
>     	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
>     	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
>     	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
>     	gcc/testsuite/
>     	PR middle-end/71373
>     	* gcc.dg/goacc/nested.c: New file.
>     	* gcc.dg/goacc/pr71373.c: Likewise.
>     	* gfortran.dg/goacc/subroutines.f90: Update.

LGTM.

	Jakub

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition
  2016-06-02 16:25     ` Jakub Jelinek
@ 2016-06-10 10:36       ` Thomas Schwinge
  0 siblings, 0 replies; 13+ messages in thread
From: Thomas Schwinge @ 2016-06-10 10:36 UTC (permalink / raw)
  To: Jakub Jelinek, gcc-patches, fortran

[-- Attachment #1: Type: text/plain, Size: 160140 bytes --]

Hi!

On Thu, 2 Jun 2016 18:25:17 +0200, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, Jun 02, 2016 at 06:20:57PM +0200, Thomas Schwinge wrote:
> >     [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition

> LGTM.

Committed to trunk in r237291, gcc-6-branch in r237296, and
gomp-4_0-branch in r237300:

commit e2c7e1b8ad89532911b25af34875a75f24823e1a
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Fri Jun 10 09:22:51 2016 +0000

    [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition
    
    	gcc/
    	* gimplify.c (gimplify_adjust_omp_clauses): Discard
    	OMP_CLAUSE_TILE.
    	* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
    	gcc/testsuite/
    	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
    	for OpenACC tile clauses.
    	* gfortran.dg/goacc/combined-directives.f90: Likewise.
    
    	gcc/
    	PR middle-end/71373
    	* tree-nested.c (convert_nonlocal_omp_clauses)
    	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
    	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
    	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
    	gcc/testsuite/
    	PR middle-end/71373
    	* gcc.dg/goacc/nested-function-1.c: New file.
    	* gcc.dg/goacc/nested-function-2.c: Likewise.
    	* gcc.dg/goacc/pr71373.c: Likewise.
    	* gfortran.dg/goacc/cray-2.f95: Likewise.
    	* gfortran.dg/goacc/loop-1-2.f95: Likewise.
    	* gfortran.dg/goacc/loop-3-2.f95: Likewise.
    	* gfortran.dg/goacc/cray.f95: Update.
    	* gfortran.dg/goacc/loop-1.f95: Likewise.
    	* gfortran.dg/goacc/loop-3.f95: Likewise.
    	* gfortran.dg/goacc/subroutines.f90: Update, and rename to...
    	* gfortran.dg/goacc/nested-function-1.f90: ... this new file.
    	libgomp/testsuite/
    	PR middle-end/71373
    	* libgomp.oacc-c/nested-function-1.c: New file.
    	* libgomp.oacc-c/nested-function-2.c: Likewise.
    	* libgomp.oacc-fortran/nested-function-1.f90: Likewise.
    	* libgomp.oacc-fortran/nested-function-2.f90: Likewise.
    	* libgomp.oacc-fortran/nested-function-3.f90: Likewise.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@237291 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog                                      |  10 +
 gcc/gimplify.c                                     |   6 +
 gcc/omp-low.c                                      |   4 +-
 gcc/testsuite/ChangeLog                            |  20 ++
 .../c-c++-common/goacc/combined-directives.c       |   3 +-
 gcc/testsuite/gcc.dg/goacc/nested-function-1.c     | 100 +++++++++
 gcc/testsuite/gcc.dg/goacc/nested-function-2.c     |  45 ++++
 gcc/testsuite/gcc.dg/goacc/pr71373.c               |  41 ++++
 .../gfortran.dg/goacc/combined-directives.f90      |   3 +-
 .../gfortran.dg/goacc/{cray.f95 => cray-2.f95}     |  11 +-
 gcc/testsuite/gfortran.dg/goacc/cray.f95           |   6 +-
 .../gfortran.dg/goacc/{loop-1.f95 => loop-1-2.f95} |  36 +--
 gcc/testsuite/gfortran.dg/goacc/loop-1.f95         |  30 +--
 .../gfortran.dg/goacc/{loop-3.f95 => loop-3-2.f95} |  15 +-
 gcc/testsuite/gfortran.dg/goacc/loop-3.f95         |  11 +-
 .../gfortran.dg/goacc/nested-function-1.f90        |  93 ++++++++
 gcc/testsuite/gfortran.dg/goacc/subroutines.f90    |  73 ------
 gcc/tree-nested.c                                  |  30 +++
 libgomp/ChangeLog                                  |  10 +
 .../testsuite/libgomp.oacc-c/nested-function-1.c   |  52 +++++
 .../testsuite/libgomp.oacc-c/nested-function-2.c   | 155 +++++++++++++
 .../libgomp.oacc-fortran/nested-function-1.f90     |  70 ++++++
 .../libgomp.oacc-fortran/nested-function-2.f90     | 173 +++++++++++++++
 .../libgomp.oacc-fortran/nested-function-3.f90     | 244 +++++++++++++++++++++
 24 files changed, 1113 insertions(+), 128 deletions(-)

diff --git gcc/ChangeLog gcc/ChangeLog
index 6afbae7..9cab311 100644
--- gcc/ChangeLog
+++ gcc/ChangeLog
@@ -1,5 +1,15 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/71373
+	* tree-nested.c (convert_nonlocal_omp_clauses)
+	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
+	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
+	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
+
+	* gimplify.c (gimplify_adjust_omp_clauses): Discard
+	OMP_CLAUSE_TILE.
+	* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
+
 	* omp-low.c (scan_sharing_clauses): Don't expect
 	OMP_CLAUSE__CACHE_.
 
diff --git gcc/gimplify.c gcc/gimplify.c
index f12c6a1..7c19cf3 100644
--- gcc/gimplify.c
+++ gcc/gimplify.c
@@ -8280,7 +8280,13 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
 	case OMP_CLAUSE_VECTOR:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
+	  break;
+
 	case OMP_CLAUSE_TILE:
+	  /* We're not yet making use of the information provided by OpenACC
+	     tile clauses.  Discard these here, to simplify later middle end
+	     processing.  */
+	  remove = true;
 	  break;
 
 	default:
diff --git gcc/omp-low.c gcc/omp-low.c
index 91d5fcf..22e5909 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -2187,7 +2187,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
@@ -2200,6 +2199,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	    install_var_local (decl, ctx);
 	  break;
 
+	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE__CACHE_:
 	default:
 	  gcc_unreachable ();
@@ -2357,13 +2357,13 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
 	case OMP_CLAUSE__GRIDDIM_:
 	  break;
 
+	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE__CACHE_:
 	default:
 	  gcc_unreachable ();
diff --git gcc/testsuite/ChangeLog gcc/testsuite/ChangeLog
index e15b009..325de75 100644
--- gcc/testsuite/ChangeLog
+++ gcc/testsuite/ChangeLog
@@ -1,4 +1,24 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
+	    Cesar Philippidis  <cesar@codesourcery.com>
+
+	PR middle-end/71373
+	* gcc.dg/goacc/nested-function-1.c: New file.
+	* gcc.dg/goacc/nested-function-2.c: Likewise.
+	* gcc.dg/goacc/pr71373.c: Likewise.
+	* gfortran.dg/goacc/cray-2.f95: Likewise.
+	* gfortran.dg/goacc/loop-1-2.f95: Likewise.
+	* gfortran.dg/goacc/loop-3-2.f95: Likewise.
+	* gfortran.dg/goacc/cray.f95: Update.
+	* gfortran.dg/goacc/loop-1.f95: Likewise.
+	* gfortran.dg/goacc/loop-3.f95: Likewise.
+	* gfortran.dg/goacc/subroutines.f90: Update, and rename to...
+	* gfortran.dg/goacc/nested-function-1.f90: ... this new file.
+
+2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
+
+	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
+	for OpenACC tile clauses.
+	* gfortran.dg/goacc/combined-directives.f90: Likewise.
 
 	PR c/71381
 	* c-c++-common/goacc/cache-1.c: Update.  Move invalid usage tests
diff --git gcc/testsuite/c-c++-common/goacc/combined-directives.c gcc/testsuite/c-c++-common/goacc/combined-directives.c
index c2a3c57..3fa800d 100644
--- gcc/testsuite/c-c++-common/goacc/combined-directives.c
+++ gcc/testsuite/c-c++-common/goacc/combined-directives.c
@@ -111,6 +111,7 @@ test ()
 // { dg-final { scan-tree-dump-times "acc loop vector" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop seq" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop auto" 2 "gimple" } }
-// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" } }
+// XFAILed: OpenACC tile clauses are discarded during gimplification.
+// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 // { dg-final { scan-tree-dump-times "acc loop independent private.i" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
diff --git gcc/testsuite/gcc.dg/goacc/nested-function-1.c gcc/testsuite/gcc.dg/goacc/nested-function-1.c
new file mode 100644
index 0000000..e17c0e2
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/nested-function-1.c
@@ -0,0 +1,100 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+/* See gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 for the Fortran
+   version.  */
+
+int main ()
+{
+#define N 100
+  int nonlocal_arg;
+  int nonlocal_a[N];
+  int nonlocal_i;
+  int nonlocal_j;
+
+  for (int i = 0; i < N; ++i)
+    nonlocal_a[i] = 5;
+  nonlocal_arg = 5;
+
+  void local ()
+  {
+    int local_i;
+    int local_arg;
+    int local_a[N];
+    int local_j;
+
+    for (int i = 0; i < N; ++i)
+      local_a[i] = 5;
+    local_arg = 5;
+
+#pragma acc kernels loop \
+  gang(num:local_arg) worker(local_arg) vector(local_arg) \
+  wait async(local_arg)
+    for (local_i = 0; local_i < N; ++local_i)
+      {
+#pragma acc cache (local_a[local_i:5])
+	local_a[local_i] = 100;
+#pragma acc loop seq tile(*)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+#pragma acc loop auto independent tile(1)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+      }
+
+#pragma acc kernels loop \
+  gang(static:local_arg) worker(local_arg) vector(local_arg) \
+  wait(local_arg, local_arg + 1, local_arg + 2) async
+    for (local_i = 0; local_i < N; ++local_i)
+      {
+#pragma acc cache (local_a[local_i:4])
+	local_a[local_i] = 100;
+#pragma acc loop seq tile(1)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+#pragma acc loop auto independent tile(*)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+      }
+  }
+
+  void nonlocal ()
+  {
+    for (int i = 0; i < N; ++i)
+      nonlocal_a[i] = 5;
+    nonlocal_arg = 5;
+
+#pragma acc kernels loop \
+  gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+  wait async(nonlocal_arg)
+    for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+      {
+#pragma acc cache (nonlocal_a[nonlocal_i:3])
+	nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(2)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+#pragma acc loop auto independent tile(3)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+      }
+
+#pragma acc kernels loop \
+  gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+  wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+    for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+      {
+#pragma acc cache (nonlocal_a[nonlocal_i:2])
+	nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(*)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+#pragma acc loop auto independent tile(*)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+      }
+  }
+
+  local ();
+  nonlocal ();
+
+  return 0;
+}
diff --git gcc/testsuite/gcc.dg/goacc/nested-function-2.c gcc/testsuite/gcc.dg/goacc/nested-function-2.c
new file mode 100644
index 0000000..70c9ec8
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/nested-function-2.c
@@ -0,0 +1,45 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  int j = 0, k = 6, l = 7, m = 8;
+  void simple (void)
+  {
+    int i;
+#pragma acc parallel
+    {
+#pragma acc loop
+      for (i = 0; i < m; i+= k)
+	j = (m + i - j) * l;
+    }
+  }
+  void collapse (void)
+  {
+    int x, y, z;
+#pragma acc parallel
+    {
+#pragma acc loop collapse (3)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  void reduction (void)
+  {
+    int x, y, z;
+#pragma acc parallel reduction (+:j)
+    {
+#pragma acc loop reduction (+:j) collapse (3)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  simple();
+  collapse();
+  reduction();
+  return 0;
+}
diff --git gcc/testsuite/gcc.dg/goacc/pr71373.c gcc/testsuite/gcc.dg/goacc/pr71373.c
new file mode 100644
index 0000000..9381752
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/pr71373.c
@@ -0,0 +1,41 @@
+/* Unintentional nested function usage.  */
+/* Due to missing right braces '}', the following functions are parsed as
+   nested functions.  This ran into an ICE.  */
+
+void foo (void)
+{
+  #pragma acc parallel
+  {
+    #pragma acc loop independent
+    for (int i = 0; i < 16; i++)
+      ;
+  // Note right brace '}' commented out here.
+  //}
+}
+void bar (void)
+{
+}
+
+// Adding right brace '}' here, to make this compile.
+}
+
+
+// ..., and the other way round:
+
+void BAR (void)
+{
+// Note right brace '}' commented out here.
+//}
+
+void FOO (void)
+{
+  #pragma acc parallel
+  {
+    #pragma acc loop independent
+    for (int i = 0; i < 16; i++)
+      ;
+  }
+}
+
+// Adding right brace '}' here, to make this compile.
+}
diff --git gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
index 42a447a..abb5e6b 100644
--- gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
+++ gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
@@ -143,7 +143,8 @@ end subroutine test
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 2 "gimple" } }
-! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" } }
+! XFAILed: OpenACC tile clauses are discarded during gimplification.
+! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "omp target oacc_\[^ \]+ map.force_tofrom:y" 2 "gimple" } }
diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray-2.f95
similarity index 91%
copy from gcc/testsuite/gfortran.dg/goacc/cray.f95
copy to gcc/testsuite/gfortran.dg/goacc/cray-2.f95
index 705c18c..51b79b5 100644
--- gcc/testsuite/gfortran.dg/goacc/cray.f95
+++ gcc/testsuite/gfortran.dg/goacc/cray-2.f95
@@ -1,15 +1,16 @@
-! { dg-do compile } 
 ! { dg-additional-options "-fcray-pointer" }
+! See also cray.f95.
 
-module test
+program test
+  call oacc1
 contains
   subroutine oacc1
     implicit none
     integer :: i
     real :: pointee
     pointer (ptr, pointee)
-    !$acc declare device_resident (pointee) 
-    !$acc declare device_resident (ptr) 
+    !$acc declare device_resident (pointee)
+    !$acc declare device_resident (ptr)
     !$acc data copy (pointee) ! { dg-error "Cray pointee" }
     !$acc end data
     !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
@@ -52,4 +53,4 @@ contains
     !$acc update host (ptr)
     !$acc update self (ptr)
   end subroutine oacc1
-end module test
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray.f95
index 705c18c..d6d5317 100644
--- gcc/testsuite/gfortran.dg/goacc/cray.f95
+++ gcc/testsuite/gfortran.dg/goacc/cray.f95
@@ -1,5 +1,5 @@
-! { dg-do compile } 
 ! { dg-additional-options "-fcray-pointer" }
+! See also cray-2.f95.
 
 module test
 contains
@@ -8,8 +8,8 @@ contains
     integer :: i
     real :: pointee
     pointer (ptr, pointee)
-    !$acc declare device_resident (pointee) 
-    !$acc declare device_resident (ptr) 
+    !$acc declare device_resident (pointee)
+    !$acc declare device_resident (ptr)
     !$acc data copy (pointee) ! { dg-error "Cray pointee" }
     !$acc end data
     !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
similarity index 89%
copy from gcc/testsuite/gfortran.dg/goacc/loop-1.f95
copy to gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
index a605f03..79665b9 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-1.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
@@ -1,8 +1,10 @@
-module test
-  implicit none
+! See also loop-1.f95.
+
+program test
+  call test1
 contains
 
-subroutine test1  
+subroutine test1
   integer :: i, j, k, b(10)
   integer, dimension (30) :: a
   double precision :: d
@@ -30,15 +32,15 @@ subroutine test1
   do 300 d = 1, 30, 6
       i = d
   300 a(i) = 1
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
   !$acc loop
   do d = 1, 30, 5
        i = d
       a(i) = 2
   end do
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
   !$acc loop
   do i = 1, 30
       if (i .eq. 16) exit ! { dg-error "EXIT statement" }
@@ -53,7 +55,7 @@ subroutine test1
    end do last
 
   ! different types of loop are allowed
-  !$acc loop 
+  !$acc loop
   do i = 1,10
   end do
   !$acc loop
@@ -65,8 +67,8 @@ subroutine test1
   a(1) = 1 ! { dg-error "Expected DO loop" }
   do i = 1,10
   enddo
-  
-  ! combined directives may be used with/without end 
+
+  ! combined directives may be used with/without end
   !$acc parallel loop
   do i = 1,10
   enddo
@@ -82,11 +84,11 @@ subroutine test1
   enddo
   !$acc end kernels loop
 
-  !$acc kernels loop reduction(max:i) 
+  !$acc kernels loop reduction(max:i)
   do i = 1,10
   enddo
-  !$acc kernels 
-  !$acc loop reduction(max:i) 
+  !$acc kernels
+  !$acc loop reduction(max:i)
   do i = 1,10
   enddo
   !$acc end kernels
@@ -118,7 +120,7 @@ subroutine test1
     end do
     !$acc parallel loop collapse(2)
     do i = 1, 3
-        do j = 4, 6  
+        do j = 4, 6
         end do
     end do
     !$acc parallel loop collapse(2)
@@ -148,8 +150,8 @@ subroutine test1
     do i = 1, 3
         do r = 4, 6
         end do
-        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 }
-        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 }
+        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
+        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
     end do
 
     ! Both seq and independent are not allowed
@@ -171,4 +173,4 @@ subroutine test1
   enddo
 
 end subroutine test1
-end module test
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1.f95
index a605f03..5f81b7a 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-1.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-1.f95
@@ -1,8 +1,10 @@
+! See also loop-1-2.f95.
+
 module test
   implicit none
 contains
 
-subroutine test1  
+subroutine test1
   integer :: i, j, k, b(10)
   integer, dimension (30) :: a
   double precision :: d
@@ -30,15 +32,15 @@ subroutine test1
   do 300 d = 1, 30, 6
       i = d
   300 a(i) = 1
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
   !$acc loop
   do d = 1, 30, 5
        i = d
       a(i) = 2
   end do
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
   !$acc loop
   do i = 1, 30
       if (i .eq. 16) exit ! { dg-error "EXIT statement" }
@@ -53,7 +55,7 @@ subroutine test1
    end do last
 
   ! different types of loop are allowed
-  !$acc loop 
+  !$acc loop
   do i = 1,10
   end do
   !$acc loop
@@ -65,8 +67,8 @@ subroutine test1
   a(1) = 1 ! { dg-error "Expected DO loop" }
   do i = 1,10
   enddo
-  
-  ! combined directives may be used with/without end 
+
+  ! combined directives may be used with/without end
   !$acc parallel loop
   do i = 1,10
   enddo
@@ -82,11 +84,11 @@ subroutine test1
   enddo
   !$acc end kernels loop
 
-  !$acc kernels loop reduction(max:i) 
+  !$acc kernels loop reduction(max:i)
   do i = 1,10
   enddo
-  !$acc kernels 
-  !$acc loop reduction(max:i) 
+  !$acc kernels
+  !$acc loop reduction(max:i)
   do i = 1,10
   enddo
   !$acc end kernels
@@ -118,7 +120,7 @@ subroutine test1
     end do
     !$acc parallel loop collapse(2)
     do i = 1, 3
-        do j = 4, 6  
+        do j = 4, 6
         end do
     end do
     !$acc parallel loop collapse(2)
@@ -148,8 +150,8 @@ subroutine test1
     do i = 1, 3
         do r = 4, 6
         end do
-        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 }
-        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 }
+        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
+        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
     end do
 
     ! Both seq and independent are not allowed
diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
similarity index 90%
copy from gcc/testsuite/gfortran.dg/goacc/loop-3.f95
copy to gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
index 2a866c7..9be74a8 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-3.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
@@ -1,10 +1,13 @@
-! { dg-do compile }
 ! { dg-additional-options "-std=f2008" }
+! See also loop-3.f95.
 
+program test
+  call test1
+contains
 subroutine test1
   implicit none
   integer :: i, j
-  
+
   ! !$acc end loop not required by spec
   !$acc loop
   do i = 1,5
@@ -23,7 +26,7 @@ subroutine test1
   enddo
   !$acc end parallel
   !$acc end loop ! { dg-error "Unexpected" }
-  
+
   ! OpenACC supports Fortran 2008 do concurrent statement
   !$acc loop
   do concurrent (i = 1:5)
@@ -35,7 +38,7 @@ subroutine test1
       if (i .eq. j) cycle outer_loop
       if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
     end do inner_loop
-  end do outer_loop 
+  end do outer_loop
 
   outer_loop1: do i = 1, 5
     !$acc loop
@@ -50,6 +53,6 @@ subroutine test1
       if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
       if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
     end do inner_loop2
-  end do outer_loop2 
+  end do outer_loop2
 end subroutine test1
-
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3.f95
index 2a866c7..30930f4 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-3.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-3.f95
@@ -1,10 +1,10 @@
-! { dg-do compile }
 ! { dg-additional-options "-std=f2008" }
+! See also loop-3-2.f95.
 
 subroutine test1
   implicit none
   integer :: i, j
-  
+
   ! !$acc end loop not required by spec
   !$acc loop
   do i = 1,5
@@ -23,7 +23,7 @@ subroutine test1
   enddo
   !$acc end parallel
   !$acc end loop ! { dg-error "Unexpected" }
-  
+
   ! OpenACC supports Fortran 2008 do concurrent statement
   !$acc loop
   do concurrent (i = 1:5)
@@ -35,7 +35,7 @@ subroutine test1
       if (i .eq. j) cycle outer_loop
       if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
     end do inner_loop
-  end do outer_loop 
+  end do outer_loop
 
   outer_loop1: do i = 1, 5
     !$acc loop
@@ -50,6 +50,5 @@ subroutine test1
       if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
       if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
     end do inner_loop2
-  end do outer_loop2 
+  end do outer_loop2
 end subroutine test1
-
diff --git gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
new file mode 100644
index 0000000..2fcaa40
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
@@ -0,0 +1,93 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+! See gcc/testsuite/gcc.dg/goacc/nested-function-1.c for the C version.
+
+program main
+  integer, parameter :: N = 100
+  integer :: nonlocal_arg
+  integer :: nonlocal_a(N)
+  integer :: nonlocal_i
+  integer :: nonlocal_j
+
+  nonlocal_a (:) = 5
+  nonlocal_arg = 5
+
+  call local ()
+  call nonlocal ()
+
+contains
+
+  subroutine local ()
+    integer :: local_i
+    integer :: local_arg
+    integer :: local_a(N)
+    integer :: local_j
+
+    local_a (:) = 5
+    local_arg = 5
+
+    !$acc kernels loop &
+    !$acc gang(num:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait async(local_arg)
+    do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 5))
+       local_a(local_i) = 100
+       !$acc loop seq tile(*)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(1)
+       do local_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+
+    !$acc kernels loop &
+    !$acc gang(static:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait(local_arg, local_arg + 1, local_arg + 2) async
+    do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 4))
+       local_a(local_i) = 100
+       !$acc loop seq tile(1)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
+       do local_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+  end subroutine local
+
+  subroutine nonlocal ()
+    nonlocal_a (:) = 5
+    nonlocal_arg = 5
+
+    !$acc kernels loop &
+    !$acc gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait async(nonlocal_arg)
+    do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 3))
+       nonlocal_a(nonlocal_i) = 100
+       !$acc loop seq tile(2)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(3)
+       do nonlocal_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+
+    !$acc kernels loop &
+    !$acc gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+    do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 2))
+       nonlocal_a(nonlocal_i) = 100
+       !$acc loop seq tile(*)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
+       do nonlocal_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+  end subroutine nonlocal
+end program main
diff --git gcc/testsuite/gfortran.dg/goacc/subroutines.f90 gcc/testsuite/gfortran.dg/goacc/subroutines.f90
deleted file mode 100644
index 6cab798..0000000
--- gcc/testsuite/gfortran.dg/goacc/subroutines.f90
+++ /dev/null
@@ -1,73 +0,0 @@
-! Exercise how tree-nested.c handles gang, worker vector and seq.
-
-! { dg-do compile } 
-
-program main
-  integer, parameter :: N = 100
-  integer :: nonlocal_arg
-  integer :: nonlocal_a(N)
-  integer :: nonlocal_i
-  integer :: nonlocal_j
-  
-  nonlocal_a (:) = 5
-  nonlocal_arg = 5
-  
-  call local ()
-  call nonlocal ()
-
-contains
-
-  subroutine local ()
-    integer :: local_i
-    integer :: local_arg
-    integer :: local_a(N)
-    integer :: local_j
-    
-    local_a (:) = 5
-    local_arg = 5
-
-    !$acc kernels loop gang(num:local_arg) worker(local_arg) vector(local_arg)
-    do local_i = 1, N
-       local_a(local_i) = 100
-       !$acc loop seq
-       do local_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-
-    !$acc kernels loop gang(static:local_arg) worker(local_arg) &
-    !$acc vector(local_arg)
-    do local_i = 1, N
-       local_a(local_i) = 100
-       !$acc loop seq
-       do local_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-  end subroutine local
-
-  subroutine nonlocal ()
-    nonlocal_a (:) = 5
-    nonlocal_arg = 5
-  
-    !$acc kernels loop gang(num:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
-    do nonlocal_i = 1, N
-       nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
-       do nonlocal_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-
-    !$acc kernels loop gang(static:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
-    do nonlocal_i = 1, N
-       nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
-       do nonlocal_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-  end subroutine nonlocal
-end program main
diff --git gcc/tree-nested.c gcc/tree-nested.c
index 25a92aa..6fc6326 100644
--- gcc/tree-nested.c
+++ gcc/tree-nested.c
@@ -1114,6 +1114,8 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1197,8 +1199,21 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
 	default:
 	  gcc_unreachable ();
 	}
@@ -1790,6 +1805,8 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1878,8 +1895,21 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
 	default:
 	  gcc_unreachable ();
 	}
diff --git libgomp/ChangeLog libgomp/ChangeLog
index 5c7f41a..cf551f4 100644
--- libgomp/ChangeLog
+++ libgomp/ChangeLog
@@ -1,4 +1,14 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
+	    Cesar Philippidis  <cesar@codesourcery.com>
+
+	PR middle-end/71373
+	* testsuite/libgomp.oacc-c/nested-function-1.c: New file.
+	* testsuite/libgomp.oacc-c/nested-function-2.c: Likewise.
+	* testsuite/libgomp.oacc-fortran/nested-function-1.f90: Likewise.
+	* testsuite/libgomp.oacc-fortran/nested-function-2.f90: Likewise.
+	* testsuite/libgomp.oacc-fortran/nested-function-3.f90: Likewise.
+
+2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
 	PR c/71381
 	* testsuite/libgomp.oacc-c-c++-common/cache-1.c: #include
diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-1.c libgomp/testsuite/libgomp.oacc-c/nested-function-1.c
new file mode 100644
index 0000000..fb2a3ac
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c/nested-function-1.c
@@ -0,0 +1,52 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  void test1 ()
+  {
+    int i, j, k;
+    int a[4][7][8];
+
+    __builtin_memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(4 - 1)
+    for (i = 1; i <= 3; i++)
+      for (j = 4; j <= 6; j++)
+	for (k = 5; k <= 7; k++)
+	  a[i][j][k] = i + j + k;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 4; j <= 6; j++)
+	for (k = 5; k <= 7; k++)
+	  if (a[i][j][k] != i + j + k)
+	    __builtin_abort();
+  }
+
+  void test2 ()
+  {
+    int i, j, k;
+    int a[4][4][4];
+
+    __builtin_memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 1; k <= 3; k++)
+	  a[i][j][k] = 1;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 1; k <= 3; k++)
+	  if (a[i][j][k] != 1)
+	    __builtin_abort ();
+  }
+
+  test1 ();
+  test2 ();
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-2.c libgomp/testsuite/libgomp.oacc-c/nested-function-2.c
new file mode 100644
index 0000000..2c3f3fe
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c/nested-function-2.c
@@ -0,0 +1,155 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  int p1 = 2, p2 = 6, p3 = 0, p4 = 4, p5 = 13, p6 = 18, p7 = 1, p8 = 1, p9 = 1;
+
+  void test1 ()
+  {
+    int i, j, k;
+    int a[4][4][4];
+
+    __builtin_memset (a, '\0', sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 2; k <= 3; k++)
+	  a[i][j][k] = 1;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 2; k <= 3; k++)
+	  if (a[i][j][k] != 1)
+	    __builtin_abort();
+  }
+
+  void test2 (int v1, int v2, int v3, int v4, int v5, int v6)
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  void test3 (int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8,
+      int v9)
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  void test4 ()
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+    int v1 = p1, v2 = p2, v3 = p3, v4 = p4, v5 = p5, v6 = p6, v7 = p7, v8 = p8,
+      v9 = p9;
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  test1 ();
+  test2 (p1, p2, p3, p4, p5, p6);
+  test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9);
+  test4 ();
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90
new file mode 100644
index 0000000..fdbca44
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90
@@ -0,0 +1,70 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program collapse2
+  call test1
+  call test2
+contains
+  subroutine test1
+    integer :: i, j, k, a(1:3, 4:6, 5:7)
+    logical :: l
+    l = .false.
+    a(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop worker vector collapse(4 - 1)
+      do 164 i = 1, 3
+        do 164 j = 4, 6
+          do 164 k = 5, 7
+            a(i, j, k) = i + j + k
+164      end do
+    !$acc loop worker vector reduction(.or.:l) collapse(2)
+firstdo: do i = 1, 3
+        do j = 4, 6
+          do k = 5, 7
+            if (a(i, j, k) .ne. (i + j + k)) l = .true.
+          end do
+        end do
+      end do firstdo
+    !$acc end parallel
+    if (l) call abort
+  end subroutine test1
+
+  subroutine test2
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    a = 0
+    !$acc parallel
+    ! Use "gang(static:1)" here and below to effectively turn gang-redundant
+    ! execution mode into something like gang-single.
+    !$acc loop gang(static:1) collapse(1)
+      do 115 k=1,3
+         !$acc loop collapse(2)
+  dokk: do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc loop gang(static:1) collapse(1)
+      do k=1,3
+         if (any(a(k,1:3,1:3).ne.1)) call abort
+      enddo
+    ! Use "gang(static:1)" here and below to effectively turn gang-redundant
+    ! execution mode into something like gang-single.
+    !$acc loop gang(static:1) collapse(1)
+ dol: do 120 l=1,3
+    !$acc loop collapse(2)
+  doll: do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc loop gang(static:1) collapse(1)
+     do l=1,3
+        if (any(a(l,1:3,1:3).ne.2)) call abort
+     enddo
+    !$acc end parallel
+  end subroutine test2
+
+end program collapse2
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90
new file mode 100644
index 0000000..4e28196
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90
@@ -0,0 +1,173 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program collapse3
+  integer :: p1, p2, p3, p4, p5, p6, p7, p8, p9
+  p1 = 2
+  p2 = 6
+  p3 = -2
+  p4 = 4
+  p5 = 13
+  p6 = 18
+  p7 = 1
+  p8 = 1
+  p9 = 1
+  call test1
+  call test2 (p1, p2, p3, p4, p5, p6)
+  call test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9)
+  call test4
+contains
+  subroutine test1
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    !$acc parallel
+    !$acc loop collapse(3)
+      do 115 k=1,3
+dokk:   do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.1)) call abort
+    !$acc parallel
+    !$acc loop collapse(3)
+dol:  do 120 l=1,3
+doll:   do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.2)) call abort
+    end subroutine test1
+
+  subroutine test2(v1, v2, v3, v4, v5, v6)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.l) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test2
+
+  subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.l) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test3
+
+  subroutine test4
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    v1 = p1
+    v2 = p2
+    v3 = p3
+    v4 = p4
+    v5 = p5
+    v6 = p6
+    v7 = p7
+    v8 = p8
+    v9 = p9
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+         do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+         end do
+      end do
+    end do
+  end subroutine test4
+
+end program collapse3
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90
new file mode 100644
index 0000000..2f6485e
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90
@@ -0,0 +1,244 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program sub_collapse_3
+  call test1
+  call test2 (2, 6, -2, 4, 13, 18)
+  call test3 (2, 6, -2, 4, 13, 18, 1, 1, 1)
+  call test4
+  call test5 (2, 6, -2, 4, 13, 18)
+  call test6 (2, 6, -2, 4, 13, 18, 1, 1, 1)
+contains
+  subroutine test1
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    !$acc parallel
+    !$acc loop collapse(3)
+      do 115 k=1,3
+dokk:   do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.1)) call abort
+    !$acc parallel
+    !$acc loop collapse(3)
+dol:  do 120 l=1,3
+doll:   do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.2)) call abort
+  end subroutine test1
+
+  subroutine test2(v1, v2, v3, v4, v5, v6)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.l) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test2
+
+  subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.l) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test3
+
+  subroutine test4
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    v1 = 2
+    v2 = 6
+    v3 = -2
+    v4 = 4
+    v5 = 13
+    v6 = 18
+    v7 = 1
+    v8 = 1
+    v9 = 1
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+         do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+         end do
+      end do
+    end do
+  end subroutine test4
+
+  subroutine test5(v1, v2, v3, v4, v5, v6)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test5
+
+  subroutine test6(v1, v2, v3, v4, v5, v6, v7, v8, v9)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+            m = i * 100 + j * 10 + k
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test6
+
+end program sub_collapse_3

commit 507c9999e155b704d9a7244b1c9d467a95143b77
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Fri Jun 10 09:46:18 2016 +0000

    [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition
    
    Backport trunk r237291:
    
    	gcc/
    	* gimplify.c (gimplify_adjust_omp_clauses): Discard
    	OMP_CLAUSE_TILE.
    	* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
    	gcc/testsuite/
    	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
    	for OpenACC tile clauses.
    	* gfortran.dg/goacc/combined-directives.f90: Likewise.
    
    	gcc/
    	PR middle-end/71373
    	* tree-nested.c (convert_nonlocal_omp_clauses)
    	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
    	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
    	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
    	gcc/testsuite/
    	PR middle-end/71373
    	* gcc.dg/goacc/nested-function-1.c: New file.
    	* gcc.dg/goacc/nested-function-2.c: Likewise.
    	* gcc.dg/goacc/pr71373.c: Likewise.
    	* gfortran.dg/goacc/cray-2.f95: Likewise.
    	* gfortran.dg/goacc/loop-1-2.f95: Likewise.
    	* gfortran.dg/goacc/loop-3-2.f95: Likewise.
    	* gfortran.dg/goacc/cray.f95: Update.
    	* gfortran.dg/goacc/loop-1.f95: Likewise.
    	* gfortran.dg/goacc/loop-3.f95: Likewise.
    	* gfortran.dg/goacc/subroutines.f90: Update, and rename to...
    	* gfortran.dg/goacc/nested-function-1.f90: ... this new file.
    	libgomp/testsuite/
    	PR middle-end/71373
    	* libgomp.oacc-c/nested-function-1.c: New file.
    	* libgomp.oacc-c/nested-function-2.c: Likewise.
    	* libgomp.oacc-fortran/nested-function-1.f90: Likewise.
    	* libgomp.oacc-fortran/nested-function-2.f90: Likewise.
    	* libgomp.oacc-fortran/nested-function-3.f90: Likewise.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-6-branch@237296 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog                                      |  12 +
 gcc/gimplify.c                                     |   6 +
 gcc/omp-low.c                                      |   4 +-
 gcc/testsuite/ChangeLog                            |  22 ++
 .../c-c++-common/goacc/combined-directives.c       |   3 +-
 gcc/testsuite/gcc.dg/goacc/nested-function-1.c     | 100 +++++++++
 gcc/testsuite/gcc.dg/goacc/nested-function-2.c     |  45 ++++
 gcc/testsuite/gcc.dg/goacc/pr71373.c               |  41 ++++
 .../gfortran.dg/goacc/combined-directives.f90      |   3 +-
 .../gfortran.dg/goacc/{cray.f95 => cray-2.f95}     |  11 +-
 gcc/testsuite/gfortran.dg/goacc/cray.f95           |   6 +-
 .../gfortran.dg/goacc/{loop-1.f95 => loop-1-2.f95} |  36 +--
 gcc/testsuite/gfortran.dg/goacc/loop-1.f95         |  30 +--
 .../gfortran.dg/goacc/{loop-3.f95 => loop-3-2.f95} |  15 +-
 gcc/testsuite/gfortran.dg/goacc/loop-3.f95         |  11 +-
 .../gfortran.dg/goacc/nested-function-1.f90        |  93 ++++++++
 gcc/testsuite/gfortran.dg/goacc/subroutines.f90    |  73 ------
 gcc/tree-nested.c                                  |  32 +++
 libgomp/ChangeLog                                  |  11 +
 .../testsuite/libgomp.oacc-c/nested-function-1.c   |  52 +++++
 .../testsuite/libgomp.oacc-c/nested-function-2.c   | 155 +++++++++++++
 .../libgomp.oacc-fortran/nested-function-1.f90     |  70 ++++++
 .../libgomp.oacc-fortran/nested-function-2.f90     | 173 +++++++++++++++
 .../libgomp.oacc-fortran/nested-function-3.f90     | 244 +++++++++++++++++++++
 24 files changed, 1120 insertions(+), 128 deletions(-)

diff --git gcc/ChangeLog gcc/ChangeLog
index cbdcc42..bb9099d 100644
--- gcc/ChangeLog
+++ gcc/ChangeLog
@@ -1,5 +1,17 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/71373
+	Backport from trunk r237291:
+	* tree-nested.c (convert_nonlocal_omp_clauses)
+	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
+	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
+	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
+
+	Backport from trunk r237291:
+	* gimplify.c (gimplify_adjust_omp_clauses): Discard
+	OMP_CLAUSE_TILE.
+	* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
+
 	Backport from trunk r237290:
 	* omp-low.c (scan_sharing_clauses): Don't expect
 	OMP_CLAUSE__CACHE_.
diff --git gcc/gimplify.c gcc/gimplify.c
index 259c88b..846a75a 100644
--- gcc/gimplify.c
+++ gcc/gimplify.c
@@ -8224,7 +8224,13 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
 	case OMP_CLAUSE_VECTOR:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
+	  break;
+
 	case OMP_CLAUSE_TILE:
+	  /* We're not yet making use of the information provided by OpenACC
+	     tile clauses.  Discard these here, to simplify later middle end
+	     processing.  */
+	  remove = true;
 	  break;
 
 	default:
diff --git gcc/omp-low.c gcc/omp-low.c
index e570c22..5f1e6aa 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -2186,7 +2186,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
@@ -2200,6 +2199,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	  break;
 
 	case OMP_CLAUSE_DEVICE_RESIDENT:
+	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE__CACHE_:
 	default:
 	  gcc_unreachable ();
@@ -2357,7 +2357,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
@@ -2365,6 +2364,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	  break;
 
 	case OMP_CLAUSE_DEVICE_RESIDENT:
+	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE__CACHE_:
 	default:
 	  gcc_unreachable ();
diff --git gcc/testsuite/ChangeLog gcc/testsuite/ChangeLog
index 5e3f7e3..aa7c3a8 100644
--- gcc/testsuite/ChangeLog
+++ gcc/testsuite/ChangeLog
@@ -1,5 +1,27 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/71373
+	Backport from trunk r237291:
+	2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
+		    Cesar Philippidis  <cesar@codesourcery.com>
+
+	* gcc.dg/goacc/nested-function-1.c: New file.
+	* gcc.dg/goacc/nested-function-2.c: Likewise.
+	* gcc.dg/goacc/pr71373.c: Likewise.
+	* gfortran.dg/goacc/cray-2.f95: Likewise.
+	* gfortran.dg/goacc/loop-1-2.f95: Likewise.
+	* gfortran.dg/goacc/loop-3-2.f95: Likewise.
+	* gfortran.dg/goacc/cray.f95: Update.
+	* gfortran.dg/goacc/loop-1.f95: Likewise.
+	* gfortran.dg/goacc/loop-3.f95: Likewise.
+	* gfortran.dg/goacc/subroutines.f90: Update, and rename to...
+	* gfortran.dg/goacc/nested-function-1.f90: ... this new file.
+
+	Backport from trunk r237291:
+	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
+	for OpenACC tile clauses.
+	* gfortran.dg/goacc/combined-directives.f90: Likewise.
+
 	PR c/71381
 	Backport from trunk r237290:
 	* c-c++-common/goacc/cache-1.c: Update.  Move invalid usage tests
diff --git gcc/testsuite/c-c++-common/goacc/combined-directives.c gcc/testsuite/c-c++-common/goacc/combined-directives.c
index c2a3c57..3fa800d 100644
--- gcc/testsuite/c-c++-common/goacc/combined-directives.c
+++ gcc/testsuite/c-c++-common/goacc/combined-directives.c
@@ -111,6 +111,7 @@ test ()
 // { dg-final { scan-tree-dump-times "acc loop vector" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop seq" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop auto" 2 "gimple" } }
-// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" } }
+// XFAILed: OpenACC tile clauses are discarded during gimplification.
+// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 // { dg-final { scan-tree-dump-times "acc loop independent private.i" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
diff --git gcc/testsuite/gcc.dg/goacc/nested-function-1.c gcc/testsuite/gcc.dg/goacc/nested-function-1.c
new file mode 100644
index 0000000..e17c0e2
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/nested-function-1.c
@@ -0,0 +1,100 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+/* See gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 for the Fortran
+   version.  */
+
+int main ()
+{
+#define N 100
+  int nonlocal_arg;
+  int nonlocal_a[N];
+  int nonlocal_i;
+  int nonlocal_j;
+
+  for (int i = 0; i < N; ++i)
+    nonlocal_a[i] = 5;
+  nonlocal_arg = 5;
+
+  void local ()
+  {
+    int local_i;
+    int local_arg;
+    int local_a[N];
+    int local_j;
+
+    for (int i = 0; i < N; ++i)
+      local_a[i] = 5;
+    local_arg = 5;
+
+#pragma acc kernels loop \
+  gang(num:local_arg) worker(local_arg) vector(local_arg) \
+  wait async(local_arg)
+    for (local_i = 0; local_i < N; ++local_i)
+      {
+#pragma acc cache (local_a[local_i:5])
+	local_a[local_i] = 100;
+#pragma acc loop seq tile(*)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+#pragma acc loop auto independent tile(1)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+      }
+
+#pragma acc kernels loop \
+  gang(static:local_arg) worker(local_arg) vector(local_arg) \
+  wait(local_arg, local_arg + 1, local_arg + 2) async
+    for (local_i = 0; local_i < N; ++local_i)
+      {
+#pragma acc cache (local_a[local_i:4])
+	local_a[local_i] = 100;
+#pragma acc loop seq tile(1)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+#pragma acc loop auto independent tile(*)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+      }
+  }
+
+  void nonlocal ()
+  {
+    for (int i = 0; i < N; ++i)
+      nonlocal_a[i] = 5;
+    nonlocal_arg = 5;
+
+#pragma acc kernels loop \
+  gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+  wait async(nonlocal_arg)
+    for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+      {
+#pragma acc cache (nonlocal_a[nonlocal_i:3])
+	nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(2)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+#pragma acc loop auto independent tile(3)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+      }
+
+#pragma acc kernels loop \
+  gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+  wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+    for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+      {
+#pragma acc cache (nonlocal_a[nonlocal_i:2])
+	nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(*)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+#pragma acc loop auto independent tile(*)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+      }
+  }
+
+  local ();
+  nonlocal ();
+
+  return 0;
+}
diff --git gcc/testsuite/gcc.dg/goacc/nested-function-2.c gcc/testsuite/gcc.dg/goacc/nested-function-2.c
new file mode 100644
index 0000000..70c9ec8
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/nested-function-2.c
@@ -0,0 +1,45 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  int j = 0, k = 6, l = 7, m = 8;
+  void simple (void)
+  {
+    int i;
+#pragma acc parallel
+    {
+#pragma acc loop
+      for (i = 0; i < m; i+= k)
+	j = (m + i - j) * l;
+    }
+  }
+  void collapse (void)
+  {
+    int x, y, z;
+#pragma acc parallel
+    {
+#pragma acc loop collapse (3)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  void reduction (void)
+  {
+    int x, y, z;
+#pragma acc parallel reduction (+:j)
+    {
+#pragma acc loop reduction (+:j) collapse (3)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  simple();
+  collapse();
+  reduction();
+  return 0;
+}
diff --git gcc/testsuite/gcc.dg/goacc/pr71373.c gcc/testsuite/gcc.dg/goacc/pr71373.c
new file mode 100644
index 0000000..9381752
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/pr71373.c
@@ -0,0 +1,41 @@
+/* Unintentional nested function usage.  */
+/* Due to missing right braces '}', the following functions are parsed as
+   nested functions.  This ran into an ICE.  */
+
+void foo (void)
+{
+  #pragma acc parallel
+  {
+    #pragma acc loop independent
+    for (int i = 0; i < 16; i++)
+      ;
+  // Note right brace '}' commented out here.
+  //}
+}
+void bar (void)
+{
+}
+
+// Adding right brace '}' here, to make this compile.
+}
+
+
+// ..., and the other way round:
+
+void BAR (void)
+{
+// Note right brace '}' commented out here.
+//}
+
+void FOO (void)
+{
+  #pragma acc parallel
+  {
+    #pragma acc loop independent
+    for (int i = 0; i < 16; i++)
+      ;
+  }
+}
+
+// Adding right brace '}' here, to make this compile.
+}
diff --git gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
index 42a447a..abb5e6b 100644
--- gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
+++ gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
@@ -143,7 +143,8 @@ end subroutine test
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 2 "gimple" } }
-! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" } }
+! XFAILed: OpenACC tile clauses are discarded during gimplification.
+! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
 ! { dg-final { scan-tree-dump-times "omp target oacc_\[^ \]+ map.force_tofrom:y" 2 "gimple" } }
diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray-2.f95
similarity index 91%
copy from gcc/testsuite/gfortran.dg/goacc/cray.f95
copy to gcc/testsuite/gfortran.dg/goacc/cray-2.f95
index 705c18c..51b79b5 100644
--- gcc/testsuite/gfortran.dg/goacc/cray.f95
+++ gcc/testsuite/gfortran.dg/goacc/cray-2.f95
@@ -1,15 +1,16 @@
-! { dg-do compile } 
 ! { dg-additional-options "-fcray-pointer" }
+! See also cray.f95.
 
-module test
+program test
+  call oacc1
 contains
   subroutine oacc1
     implicit none
     integer :: i
     real :: pointee
     pointer (ptr, pointee)
-    !$acc declare device_resident (pointee) 
-    !$acc declare device_resident (ptr) 
+    !$acc declare device_resident (pointee)
+    !$acc declare device_resident (ptr)
     !$acc data copy (pointee) ! { dg-error "Cray pointee" }
     !$acc end data
     !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
@@ -52,4 +53,4 @@ contains
     !$acc update host (ptr)
     !$acc update self (ptr)
   end subroutine oacc1
-end module test
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray.f95
index 705c18c..d6d5317 100644
--- gcc/testsuite/gfortran.dg/goacc/cray.f95
+++ gcc/testsuite/gfortran.dg/goacc/cray.f95
@@ -1,5 +1,5 @@
-! { dg-do compile } 
 ! { dg-additional-options "-fcray-pointer" }
+! See also cray-2.f95.
 
 module test
 contains
@@ -8,8 +8,8 @@ contains
     integer :: i
     real :: pointee
     pointer (ptr, pointee)
-    !$acc declare device_resident (pointee) 
-    !$acc declare device_resident (ptr) 
+    !$acc declare device_resident (pointee)
+    !$acc declare device_resident (ptr)
     !$acc data copy (pointee) ! { dg-error "Cray pointee" }
     !$acc end data
     !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
similarity index 89%
copy from gcc/testsuite/gfortran.dg/goacc/loop-1.f95
copy to gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
index a605f03..79665b9 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-1.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
@@ -1,8 +1,10 @@
-module test
-  implicit none
+! See also loop-1.f95.
+
+program test
+  call test1
 contains
 
-subroutine test1  
+subroutine test1
   integer :: i, j, k, b(10)
   integer, dimension (30) :: a
   double precision :: d
@@ -30,15 +32,15 @@ subroutine test1
   do 300 d = 1, 30, 6
       i = d
   300 a(i) = 1
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
   !$acc loop
   do d = 1, 30, 5
        i = d
       a(i) = 2
   end do
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
   !$acc loop
   do i = 1, 30
       if (i .eq. 16) exit ! { dg-error "EXIT statement" }
@@ -53,7 +55,7 @@ subroutine test1
    end do last
 
   ! different types of loop are allowed
-  !$acc loop 
+  !$acc loop
   do i = 1,10
   end do
   !$acc loop
@@ -65,8 +67,8 @@ subroutine test1
   a(1) = 1 ! { dg-error "Expected DO loop" }
   do i = 1,10
   enddo
-  
-  ! combined directives may be used with/without end 
+
+  ! combined directives may be used with/without end
   !$acc parallel loop
   do i = 1,10
   enddo
@@ -82,11 +84,11 @@ subroutine test1
   enddo
   !$acc end kernels loop
 
-  !$acc kernels loop reduction(max:i) 
+  !$acc kernels loop reduction(max:i)
   do i = 1,10
   enddo
-  !$acc kernels 
-  !$acc loop reduction(max:i) 
+  !$acc kernels
+  !$acc loop reduction(max:i)
   do i = 1,10
   enddo
   !$acc end kernels
@@ -118,7 +120,7 @@ subroutine test1
     end do
     !$acc parallel loop collapse(2)
     do i = 1, 3
-        do j = 4, 6  
+        do j = 4, 6
         end do
     end do
     !$acc parallel loop collapse(2)
@@ -148,8 +150,8 @@ subroutine test1
     do i = 1, 3
         do r = 4, 6
         end do
-        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 }
-        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 }
+        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
+        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
     end do
 
     ! Both seq and independent are not allowed
@@ -171,4 +173,4 @@ subroutine test1
   enddo
 
 end subroutine test1
-end module test
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1.f95
index a605f03..5f81b7a 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-1.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-1.f95
@@ -1,8 +1,10 @@
+! See also loop-1-2.f95.
+
 module test
   implicit none
 contains
 
-subroutine test1  
+subroutine test1
   integer :: i, j, k, b(10)
   integer, dimension (30) :: a
   double precision :: d
@@ -30,15 +32,15 @@ subroutine test1
   do 300 d = 1, 30, 6
       i = d
   300 a(i) = 1
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
   !$acc loop
   do d = 1, 30, 5
        i = d
       a(i) = 2
   end do
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
   !$acc loop
   do i = 1, 30
       if (i .eq. 16) exit ! { dg-error "EXIT statement" }
@@ -53,7 +55,7 @@ subroutine test1
    end do last
 
   ! different types of loop are allowed
-  !$acc loop 
+  !$acc loop
   do i = 1,10
   end do
   !$acc loop
@@ -65,8 +67,8 @@ subroutine test1
   a(1) = 1 ! { dg-error "Expected DO loop" }
   do i = 1,10
   enddo
-  
-  ! combined directives may be used with/without end 
+
+  ! combined directives may be used with/without end
   !$acc parallel loop
   do i = 1,10
   enddo
@@ -82,11 +84,11 @@ subroutine test1
   enddo
   !$acc end kernels loop
 
-  !$acc kernels loop reduction(max:i) 
+  !$acc kernels loop reduction(max:i)
   do i = 1,10
   enddo
-  !$acc kernels 
-  !$acc loop reduction(max:i) 
+  !$acc kernels
+  !$acc loop reduction(max:i)
   do i = 1,10
   enddo
   !$acc end kernels
@@ -118,7 +120,7 @@ subroutine test1
     end do
     !$acc parallel loop collapse(2)
     do i = 1, 3
-        do j = 4, 6  
+        do j = 4, 6
         end do
     end do
     !$acc parallel loop collapse(2)
@@ -148,8 +150,8 @@ subroutine test1
     do i = 1, 3
         do r = 4, 6
         end do
-        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 }
-        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 }
+        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
+        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
     end do
 
     ! Both seq and independent are not allowed
diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
similarity index 90%
copy from gcc/testsuite/gfortran.dg/goacc/loop-3.f95
copy to gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
index 2a866c7..9be74a8 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-3.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
@@ -1,10 +1,13 @@
-! { dg-do compile }
 ! { dg-additional-options "-std=f2008" }
+! See also loop-3.f95.
 
+program test
+  call test1
+contains
 subroutine test1
   implicit none
   integer :: i, j
-  
+
   ! !$acc end loop not required by spec
   !$acc loop
   do i = 1,5
@@ -23,7 +26,7 @@ subroutine test1
   enddo
   !$acc end parallel
   !$acc end loop ! { dg-error "Unexpected" }
-  
+
   ! OpenACC supports Fortran 2008 do concurrent statement
   !$acc loop
   do concurrent (i = 1:5)
@@ -35,7 +38,7 @@ subroutine test1
       if (i .eq. j) cycle outer_loop
       if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
     end do inner_loop
-  end do outer_loop 
+  end do outer_loop
 
   outer_loop1: do i = 1, 5
     !$acc loop
@@ -50,6 +53,6 @@ subroutine test1
       if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
       if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
     end do inner_loop2
-  end do outer_loop2 
+  end do outer_loop2
 end subroutine test1
-
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3.f95
index 2a866c7..30930f4 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-3.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-3.f95
@@ -1,10 +1,10 @@
-! { dg-do compile }
 ! { dg-additional-options "-std=f2008" }
+! See also loop-3-2.f95.
 
 subroutine test1
   implicit none
   integer :: i, j
-  
+
   ! !$acc end loop not required by spec
   !$acc loop
   do i = 1,5
@@ -23,7 +23,7 @@ subroutine test1
   enddo
   !$acc end parallel
   !$acc end loop ! { dg-error "Unexpected" }
-  
+
   ! OpenACC supports Fortran 2008 do concurrent statement
   !$acc loop
   do concurrent (i = 1:5)
@@ -35,7 +35,7 @@ subroutine test1
       if (i .eq. j) cycle outer_loop
       if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
     end do inner_loop
-  end do outer_loop 
+  end do outer_loop
 
   outer_loop1: do i = 1, 5
     !$acc loop
@@ -50,6 +50,5 @@ subroutine test1
       if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
       if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
     end do inner_loop2
-  end do outer_loop2 
+  end do outer_loop2
 end subroutine test1
-
diff --git gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
new file mode 100644
index 0000000..2fcaa40
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
@@ -0,0 +1,93 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+! See gcc/testsuite/gcc.dg/goacc/nested-function-1.c for the C version.
+
+program main
+  integer, parameter :: N = 100
+  integer :: nonlocal_arg
+  integer :: nonlocal_a(N)
+  integer :: nonlocal_i
+  integer :: nonlocal_j
+
+  nonlocal_a (:) = 5
+  nonlocal_arg = 5
+
+  call local ()
+  call nonlocal ()
+
+contains
+
+  subroutine local ()
+    integer :: local_i
+    integer :: local_arg
+    integer :: local_a(N)
+    integer :: local_j
+
+    local_a (:) = 5
+    local_arg = 5
+
+    !$acc kernels loop &
+    !$acc gang(num:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait async(local_arg)
+    do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 5))
+       local_a(local_i) = 100
+       !$acc loop seq tile(*)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(1)
+       do local_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+
+    !$acc kernels loop &
+    !$acc gang(static:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait(local_arg, local_arg + 1, local_arg + 2) async
+    do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 4))
+       local_a(local_i) = 100
+       !$acc loop seq tile(1)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
+       do local_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+  end subroutine local
+
+  subroutine nonlocal ()
+    nonlocal_a (:) = 5
+    nonlocal_arg = 5
+
+    !$acc kernels loop &
+    !$acc gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait async(nonlocal_arg)
+    do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 3))
+       nonlocal_a(nonlocal_i) = 100
+       !$acc loop seq tile(2)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(3)
+       do nonlocal_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+
+    !$acc kernels loop &
+    !$acc gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+    do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 2))
+       nonlocal_a(nonlocal_i) = 100
+       !$acc loop seq tile(*)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
+       do nonlocal_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+  end subroutine nonlocal
+end program main
diff --git gcc/testsuite/gfortran.dg/goacc/subroutines.f90 gcc/testsuite/gfortran.dg/goacc/subroutines.f90
deleted file mode 100644
index 6cab798..0000000
--- gcc/testsuite/gfortran.dg/goacc/subroutines.f90
+++ /dev/null
@@ -1,73 +0,0 @@
-! Exercise how tree-nested.c handles gang, worker vector and seq.
-
-! { dg-do compile } 
-
-program main
-  integer, parameter :: N = 100
-  integer :: nonlocal_arg
-  integer :: nonlocal_a(N)
-  integer :: nonlocal_i
-  integer :: nonlocal_j
-  
-  nonlocal_a (:) = 5
-  nonlocal_arg = 5
-  
-  call local ()
-  call nonlocal ()
-
-contains
-
-  subroutine local ()
-    integer :: local_i
-    integer :: local_arg
-    integer :: local_a(N)
-    integer :: local_j
-    
-    local_a (:) = 5
-    local_arg = 5
-
-    !$acc kernels loop gang(num:local_arg) worker(local_arg) vector(local_arg)
-    do local_i = 1, N
-       local_a(local_i) = 100
-       !$acc loop seq
-       do local_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-
-    !$acc kernels loop gang(static:local_arg) worker(local_arg) &
-    !$acc vector(local_arg)
-    do local_i = 1, N
-       local_a(local_i) = 100
-       !$acc loop seq
-       do local_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-  end subroutine local
-
-  subroutine nonlocal ()
-    nonlocal_a (:) = 5
-    nonlocal_arg = 5
-  
-    !$acc kernels loop gang(num:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
-    do nonlocal_i = 1, N
-       nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
-       do nonlocal_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-
-    !$acc kernels loop gang(static:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
-    do nonlocal_i = 1, N
-       nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
-       do nonlocal_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-  end subroutine nonlocal
-end program main
diff --git gcc/tree-nested.c gcc/tree-nested.c
index 8563687..327f663 100644
--- gcc/tree-nested.c
+++ gcc/tree-nested.c
@@ -1114,6 +1114,8 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1197,8 +1199,22 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE_DEVICE_RESIDENT:
 	default:
 	  gcc_unreachable ();
 	}
@@ -1790,6 +1806,8 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1878,8 +1896,22 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE_DEVICE_RESIDENT:
 	default:
 	  gcc_unreachable ();
 	}
diff --git libgomp/ChangeLog libgomp/ChangeLog
index f6da5c4..620ddb4 100644
--- libgomp/ChangeLog
+++ libgomp/ChangeLog
@@ -1,5 +1,16 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/71373
+	Backport from trunk r237291:
+	2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
+		    Cesar Philippidis  <cesar@codesourcery.com>
+
+	* libgomp.oacc-c/nested-function-1.c: New file.
+	* libgomp.oacc-c/nested-function-2.c: Likewise.
+	* libgomp.oacc-fortran/nested-function-1.f90: Likewise.
+	* libgomp.oacc-fortran/nested-function-2.f90: Likewise.
+	* libgomp.oacc-fortran/nested-function-3.f90: Likewise.
+
 	PR c/71381
 	Backport from trunk r237290:
 	* testsuite/libgomp.oacc-c-c++-common/cache-1.c: #include
diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-1.c libgomp/testsuite/libgomp.oacc-c/nested-function-1.c
new file mode 100644
index 0000000..fb2a3ac
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c/nested-function-1.c
@@ -0,0 +1,52 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  void test1 ()
+  {
+    int i, j, k;
+    int a[4][7][8];
+
+    __builtin_memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(4 - 1)
+    for (i = 1; i <= 3; i++)
+      for (j = 4; j <= 6; j++)
+	for (k = 5; k <= 7; k++)
+	  a[i][j][k] = i + j + k;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 4; j <= 6; j++)
+	for (k = 5; k <= 7; k++)
+	  if (a[i][j][k] != i + j + k)
+	    __builtin_abort();
+  }
+
+  void test2 ()
+  {
+    int i, j, k;
+    int a[4][4][4];
+
+    __builtin_memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 1; k <= 3; k++)
+	  a[i][j][k] = 1;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 1; k <= 3; k++)
+	  if (a[i][j][k] != 1)
+	    __builtin_abort ();
+  }
+
+  test1 ();
+  test2 ();
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-2.c libgomp/testsuite/libgomp.oacc-c/nested-function-2.c
new file mode 100644
index 0000000..2c3f3fe
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c/nested-function-2.c
@@ -0,0 +1,155 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  int p1 = 2, p2 = 6, p3 = 0, p4 = 4, p5 = 13, p6 = 18, p7 = 1, p8 = 1, p9 = 1;
+
+  void test1 ()
+  {
+    int i, j, k;
+    int a[4][4][4];
+
+    __builtin_memset (a, '\0', sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 2; k <= 3; k++)
+	  a[i][j][k] = 1;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 2; k <= 3; k++)
+	  if (a[i][j][k] != 1)
+	    __builtin_abort();
+  }
+
+  void test2 (int v1, int v2, int v3, int v4, int v5, int v6)
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  void test3 (int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8,
+      int v9)
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  void test4 ()
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+    int v1 = p1, v2 = p2, v3 = p3, v4 = p4, v5 = p5, v6 = p6, v7 = p7, v8 = p8,
+      v9 = p9;
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  test1 ();
+  test2 (p1, p2, p3, p4, p5, p6);
+  test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9);
+  test4 ();
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90
new file mode 100644
index 0000000..fdbca44
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90
@@ -0,0 +1,70 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program collapse2
+  call test1
+  call test2
+contains
+  subroutine test1  ! Fill a(i,j,k) = i+j+k in one acc loop nest, verify it in a second one.
+    integer :: i, j, k, a(1:3, 4:6, 5:7)
+    logical :: l  ! Set to .true. on any mismatch; combined by the .or. reduction.
+    l = .false.
+    a(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop worker vector collapse(4 - 1)
+      do 164 i = 1, 3  ! All three DO loops share the terminating label 164.
+        do 164 j = 4, 6
+          do 164 k = 5, 7
+            a(i, j, k) = i + j + k
+164      end do
+    !$acc loop worker vector reduction(.or.:l) collapse(2)
+firstdo: do i = 1, 3  ! Named DO construct; only i and j are collapsed here.
+        do j = 4, 6
+          do k = 5, 7
+            if (a(i, j, k) .ne. (i + j + k)) l = .true.
+          end do
+        end do
+      end do firstdo
+    !$acc end parallel
+    if (l) call abort  ! Fail if any element differed from i+j+k.
+  end subroutine test1
+
+  subroutine test2  ! Two write-then-check passes over a, all inside one parallel region.
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    a = 0
+    !$acc parallel
+    ! Use "gang(static:1)" here and below to effectively turn gang-redundant
+    ! execution mode into something like gang-single.
+    !$acc loop gang(static:1) collapse(1)
+      do 115 k=1,3  ! Labelled DO, terminated by "115 continue".
+         !$acc loop collapse(2)
+  dokk: do kk=1,3  ! Named DO construct nested in the labelled one.
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc loop gang(static:1) collapse(1)
+      do k=1,3
+         if (any(a(k,1:3,1:3).ne.1)) call abort  ! First pass must have stored 1 everywhere.
+      enddo
+    ! Use "gang(static:1)" here and below to effectively turn gang-redundant
+    ! execution mode into something like gang-single.
+    !$acc loop gang(static:1) collapse(1)
+ dol: do 120 l=1,3  ! DO that is both named (dol) and labelled (120).
+    !$acc loop collapse(2)
+  doll: do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc loop gang(static:1) collapse(1)
+     do l=1,3
+        if (any(a(l,1:3,1:3).ne.2)) call abort  ! Second pass must have stored 2 everywhere.
+     enddo
+    !$acc end parallel
+  end subroutine test2
+
+end program collapse2
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90
new file mode 100644
index 0000000..4e28196
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90
@@ -0,0 +1,173 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program collapse3
+  integer :: p1, p2, p3, p4, p5, p6, p7, p8, p9
+  p1 = 2
+  p2 = 6
+  p3 = -2
+  p4 = 4
+  p5 = 13
+  p6 = 18
+  p7 = 1
+  p8 = 1
+  p9 = 1
+  call test1
+  call test2 (p1, p2, p3, p4, p5, p6)
+  call test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9)
+  call test4
+contains
+  subroutine test1  ! collapse(3) over labelled and named DO loops; results are checked on the host.
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    !$acc parallel
+    !$acc loop collapse(3)
+      do 115 k=1,3  ! Labelled DO, terminated by "115 continue".
+dokk:   do kk=1,3  ! Named DO construct.
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.1)) call abort  ! Every element must now be 1.
+    !$acc parallel
+    !$acc loop collapse(3)
+dol:  do 120 l=1,3  ! DO that is both named (dol) and labelled (120).
+doll:   do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.2)) call abort  ! Every element must now be 2.
+    end subroutine test1
+
+  subroutine test2(v1, v2, v3, v4, v5, v6)  ! Loop bounds are dummy arguments; all strides are 1.
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r  ! l: flag reduced in the acc region; r: flag of the host reference loops.
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2  ! Host reference: same loop nest without OpenACC, tracking r and b.
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1  ! Guard on r (not l): b must depend on host results only.
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! The reduced flag must equal the host flag.
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort  ! Arrays must match element-wise.
+        end do
+      end do
+    end do
+  end subroutine test2
+
+  subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)  ! Loop bounds v1..v6 and strides v7..v9 are dummy arguments.
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r  ! l: flag reduced in the acc region; r: flag of the host reference loops.
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7  ! Host reference: same loop nest without OpenACC, tracking r and b.
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1  ! Guard on r (not l): b must depend on host results only.
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! The reduced flag must equal the host flag.
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort  ! Arrays must match element-wise.
+        end do
+      end do
+    end do
+  end subroutine test3
+
+  subroutine test4  ! No arguments: bounds and strides are copied from p1..p9.
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r  ! l: flag reduced in the acc region; r: flag of the host reference loops.
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    v1 = p1  ! p1..p9 are not declared in this subroutine.
+    v2 = p2
+    v3 = p3
+    v4 = p4
+    v5 = p5
+    v6 = p6
+    v7 = p7
+    v8 = p8
+    v9 = p9
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7  ! Host reference: same loop nest without OpenACC, tracking r and b.
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! The reduced flag must equal the host flag.
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+         do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort  ! Arrays must match element-wise.
+         end do
+      end do
+    end do
+  end subroutine test4
+
+end program collapse3
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90
new file mode 100644
index 0000000..2f6485e
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90
@@ -0,0 +1,244 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program sub_collapse_3
+  call test1
+  call test2 (2, 6, -2, 4, 13, 18)
+  call test3 (2, 6, -2, 4, 13, 18, 1, 1, 1)
+  call test4
+  call test5 (2, 6, -2, 4, 13, 18)
+  call test6 (2, 6, -2, 4, 13, 18, 1, 1, 1)
+contains
+  subroutine test1  ! collapse(3) over labelled and named DO loops; results are checked on the host.
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    !$acc parallel
+    !$acc loop collapse(3)
+      do 115 k=1,3  ! Labelled DO, terminated by "115 continue".
+dokk:   do kk=1,3  ! Named DO construct.
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.1)) call abort  ! Every element must now be 1.
+    !$acc parallel
+    !$acc loop collapse(3)
+dol:  do 120 l=1,3  ! DO that is both named (dol) and labelled (120).
+doll:   do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.2)) call abort  ! Every element must now be 2.
+  end subroutine test1
+
+  subroutine test2(v1, v2, v3, v4, v5, v6)  ! Loop bounds are dummy arguments; all strides are 1.
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r  ! l: flag reduced in the acc region; r: flag of the host reference loops.
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2  ! Host reference: same loop nest without OpenACC, tracking r and b.
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1  ! Guard on r (not l): b must depend on host results only.
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! The reduced flag must equal the host flag.
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort  ! Arrays must match element-wise.
+        end do
+      end do
+    end do
+  end subroutine test2
+
+  subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)  ! Loop bounds v1..v6 and strides v7..v9 are dummy arguments.
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r  ! l: flag reduced in the acc region; r: flag of the host reference loops.
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7  ! Host reference: same loop nest without OpenACC, tracking r and b.
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1  ! Guard on r (not l): b must depend on host results only.
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! The reduced flag must equal the host flag.
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort  ! Arrays must match element-wise.
+        end do
+      end do
+    end do
+  end subroutine test3
+
+  subroutine test4  ! No arguments: bounds and strides are local variables set to constants.
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r  ! l: flag reduced in the acc region; r: flag of the host reference loops.
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    v1 = 2
+    v2 = 6
+    v3 = -2
+    v4 = 4
+    v5 = 13
+    v6 = 18
+    v7 = 1
+    v8 = 1
+    v9 = 1
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7  ! Host reference: same loop nest without OpenACC, tracking r and b.
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! The reduced flag must equal the host flag.
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+         do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort  ! Arrays must match element-wise.
+         end do
+      end do
+    end do
+  end subroutine test4
+
+  subroutine test5(v1, v2, v3, v4, v5, v6)  ! Loop bounds are dummy arguments; all strides are 1.
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r  ! l: flag reduced in the acc region; r: flag of the host reference loops.
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2  ! Host reference: same loop nest without OpenACC, tracking r and b.
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! The reduced flag must equal the host flag.
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort  ! Arrays must match element-wise.
+        end do
+      end do
+    end do
+  end subroutine test5
+
+  subroutine test6(v1, v2, v3, v4, v5, v6, v7, v8, v9)  ! Loop bounds v1..v6 and strides v7..v9 are dummy arguments.
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r  ! l: flag reduced in the acc region; r: flag of the host reference loops.
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+            m = i * 100 + j * 10 + k  ! m has no declaration here (implicitly typed) and is never read afterwards.
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7  ! Host reference: same loop nest without OpenACC, tracking r and b (no m assignment).
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort  ! The reduced flag must equal the host flag.
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort  ! Arrays must match element-wise.
+        end do
+      end do
+    end do
+  end subroutine test6
+
+end program sub_collapse_3

commit ce7fd1fc5d97d8c73c72a45902da983d5793c374
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Fri Jun 10 10:12:36 2016 +0000

    [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition
    
    Backport trunk r237291:
    
    	gcc/
    	* gimplify.c (gimplify_adjust_omp_clauses): Discard
    	OMP_CLAUSE_TILE.
    	* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
    	gcc/testsuite/
    	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
    	for OpenACC tile clauses.
    	* gfortran.dg/goacc/combined-directives.f90: Likewise.
    
    	gcc/
    	PR middle-end/71373
    	* tree-nested.c (convert_nonlocal_omp_clauses)
    	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
    	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
    	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
    	gcc/testsuite/
    	PR middle-end/71373
    	* gcc.dg/goacc/nested-function-1.c: New file.
    	* gcc.dg/goacc/nested-function-2.c: Likewise.
    	* gcc.dg/goacc/pr71373.c: Likewise.
    	* gfortran.dg/goacc/cray-2.f95: Likewise.
    	* gfortran.dg/goacc/loop-1-2.f95: Likewise.
    	* gfortran.dg/goacc/loop-3-2.f95: Likewise.
    	* gfortran.dg/goacc/cray.f95: Update.
    	* gfortran.dg/goacc/loop-1.f95: Likewise.
    	* gfortran.dg/goacc/loop-3.f95: Likewise.
    	* gfortran.dg/goacc/subroutines.f90: Update, and rename to...
    	* gfortran.dg/goacc/nested-function-1.f90: ... this new file.
    	libgomp/testsuite/
    	PR middle-end/71373
    	* libgomp.oacc-c/nested-function-1.c: New file.
    	* libgomp.oacc-c/nested-function-2.c: Likewise.
    	* libgomp.oacc-fortran/nested-function-1.f90: Likewise.
    	* libgomp.oacc-fortran/nested-function-2.f90: Likewise.
    	* libgomp.oacc-fortran/nested-function-3.f90: Likewise.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@237300 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.gomp                                 |  12 +
 gcc/gimplify.c                                     |   8 +-
 gcc/omp-low.c                                      |   4 +-
 gcc/testsuite/ChangeLog.gomp                       |  22 ++
 .../c-c++-common/goacc/combined-directives.c       |   3 +-
 gcc/testsuite/gcc.dg/goacc/nested-function-1.c     | 100 +++++++++
 gcc/testsuite/gcc.dg/goacc/nested-function-2.c     |  45 ++++
 gcc/testsuite/gcc.dg/goacc/pr71373.c               |  41 ++++
 .../gfortran.dg/goacc/combined-directives.f90      |   1 +
 .../gfortran.dg/goacc/{cray.f95 => cray-2.f95}     |  11 +-
 gcc/testsuite/gfortran.dg/goacc/cray.f95           |   6 +-
 .../gfortran.dg/goacc/{loop-1.f95 => loop-1-2.f95} |  36 +--
 gcc/testsuite/gfortran.dg/goacc/loop-1.f95         |  30 +--
 .../gfortran.dg/goacc/{loop-3.f95 => loop-3-2.f95} |  15 +-
 gcc/testsuite/gfortran.dg/goacc/loop-3.f95         |  11 +-
 .../gfortran.dg/goacc/nested-function-1.f90        |  93 ++++++++
 gcc/testsuite/gfortran.dg/goacc/subroutines.f90    |  73 ------
 gcc/tree-nested.c                                  |  40 ++++
 libgomp/ChangeLog.gomp                             |  11 +
 .../testsuite/libgomp.oacc-c/nested-function-1.c   |  52 +++++
 .../testsuite/libgomp.oacc-c/nested-function-2.c   | 155 +++++++++++++
 .../libgomp.oacc-fortran/nested-function-1.f90     |  70 ++++++
 .../libgomp.oacc-fortran/nested-function-2.f90     | 173 +++++++++++++++
 .../libgomp.oacc-fortran/nested-function-3.f90     | 244 +++++++++++++++++++++
 24 files changed, 1128 insertions(+), 128 deletions(-)

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index 4477abf..b68538d 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,5 +1,17 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/71373
+	Backport from trunk r237291:
+	* tree-nested.c (convert_nonlocal_omp_clauses)
+	(convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
+	OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
+	OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
+
+	Backport from trunk r237291:
+	* gimplify.c (gimplify_adjust_omp_clauses): Discard
+	OMP_CLAUSE_TILE.
+	* omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
+
 	Backport from trunk r237290:
 	* omp-low.c (scan_sharing_clauses): Don't expect
 	OMP_CLAUSE__CACHE_.
diff --git gcc/gimplify.c gcc/gimplify.c
index 37971c7..717b25f 100644
--- gcc/gimplify.c
+++ gcc/gimplify.c
@@ -8265,10 +8265,16 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
 	case OMP_CLAUSE_VECTOR:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_DEVICE_TYPE:
 	  break;
 
+	case OMP_CLAUSE_TILE:
+	  /* We're not yet making use of the information provided by OpenACC
+	     tile clauses.  Discard these here, to simplify later middle end
+	     processing.  */
+	  remove = true;
+	  break;
+
 	case OMP_CLAUSE_BIND:
 	case OMP_CLAUSE_NOHOST:
 	default:
diff --git gcc/omp-low.c gcc/omp-low.c
index 40ac8c8..fedb195 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -2218,7 +2218,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
@@ -2235,6 +2234,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_BIND:
 	case OMP_CLAUSE_DEVICE_RESIDENT:
 	case OMP_CLAUSE_NOHOST:
+	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE__CACHE_:
 	default:
 	  gcc_unreachable ();
@@ -2392,7 +2392,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
-	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE_INDEPENDENT:
 	case OMP_CLAUSE_AUTO:
 	case OMP_CLAUSE_SEQ:
@@ -2403,6 +2402,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
 	case OMP_CLAUSE_BIND:
 	case OMP_CLAUSE_DEVICE_RESIDENT:
 	case OMP_CLAUSE_NOHOST:
+	case OMP_CLAUSE_TILE:
 	case OMP_CLAUSE__CACHE_:
 	default:
 	  gcc_unreachable ();
diff --git gcc/testsuite/ChangeLog.gomp gcc/testsuite/ChangeLog.gomp
index eef9425..031774d 100644
--- gcc/testsuite/ChangeLog.gomp
+++ gcc/testsuite/ChangeLog.gomp
@@ -1,5 +1,27 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/71373
+	Backport from trunk r237291:
+	2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
+		    Cesar Philippidis  <cesar@codesourcery.com>
+
+	* gcc.dg/goacc/nested-function-1.c: New file.
+	* gcc.dg/goacc/nested-function-2.c: Likewise.
+	* gcc.dg/goacc/pr71373.c: Likewise.
+	* gfortran.dg/goacc/cray-2.f95: Likewise.
+	* gfortran.dg/goacc/loop-1-2.f95: Likewise.
+	* gfortran.dg/goacc/loop-3-2.f95: Likewise.
+	* gfortran.dg/goacc/cray.f95: Update.
+	* gfortran.dg/goacc/loop-1.f95: Likewise.
+	* gfortran.dg/goacc/loop-3.f95: Likewise.
+	* gfortran.dg/goacc/subroutines.f90: Update, and rename to...
+	* gfortran.dg/goacc/nested-function-1.f90: ... this new file.
+
+	Backport from trunk r237291:
+	* c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
+	for OpenACC tile clauses.
+	* gfortran.dg/goacc/combined-directives.f90: Likewise.
+
 	PR c/71381
 	Backport from trunk r237290:
 	* c-c++-common/goacc/cache-1.c: Update.  Move invalid usage tests
diff --git gcc/testsuite/c-c++-common/goacc/combined-directives.c gcc/testsuite/c-c++-common/goacc/combined-directives.c
index 2ef5b53..2342c9f 100644
--- gcc/testsuite/c-c++-common/goacc/combined-directives.c
+++ gcc/testsuite/c-c++-common/goacc/combined-directives.c
@@ -114,6 +114,7 @@ test ()
 // { dg-final { scan-tree-dump-times "acc loop vector" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop seq" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "acc loop auto" 2 "gimple" } }
-// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" } }
+// XFAILed: OpenACC tile clauses are discarded during gimplification.
+// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 // { dg-final { scan-tree-dump-times "acc loop independent private.i" 2 "gimple" } }
 // { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
diff --git gcc/testsuite/gcc.dg/goacc/nested-function-1.c gcc/testsuite/gcc.dg/goacc/nested-function-1.c
new file mode 100644
index 0000000..e17c0e2
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/nested-function-1.c
@@ -0,0 +1,100 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+/* See gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 for the Fortran
+   version.  */
+
+int main ()  /* Defines and calls two nested functions that contain OpenACC constructs.  */
+{
+#define N 100
+  int nonlocal_arg;  /* The nonlocal_* variables are used inside nonlocal () below.  */
+  int nonlocal_a[N];
+  int nonlocal_i;
+  int nonlocal_j;
+
+  for (int i = 0; i < N; ++i)
+    nonlocal_a[i] = 5;
+  nonlocal_arg = 5;
+
+  void local ()  /* Every variable named in the OpenACC clauses below is declared in this function.  */
+  {
+    int local_i;
+    int local_arg;
+    int local_a[N];
+    int local_j;
+
+    for (int i = 0; i < N; ++i)
+      local_a[i] = 5;
+    local_arg = 5;
+
+#pragma acc kernels loop \
+  gang(num:local_arg) worker(local_arg) vector(local_arg) \
+  wait async(local_arg)
+    for (local_i = 0; local_i < N; ++local_i)
+      {
+#pragma acc cache (local_a[local_i:5])
+	local_a[local_i] = 100;
+#pragma acc loop seq tile(*)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;  /* Empty body: only the directive matters.  */
+#pragma acc loop auto independent tile(1)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+      }
+
+#pragma acc kernels loop \
+  gang(static:local_arg) worker(local_arg) vector(local_arg) \
+  wait(local_arg, local_arg + 1, local_arg + 2) async
+    for (local_i = 0; local_i < N; ++local_i)
+      {
+#pragma acc cache (local_a[local_i:4])
+	local_a[local_i] = 100;
+#pragma acc loop seq tile(1)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+#pragma acc loop auto independent tile(*)
+	for (local_j = 0; local_j < N; ++local_j)
+	  ;
+      }
+  }
+
+  void nonlocal ()  /* The OpenACC clauses below name nonlocal_* variables declared in main.  */
+  {
+    for (int i = 0; i < N; ++i)
+      nonlocal_a[i] = 5;
+    nonlocal_arg = 5;
+
+#pragma acc kernels loop \
+  gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+  wait async(nonlocal_arg)
+    for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+      {
+#pragma acc cache (nonlocal_a[nonlocal_i:3])
+	nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(2)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+#pragma acc loop auto independent tile(3)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+      }
+
+#pragma acc kernels loop \
+  gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+  wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+    for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+      {
+#pragma acc cache (nonlocal_a[nonlocal_i:2])
+	nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(*)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+#pragma acc loop auto independent tile(*)
+	for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+	  ;
+      }
+  }
+
+  local ();
+  nonlocal ();
+
+  return 0;
+}
diff --git gcc/testsuite/gcc.dg/goacc/nested-function-2.c gcc/testsuite/gcc.dg/goacc/nested-function-2.c
new file mode 100644
index 0000000..70c9ec8
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/nested-function-2.c
@@ -0,0 +1,45 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  int j = 0, k = 6, l = 7, m = 8;  /* Referenced from the nested functions below.  */
+  void simple (void)  /* Single acc loop whose bound, step and body use main's j, k, l, m.  */
+  {
+    int i;
+#pragma acc parallel
+    {
+#pragma acc loop
+      for (i = 0; i < m; i+= k)
+	j = (m + i - j) * l;
+    }
+  }
+  void collapse (void)  /* collapse(3) nest whose upper bounds k, l, m come from main.  */
+  {
+    int x, y, z;
+#pragma acc parallel
+    {
+#pragma acc loop collapse (3)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  void reduction (void)  /* Same nest as collapse, with a + reduction on main's j.  */
+  {
+    int x, y, z;
+#pragma acc parallel reduction (+:j)
+    {
+#pragma acc loop reduction (+:j) collapse (3)
+      for (x = 0; x < k; x++)
+	for (y = -5; y < l; y++)
+	  for (z = 0; z < m; z++)
+	    j += x + y + z;
+    }
+  }
+  simple();
+  collapse();
+  reduction();
+  return 0;
+}
diff --git gcc/testsuite/gcc.dg/goacc/pr71373.c gcc/testsuite/gcc.dg/goacc/pr71373.c
new file mode 100644
index 0000000..9381752
--- /dev/null
+++ gcc/testsuite/gcc.dg/goacc/pr71373.c
@@ -0,0 +1,41 @@
+/* Unintentional nested function usage.  */
+/* Due to missing right braces '}', the following functions are parsed as
+   nested functions.  This ran into an ICE.  */
+
+void foo (void)
+{
+  #pragma acc parallel
+  {
+    #pragma acc loop independent
+    for (int i = 0; i < 16; i++)
+      ;
+  // Note right brace '}' commented out here.
+  //}
+}  // This brace therefore closes the 'acc parallel' block, not foo.
+void bar (void)  // Still inside foo's body, so parsed as a nested function.
+{
+}
+
+// Adding right brace '}' here, to make this compile.
+}
+
+
+// ..., and the other way round:
+
+void BAR (void)
+{
+// Note right brace '}' commented out here.
+//}
+
+void FOO (void)  // BAR's body is still open, so FOO is parsed as a nested function.
+{
+  #pragma acc parallel
+  {
+    #pragma acc loop independent
+    for (int i = 0; i < 16; i++)
+      ;
+  }
+}
+
+// Adding right brace '}' here, to make this compile.
+}
diff --git gcc/testsuite/gfortran.dg/goacc/combined-directives.f90 gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
index 00b8822..ffa9371 100644
--- gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
+++ gcc/testsuite/gfortran.dg/goacc/combined-directives.f90
@@ -162,6 +162,7 @@ end subroutine test
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 2 "gimple" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 2 "gimple" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 2 "gimple" { xfail *-*-* } } }
+! XFAILed: OpenACC tile clauses are discarded during gimplification.
 ! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray-2.f95
similarity index 91%
copy from gcc/testsuite/gfortran.dg/goacc/cray.f95
copy to gcc/testsuite/gfortran.dg/goacc/cray-2.f95
index 705c18c..51b79b5 100644
--- gcc/testsuite/gfortran.dg/goacc/cray.f95
+++ gcc/testsuite/gfortran.dg/goacc/cray-2.f95
@@ -1,15 +1,16 @@
-! { dg-do compile } 
 ! { dg-additional-options "-fcray-pointer" }
+! See also cray.f95.
 
-module test
+program test
+  call oacc1
 contains
   subroutine oacc1
     implicit none
     integer :: i
     real :: pointee
     pointer (ptr, pointee)
-    !$acc declare device_resident (pointee) 
-    !$acc declare device_resident (ptr) 
+    !$acc declare device_resident (pointee)
+    !$acc declare device_resident (ptr)
     !$acc data copy (pointee) ! { dg-error "Cray pointee" }
     !$acc end data
     !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
@@ -52,4 +53,4 @@ contains
     !$acc update host (ptr)
     !$acc update self (ptr)
   end subroutine oacc1
-end module test
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/cray.f95 gcc/testsuite/gfortran.dg/goacc/cray.f95
index 705c18c..d6d5317 100644
--- gcc/testsuite/gfortran.dg/goacc/cray.f95
+++ gcc/testsuite/gfortran.dg/goacc/cray.f95
@@ -1,5 +1,5 @@
-! { dg-do compile } 
 ! { dg-additional-options "-fcray-pointer" }
+! See also cray-2.f95.
 
 module test
 contains
@@ -8,8 +8,8 @@ contains
     integer :: i
     real :: pointee
     pointer (ptr, pointee)
-    !$acc declare device_resident (pointee) 
-    !$acc declare device_resident (ptr) 
+    !$acc declare device_resident (pointee)
+    !$acc declare device_resident (ptr)
     !$acc data copy (pointee) ! { dg-error "Cray pointee" }
     !$acc end data
     !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
similarity index 89%
copy from gcc/testsuite/gfortran.dg/goacc/loop-1.f95
copy to gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
index a605f03..79665b9 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-1.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-1-2.f95
@@ -1,8 +1,10 @@
-module test
-  implicit none
+! See also loop-1.f95.
+
+program test
+  call test1
 contains
 
-subroutine test1  
+subroutine test1
   integer :: i, j, k, b(10)
   integer, dimension (30) :: a
   double precision :: d
@@ -30,15 +32,15 @@ subroutine test1
   do 300 d = 1, 30, 6
       i = d
   300 a(i) = 1
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
   !$acc loop
   do d = 1, 30, 5
        i = d
       a(i) = 2
   end do
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
   !$acc loop
   do i = 1, 30
       if (i .eq. 16) exit ! { dg-error "EXIT statement" }
@@ -53,7 +55,7 @@ subroutine test1
    end do last
 
   ! different types of loop are allowed
-  !$acc loop 
+  !$acc loop
   do i = 1,10
   end do
   !$acc loop
@@ -65,8 +67,8 @@ subroutine test1
   a(1) = 1 ! { dg-error "Expected DO loop" }
   do i = 1,10
   enddo
-  
-  ! combined directives may be used with/without end 
+
+  ! combined directives may be used with/without end
   !$acc parallel loop
   do i = 1,10
   enddo
@@ -82,11 +84,11 @@ subroutine test1
   enddo
   !$acc end kernels loop
 
-  !$acc kernels loop reduction(max:i) 
+  !$acc kernels loop reduction(max:i)
   do i = 1,10
   enddo
-  !$acc kernels 
-  !$acc loop reduction(max:i) 
+  !$acc kernels
+  !$acc loop reduction(max:i)
   do i = 1,10
   enddo
   !$acc end kernels
@@ -118,7 +120,7 @@ subroutine test1
     end do
     !$acc parallel loop collapse(2)
     do i = 1, 3
-        do j = 4, 6  
+        do j = 4, 6
         end do
     end do
     !$acc parallel loop collapse(2)
@@ -148,8 +150,8 @@ subroutine test1
     do i = 1, 3
         do r = 4, 6
         end do
-        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 }
-        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 }
+        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
+        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
     end do
 
     ! Both seq and independent are not allowed
@@ -171,4 +173,4 @@ subroutine test1
   enddo
 
 end subroutine test1
-end module test
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/loop-1.f95 gcc/testsuite/gfortran.dg/goacc/loop-1.f95
index a605f03..5f81b7a 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-1.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-1.f95
@@ -1,8 +1,10 @@
+! See also loop-1-2.f95.
+
 module test
   implicit none
 contains
 
-subroutine test1  
+subroutine test1
   integer :: i, j, k, b(10)
   integer, dimension (30) :: a
   double precision :: d
@@ -30,15 +32,15 @@ subroutine test1
   do 300 d = 1, 30, 6
       i = d
   300 a(i) = 1
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
   !$acc loop
   do d = 1, 30, 5
        i = d
       a(i) = 2
   end do
-  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 }
-  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 }
+  ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
+  ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
   !$acc loop
   do i = 1, 30
       if (i .eq. 16) exit ! { dg-error "EXIT statement" }
@@ -53,7 +55,7 @@ subroutine test1
    end do last
 
   ! different types of loop are allowed
-  !$acc loop 
+  !$acc loop
   do i = 1,10
   end do
   !$acc loop
@@ -65,8 +67,8 @@ subroutine test1
   a(1) = 1 ! { dg-error "Expected DO loop" }
   do i = 1,10
   enddo
-  
-  ! combined directives may be used with/without end 
+
+  ! combined directives may be used with/without end
   !$acc parallel loop
   do i = 1,10
   enddo
@@ -82,11 +84,11 @@ subroutine test1
   enddo
   !$acc end kernels loop
 
-  !$acc kernels loop reduction(max:i) 
+  !$acc kernels loop reduction(max:i)
   do i = 1,10
   enddo
-  !$acc kernels 
-  !$acc loop reduction(max:i) 
+  !$acc kernels
+  !$acc loop reduction(max:i)
   do i = 1,10
   enddo
   !$acc end kernels
@@ -118,7 +120,7 @@ subroutine test1
     end do
     !$acc parallel loop collapse(2)
     do i = 1, 3
-        do j = 4, 6  
+        do j = 4, 6
         end do
     end do
     !$acc parallel loop collapse(2)
@@ -148,8 +150,8 @@ subroutine test1
     do i = 1, 3
         do r = 4, 6
         end do
-        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 }
-        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 }
+        ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
+        ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
     end do
 
     ! Both seq and independent are not allowed
diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
similarity index 90%
copy from gcc/testsuite/gfortran.dg/goacc/loop-3.f95
copy to gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
index 2a866c7..9be74a8 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-3.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-3-2.f95
@@ -1,10 +1,13 @@
-! { dg-do compile }
 ! { dg-additional-options "-std=f2008" }
+! See also loop-3.f95.
 
+program test
+  call test1
+contains
 subroutine test1
   implicit none
   integer :: i, j
-  
+
   ! !$acc end loop not required by spec
   !$acc loop
   do i = 1,5
@@ -23,7 +26,7 @@ subroutine test1
   enddo
   !$acc end parallel
   !$acc end loop ! { dg-error "Unexpected" }
-  
+
   ! OpenACC supports Fortran 2008 do concurrent statement
   !$acc loop
   do concurrent (i = 1:5)
@@ -35,7 +38,7 @@ subroutine test1
       if (i .eq. j) cycle outer_loop
       if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
     end do inner_loop
-  end do outer_loop 
+  end do outer_loop
 
   outer_loop1: do i = 1, 5
     !$acc loop
@@ -50,6 +53,6 @@ subroutine test1
       if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
       if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
     end do inner_loop2
-  end do outer_loop2 
+  end do outer_loop2
 end subroutine test1
-
+end program test
diff --git gcc/testsuite/gfortran.dg/goacc/loop-3.f95 gcc/testsuite/gfortran.dg/goacc/loop-3.f95
index 2a866c7..30930f4 100644
--- gcc/testsuite/gfortran.dg/goacc/loop-3.f95
+++ gcc/testsuite/gfortran.dg/goacc/loop-3.f95
@@ -1,10 +1,10 @@
-! { dg-do compile }
 ! { dg-additional-options "-std=f2008" }
+! See also loop-3-2.f95.
 
 subroutine test1
   implicit none
   integer :: i, j
-  
+
   ! !$acc end loop not required by spec
   !$acc loop
   do i = 1,5
@@ -23,7 +23,7 @@ subroutine test1
   enddo
   !$acc end parallel
   !$acc end loop ! { dg-error "Unexpected" }
-  
+
   ! OpenACC supports Fortran 2008 do concurrent statement
   !$acc loop
   do concurrent (i = 1:5)
@@ -35,7 +35,7 @@ subroutine test1
       if (i .eq. j) cycle outer_loop
       if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
     end do inner_loop
-  end do outer_loop 
+  end do outer_loop
 
   outer_loop1: do i = 1, 5
     !$acc loop
@@ -50,6 +50,5 @@ subroutine test1
       if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
       if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
     end do inner_loop2
-  end do outer_loop2 
+  end do outer_loop2
 end subroutine test1
-
diff --git gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
new file mode 100644
index 0000000..2fcaa40
--- /dev/null
+++ gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90
@@ -0,0 +1,93 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+! See gcc/testsuite/gcc.dg/goacc/nested-function-1.c for the C version.
+
+program main
+  integer, parameter :: N = 100
+  integer :: nonlocal_arg
+  integer :: nonlocal_a(N)
+  integer :: nonlocal_i
+  integer :: nonlocal_j
+
+  nonlocal_a (:) = 5
+  nonlocal_arg = 5
+
+  call local ()
+  call nonlocal ()
+
+contains
+
+  subroutine local ()
+    integer :: local_i
+    integer :: local_arg
+    integer :: local_a(N)
+    integer :: local_j
+
+    local_a (:) = 5
+    local_arg = 5
+
+    !$acc kernels loop &
+    !$acc gang(num:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait async(local_arg)
+    do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 5))
+       local_a(local_i) = 100
+       !$acc loop seq tile(*)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(1)
+       do local_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+
+    !$acc kernels loop &
+    !$acc gang(static:local_arg) worker(local_arg) vector(local_arg) &
+    !$acc wait(local_arg, local_arg + 1, local_arg + 2) async
+    do local_i = 1, N
+       !$acc cache (local_a(local_i:local_i + 4))
+       local_a(local_i) = 100
+       !$acc loop seq tile(1)
+       do local_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
+       do local_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+  end subroutine local
+
+  subroutine nonlocal ()
+    nonlocal_a (:) = 5
+    nonlocal_arg = 5
+
+    !$acc kernels loop &
+    !$acc gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait async(nonlocal_arg)
+    do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 3))
+       nonlocal_a(nonlocal_i) = 100
+       !$acc loop seq tile(2)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(3)
+       do nonlocal_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+
+    !$acc kernels loop &
+    !$acc gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+    !$acc wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+    do nonlocal_i = 1, N
+       !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 2))
+       nonlocal_a(nonlocal_i) = 100
+       !$acc loop seq tile(*)
+       do nonlocal_j = 1, N
+       enddo
+       !$acc loop auto independent tile(*)
+       do nonlocal_j = 1, N
+       enddo
+    enddo
+    !$acc end kernels loop
+  end subroutine nonlocal
+end program main
diff --git gcc/testsuite/gfortran.dg/goacc/subroutines.f90 gcc/testsuite/gfortran.dg/goacc/subroutines.f90
deleted file mode 100644
index 6cab798..0000000
--- gcc/testsuite/gfortran.dg/goacc/subroutines.f90
+++ /dev/null
@@ -1,73 +0,0 @@
-! Exercise how tree-nested.c handles gang, worker vector and seq.
-
-! { dg-do compile } 
-
-program main
-  integer, parameter :: N = 100
-  integer :: nonlocal_arg
-  integer :: nonlocal_a(N)
-  integer :: nonlocal_i
-  integer :: nonlocal_j
-  
-  nonlocal_a (:) = 5
-  nonlocal_arg = 5
-  
-  call local ()
-  call nonlocal ()
-
-contains
-
-  subroutine local ()
-    integer :: local_i
-    integer :: local_arg
-    integer :: local_a(N)
-    integer :: local_j
-    
-    local_a (:) = 5
-    local_arg = 5
-
-    !$acc kernels loop gang(num:local_arg) worker(local_arg) vector(local_arg)
-    do local_i = 1, N
-       local_a(local_i) = 100
-       !$acc loop seq
-       do local_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-
-    !$acc kernels loop gang(static:local_arg) worker(local_arg) &
-    !$acc vector(local_arg)
-    do local_i = 1, N
-       local_a(local_i) = 100
-       !$acc loop seq
-       do local_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-  end subroutine local
-
-  subroutine nonlocal ()
-    nonlocal_a (:) = 5
-    nonlocal_arg = 5
-  
-    !$acc kernels loop gang(num:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
-    do nonlocal_i = 1, N
-       nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
-       do nonlocal_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-
-    !$acc kernels loop gang(static:nonlocal_arg) worker(nonlocal_arg) &
-    !$acc vector(nonlocal_arg)
-    do nonlocal_i = 1, N
-       nonlocal_a(nonlocal_i) = 100
-       !$acc loop seq
-       do nonlocal_j = 1, N
-       enddo
-    enddo
-    !$acc end kernels loop
-  end subroutine nonlocal
-end program main
diff --git gcc/tree-nested.c gcc/tree-nested.c
index 1433c3e..984fa81 100644
--- gcc/tree-nested.c
+++ gcc/tree-nested.c
@@ -1114,6 +1114,8 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1197,9 +1199,27 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE_DEVICE_TYPE:
+	  /* TODO.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
 	case OMP_CLAUSE_BIND:
+	case OMP_CLAUSE_DEVICE_RESIDENT:
 	case OMP_CLAUSE_NOHOST:
 	default:
 	  gcc_unreachable ();
@@ -1792,6 +1812,8 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_GANG:
 	case OMP_CLAUSE_WORKER:
 	case OMP_CLAUSE_VECTOR:
+	case OMP_CLAUSE_ASYNC:
+	case OMP_CLAUSE_WAIT:
 	  /* Several OpenACC clauses have optional arguments.  Check if they
 	     are present.  */
 	  if (OMP_CLAUSE_OPERAND (clause, 0))
@@ -1880,9 +1902,27 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_SIMD:
 	case OMP_CLAUSE_DEFAULTMAP:
 	case OMP_CLAUSE_SEQ:
+	case OMP_CLAUSE_INDEPENDENT:
+	case OMP_CLAUSE_AUTO:
 	  break;
 
+	case OMP_CLAUSE_DEVICE_TYPE:
+	  /* TODO.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE_TILE:
+	  /* OpenACC tile clauses are discarded during gimplification, so we
+	     don't expect to see anything here.  */
+	  gcc_unreachable ();
+
+	case OMP_CLAUSE__CACHE_:
+	  /* These clauses belong to the OpenACC cache directive, which is
+	     discarded during gimplification, so we don't expect to see
+	     anything here.  */
+	  gcc_unreachable ();
+
 	case OMP_CLAUSE_BIND:
+	case OMP_CLAUSE_DEVICE_RESIDENT:
 	case OMP_CLAUSE_NOHOST:
 	default:
 	  gcc_unreachable ();
diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp
index 17c26c5..af4e0d5 100644
--- libgomp/ChangeLog.gomp
+++ libgomp/ChangeLog.gomp
@@ -1,5 +1,16 @@
 2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/71373
+	Backport from trunk r237291:
+	2016-06-10  Thomas Schwinge  <thomas@codesourcery.com>
+		    Cesar Philippidis  <cesar@codesourcery.com>
+
+	* libgomp.oacc-c/nested-function-1.c: New file.
+	* libgomp.oacc-c/nested-function-2.c: Likewise.
+	* libgomp.oacc-fortran/nested-function-1.f90: Likewise.
+	* libgomp.oacc-fortran/nested-function-2.f90: Likewise.
+	* libgomp.oacc-fortran/nested-function-3.f90: Likewise.
+
 	PR c/71381
 	* testsuite/libgomp.oacc-fortran/cache-1.f90: Remove file.
 
diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-1.c libgomp/testsuite/libgomp.oacc-c/nested-function-1.c
new file mode 100644
index 0000000..fb2a3ac
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c/nested-function-1.c
@@ -0,0 +1,52 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  void test1 ()
+  {
+    int i, j, k;
+    int a[4][7][8];
+
+    __builtin_memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(4 - 1)
+    for (i = 1; i <= 3; i++)
+      for (j = 4; j <= 6; j++)
+	for (k = 5; k <= 7; k++)
+	  a[i][j][k] = i + j + k;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 4; j <= 6; j++)
+	for (k = 5; k <= 7; k++)
+	  if (a[i][j][k] != i + j + k)
+	    __builtin_abort();
+  }
+
+  void test2 ()
+  {
+    int i, j, k;
+    int a[4][4][4];
+
+    __builtin_memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 1; k <= 3; k++)
+	  a[i][j][k] = 1;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 1; k <= 3; k++)
+	  if (a[i][j][k] != 1)
+	    __builtin_abort ();
+  }
+
+  test1 ();
+  test2 ();
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-c/nested-function-2.c libgomp/testsuite/libgomp.oacc-c/nested-function-2.c
new file mode 100644
index 0000000..2c3f3fe
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c/nested-function-2.c
@@ -0,0 +1,155 @@
+/* Exercise nested function decomposition, gcc/tree-nested.c.  */
+
+int
+main (void)
+{
+  int p1 = 2, p2 = 6, p3 = 0, p4 = 4, p5 = 13, p6 = 18, p7 = 1, p8 = 1, p9 = 1;
+
+  void test1 ()
+  {
+    int i, j, k;
+    int a[4][4][4];
+
+    __builtin_memset (a, '\0', sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 2; k <= 3; k++)
+	  a[i][j][k] = 1;
+
+    for (i = 1; i <= 3; i++)
+      for (j = 1; j <= 3; j++)
+	for (k = 2; k <= 3; k++)
+	  if (a[i][j][k] != 1)
+	    __builtin_abort();
+  }
+
+  void test2 (int v1, int v2, int v3, int v4, int v5, int v6)
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  void test3 (int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8,
+      int v9)
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  void test4 ()
+  {
+    int i, j, k, l = 0, r = 0;
+    int a[7][5][19];
+    int b[7][5][19];
+    int v1 = p1, v2 = p2, v3 = p3, v4 = p4, v5 = p5, v6 = p6, v7 = p7, v8 = p8,
+      v9 = p9;
+
+    __builtin_memset (a, '\0', sizeof (a));
+    __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!l)
+	      a[i][j][k] += 1;
+	  }
+
+    for (i = v1; i <= v2; i += v7)
+      for (j = v3; j <= v4; j += v8)
+	for (k = v5; k <= v6; k += v9)
+	  {
+	    r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+	    if (!r)
+	      b[i][j][k] += 1;
+	  }
+
+    if (l != r)
+      __builtin_abort ();
+
+    for (i = v1; i <= v2; i++)
+      for (j = v3; j <= v4; j++)
+	for (k = v5; k <= v6; k++)
+	  if (b[i][j][k] != a[i][j][k])
+	    __builtin_abort ();
+  }
+
+  test1 ();
+  test2 (p1, p2, p3, p4, p5, p6);
+  test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9);
+  test4 ();
+
+  return 0;
+}
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90
new file mode 100644
index 0000000..fdbca44
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-1.f90
@@ -0,0 +1,70 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program collapse2
+  call test1
+  call test2
+contains
+  subroutine test1
+    integer :: i, j, k, a(1:3, 4:6, 5:7)
+    logical :: l
+    l = .false.
+    a(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop worker vector collapse(4 - 1)
+      do 164 i = 1, 3
+        do 164 j = 4, 6
+          do 164 k = 5, 7
+            a(i, j, k) = i + j + k
+164      end do
+    !$acc loop worker vector reduction(.or.:l) collapse(2)
+firstdo: do i = 1, 3
+        do j = 4, 6
+          do k = 5, 7
+            if (a(i, j, k) .ne. (i + j + k)) l = .true.
+          end do
+        end do
+      end do firstdo
+    !$acc end parallel
+    if (l) call abort
+  end subroutine test1
+
+  subroutine test2
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    a = 0
+    !$acc parallel
+    ! Use "gang(static:1)" here and below to effectively turn gang-redundant
+    ! execution mode into something like gang-single.
+    !$acc loop gang(static:1) collapse(1)
+      do 115 k=1,3
+         !$acc loop collapse(2)
+  dokk: do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc loop gang(static:1) collapse(1)
+      do k=1,3
+         if (any(a(k,1:3,1:3).ne.1)) call abort
+      enddo
+    ! Use "gang(static:1)" here and below to effectively turn gang-redundant
+    ! execution mode into something like gang-single.
+    !$acc loop gang(static:1) collapse(1)
+ dol: do 120 l=1,3
+    !$acc loop collapse(2)
+  doll: do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc loop gang(static:1) collapse(1)
+     do l=1,3
+        if (any(a(l,1:3,1:3).ne.2)) call abort
+     enddo
+    !$acc end parallel
+  end subroutine test2
+
+end program collapse2
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90
new file mode 100644
index 0000000..4e28196
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-2.f90
@@ -0,0 +1,173 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program collapse3
+  integer :: p1, p2, p3, p4, p5, p6, p7, p8, p9
+  p1 = 2
+  p2 = 6
+  p3 = -2
+  p4 = 4
+  p5 = 13
+  p6 = 18
+  p7 = 1
+  p8 = 1
+  p9 = 1
+  call test1
+  call test2 (p1, p2, p3, p4, p5, p6)
+  call test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9)
+  call test4
+contains
+  subroutine test1
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    !$acc parallel
+    !$acc loop collapse(3)
+      do 115 k=1,3
+dokk:   do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.1)) call abort
+    !$acc parallel
+    !$acc loop collapse(3)
+dol:  do 120 l=1,3
+doll:   do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.2)) call abort
+    end subroutine test1
+
+  subroutine test2(v1, v2, v3, v4, v5, v6)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.l) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test2
+
+  subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.l) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test3
+
+  subroutine test4
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    v1 = p1
+    v2 = p2
+    v3 = p3
+    v4 = p4
+    v5 = p5
+    v6 = p6
+    v7 = p7
+    v8 = p8
+    v9 = p9
+    !$acc parallel reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+         do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+         end do
+      end do
+    end do
+  end subroutine test4
+
+end program collapse3
diff --git libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90 libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90
new file mode 100644
index 0000000..2f6485e
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-fortran/nested-function-3.f90
@@ -0,0 +1,244 @@
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program sub_collapse_3
+  call test1
+  call test2 (2, 6, -2, 4, 13, 18)
+  call test3 (2, 6, -2, 4, 13, 18, 1, 1, 1)
+  call test4
+  call test5 (2, 6, -2, 4, 13, 18)
+  call test6 (2, 6, -2, 4, 13, 18, 1, 1, 1)
+contains
+  subroutine test1
+    integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+    !$acc parallel
+    !$acc loop collapse(3)
+      do 115 k=1,3
+dokk:   do kk=1,3
+          do kkk=1,3
+            a(k,kk,kkk) = 1
+          enddo
+        enddo dokk
+115   continue
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.1)) call abort
+    !$acc parallel
+    !$acc loop collapse(3)
+dol:  do 120 l=1,3
+doll:   do ll=1,3
+          do lll=1,3
+            a(l,ll,lll) = 2
+          enddo
+        enddo doll
+120   end do dol
+    !$acc end parallel
+    if (any(a(1:3,1:3,1:3).ne.2)) call abort
+  end subroutine test1
+
+  subroutine test2(v1, v2, v3, v4, v5, v6)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.l) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test2
+
+  subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.l) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test3
+
+  subroutine test4
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    v1 = 2
+    v2 = 6
+    v3 = -2
+    v4 = 4
+    v5 = 13
+    v6 = 18
+    v7 = 1
+    v8 = 1
+    v9 = 1
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+         do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+         end do
+      end do
+    end do
+  end subroutine test4
+
+  subroutine test5(v1, v2, v3, v4, v5, v6)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2
+        do j = v3, v4
+          do k = v5, v6
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2
+      do j = v3, v4
+        do k = v5, v6
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test5
+
+  subroutine test6(v1, v2, v3, v4, v5, v6, v7, v8, v9)
+    integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+    integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+    logical :: l, r
+    l = .false.
+    r = .false.
+    a(:, :, :) = 0
+    b(:, :, :) = 0
+    !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+    !$acc loop reduction (.or.:l) collapse (3)
+      do i = v1, v2, v7
+        do j = v3, v4, v8
+          do k = v5, v6, v9
+            l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+            l = l.or.k.lt.13.or.k.gt.18
+            if (.not.l) a(i, j, k) = a(i, j, k) + 1
+            m = i * 100 + j * 10 + k
+          end do
+        end do
+      end do
+    !$acc end parallel
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+          r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          r = r.or.k.lt.13.or.k.gt.18
+          if (.not.r) b(i, j, k) = b(i, j, k) + 1
+        end do
+      end do
+    end do
+    if (l .neqv. r) call abort
+    do i = v1, v2, v7
+      do j = v3, v4, v8
+        do k = v5, v6, v9
+           if (a(i, j, k) .ne. b(i, j, k)) call abort
+        end do
+      end do
+    end do
+  end subroutine test6
+
+end program sub_collapse_3


Grüße
 Thomas

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c
  2016-06-01 15:07 [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition Thomas Schwinge
  2016-06-01 15:12 ` Jakub Jelinek
@ 2016-06-13 14:43 ` Thomas Schwinge
  2016-06-13 14:48   ` Thomas Schwinge
  2016-06-13 14:49   ` Jakub Jelinek
  1 sibling, 2 replies; 13+ messages in thread
From: Thomas Schwinge @ 2016-06-13 14:43 UTC (permalink / raw)
  To: Jakub Jelinek, gcc-patches

Hi!

On Wed, 01 Jun 2016 17:06:42 +0200, Thomas Schwinge <thomas@codesourcery.com> wrote:
> Here are the OpenACC bits of <http://gcc.gnu.org/PR71373>.

In the PR, Jakub clarified that all the other missing OMP_CLAUSE_* are in
fact unreachable here.  OK to document this as follows, in trunk?

The "anything else" default case in fact now is just the non-clause
OMP_CLAUSE_ERROR, so when adding a case for that one, we could then
remove the default case, and thus get a compiler warning when new clauses
are added in the future, without handling them here.  That makes sense to
me (would have made apparent much earlier the original problem of missing
handling for certain OMP_CLAUSE_*), but based on feedback received, it
feels as if I'm the only supporter of such "defensive" programming
paradigms?

commit c6b10a9bc1437395c4931d43f30e778152a28cb2
Author: Thomas Schwinge <thomas@codesourcery.com>
Date:   Mon Jun 13 16:29:37 2016 +0200

    [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c
    
    	gcc/
    	* tree-nested.c (convert_nonlocal_omp_clauses)
    	(convert_local_omp_clauses): Document missing OMP_CLAUSE_*.
---
 gcc/tree-nested.c | 60 ++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 42 insertions(+), 18 deletions(-)

diff --git gcc/tree-nested.c gcc/tree-nested.c
index 812f619..62cb01f 100644
--- gcc/tree-nested.c
+++ gcc/tree-nested.c
@@ -1203,17 +1203,29 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_AUTO:
 	  break;
 
+	  /* OpenACC tile clauses are discarded during gimplification.  */
 	case OMP_CLAUSE_TILE:
-	  /* OpenACC tile clauses are discarded during gimplification, so we
-	     don't expect to see anything here.  */
-	  gcc_unreachable ();
-
+	  /* The following clause belongs to the OpenACC cache directive, which
+	     is discarded during gimplification.  */
 	case OMP_CLAUSE__CACHE_:
-	  /* These clauses belong to the OpenACC cache directive, which is
-	     discarded during gimplification, so we don't expect to see
-	     anything here.  */
-	  gcc_unreachable ();
-
+	  /* The following clauses are only allowed in the OpenMP declare simd
+	     directive, so not seen here.  */
+	case OMP_CLAUSE_UNIFORM:
+	case OMP_CLAUSE_INBRANCH:
+	case OMP_CLAUSE_NOTINBRANCH:
+	  /* The following clauses are only allowed on OpenMP cancel and
+	     cancellation point directives, which at this point have already
+	     been lowered into a function call.  */
+	case OMP_CLAUSE_FOR:
+	case OMP_CLAUSE_PARALLEL:
+	case OMP_CLAUSE_SECTIONS:
+	case OMP_CLAUSE_TASKGROUP:
+	  /* The following clauses are only added during OMP lowering; nested
+	     function decomposition happens before that.  */
+	case OMP_CLAUSE__LOOPTEMP_:
+	case OMP_CLAUSE__SIMDUID_:
+	case OMP_CLAUSE__GRIDDIM_:
+	  /* Anything else.  */
 	default:
 	  gcc_unreachable ();
 	}
@@ -1899,17 +1911,29 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_AUTO:
 	  break;
 
+	  /* OpenACC tile clauses are discarded during gimplification.  */
 	case OMP_CLAUSE_TILE:
-	  /* OpenACC tile clauses are discarded during gimplification, so we
-	     don't expect to see anything here.  */
-	  gcc_unreachable ();
-
+	  /* The following clause belongs to the OpenACC cache directive, which
+	     is discarded during gimplification.  */
 	case OMP_CLAUSE__CACHE_:
-	  /* These clauses belong to the OpenACC cache directive, which is
-	     discarded during gimplification, so we don't expect to see
-	     anything here.  */
-	  gcc_unreachable ();
-
+	  /* The following clauses are only allowed in the OpenMP declare simd
+	     directive, so not seen here.  */
+	case OMP_CLAUSE_UNIFORM:
+	case OMP_CLAUSE_INBRANCH:
+	case OMP_CLAUSE_NOTINBRANCH:
+	  /* The following clauses are only allowed on OpenMP cancel and
+	     cancellation point directives, which at this point have already
+	     been lowered into a function call.  */
+	case OMP_CLAUSE_FOR:
+	case OMP_CLAUSE_PARALLEL:
+	case OMP_CLAUSE_SECTIONS:
+	case OMP_CLAUSE_TASKGROUP:
+	  /* The following clauses are only added during OMP lowering; nested
+	     function decomposition happens before that.  */
+	case OMP_CLAUSE__LOOPTEMP_:
+	case OMP_CLAUSE__SIMDUID_:
+	case OMP_CLAUSE__GRIDDIM_:
+	  /* Anything else.  */
 	default:
 	  gcc_unreachable ();
 	}


Grüße
 Thomas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c
  2016-06-13 14:43 ` [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c Thomas Schwinge
@ 2016-06-13 14:48   ` Thomas Schwinge
  2016-06-13 14:49   ` Jakub Jelinek
  1 sibling, 0 replies; 13+ messages in thread
From: Thomas Schwinge @ 2016-06-13 14:48 UTC (permalink / raw)
  To: Jakub Jelinek, gcc-patches

Hi!

On Mon, 13 Jun 2016 16:43:25 +0200, Thomas Schwinge <thomas@codesourcery.com> wrote:
> On Wed, 01 Jun 2016 17:06:42 +0200, Thomas Schwinge <thomas@codesourcery.com> wrote:
> > Here are the OpenACC bits of <http://gcc.gnu.org/PR71373>.
> 
> In the PR, Jakub clarified that all the missing other OMP_CLAUSE_* are in
> fact all unreachable here.  [...]
> 
> The "anything else" default case in fact now is just the non-clause
> OMP_CLAUSE_ERROR, so when adding a case for that one, we could then
> remove the default case, and thus get a compiler warning when new clauses
> are added in the future, without handling them here.  That makes sense to
> me (would have made apparent much earlier the original problem of missing
> handling for certain OMP_CLAUSE_*), but based on feedback received, it
> feels as if I'm the only supporter of such "defensive" programming
> paradigms?

That is, something like this:

--- gcc/tree-nested.c
+++ gcc/tree-nested.c
@@ -1225,8 +1225,9 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE__LOOPTEMP_:
 	case OMP_CLAUSE__SIMDUID_:
 	case OMP_CLAUSE__GRIDDIM_:
-	  /* Anything else.  */
-	default:
+	  /* This non-clause should never be seen outside of the front
+	     ends.  */
+	case OMP_CLAUSE_ERROR:
 	  gcc_unreachable ();
 	}
     }
@@ -1933,8 +1934,9 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE__LOOPTEMP_:
 	case OMP_CLAUSE__SIMDUID_:
 	case OMP_CLAUSE__GRIDDIM_:
-	  /* Anything else.  */
-	default:
+	  /* This non-clause should never be seen outside of the front
+	     ends.  */
+	case OMP_CLAUSE_ERROR:
 	  gcc_unreachable ();
 	}
     }


Grüße
 Thomas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c
  2016-06-13 14:43 ` [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c Thomas Schwinge
  2016-06-13 14:48   ` Thomas Schwinge
@ 2016-06-13 14:49   ` Jakub Jelinek
  2016-06-13 16:41     ` Thomas Schwinge
  1 sibling, 1 reply; 13+ messages in thread
From: Jakub Jelinek @ 2016-06-13 14:49 UTC (permalink / raw)
  To: Thomas Schwinge; +Cc: gcc-patches

On Mon, Jun 13, 2016 at 04:43:25PM +0200, Thomas Schwinge wrote:
> On Wed, 01 Jun 2016 17:06:42 +0200, Thomas Schwinge <thomas@codesourcery.com> wrote:
> > Here are the OpenACC bits of <http://gcc.gnu.org/PR71373>.
> 
> In the PR, Jakub clarified that all the missing other OMP_CLAUSE_* are in
> fact all unreachable here.  OK to document this as follows, in trunk?
> 
> The "anything else" default case in fact now is just the non-clause
> OMP_CLAUSE_ERROR, so when adding a case for that one, we could then
> remove the default case, and thus get a compiler warning when new clauses
> are added in the future, without handling them here.  That makes sense to
> me (would have made apparent much earlier the original problem of missing
> handling for certain OMP_CLAUSE_*), but based on feedback received, it
> feels as if I'm the only supporter of such "defensive" programming
> paradigms?
> 
> commit c6b10a9bc1437395c4931d43f30e778152a28cb2
> Author: Thomas Schwinge <thomas@codesourcery.com>
> Date:   Mon Jun 13 16:29:37 2016 +0200
> 
>     [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c
>     
>     	gcc/
>     	* tree-nested.c (convert_nonlocal_omp_clauses):
>     	(convert_local_omp_clauses): Document missing OMP_CLAUSE_*.

Ok, but please mention the PR line above the ChangeLog entry.  Thanks.

	Jakub

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c
  2016-06-13 14:49   ` Jakub Jelinek
@ 2016-06-13 16:41     ` Thomas Schwinge
  0 siblings, 0 replies; 13+ messages in thread
From: Thomas Schwinge @ 2016-06-13 16:41 UTC (permalink / raw)
  To: Jakub Jelinek, gcc-patches

Hi!

On Mon, 13 Jun 2016 16:48:56 +0200, Jakub Jelinek <jakub@redhat.com> wrote:
> On Mon, Jun 13, 2016 at 04:43:25PM +0200, Thomas Schwinge wrote:
> > On Wed, 01 Jun 2016 17:06:42 +0200, Thomas Schwinge <thomas@codesourcery.com> wrote:
> > > Here are the OpenACC bits of <http://gcc.gnu.org/PR71373>.
> > 
> > In the PR, Jakub clarified that all the missing other OMP_CLAUSE_* are in
> > fact all unreachable here.

> > The "anything else" default case in fact now is just the non-clause
> > OMP_CLAUSE_ERROR, so when adding a case for that one, we could then
> > remove the default case, and thus get a compiler warning when new clauses
> > are added in the future, without handling them here.  That makes sense to
> > me (would have made apparent much earlier the original problem of missing
> > handling for certain OMP_CLAUSE_*), but based on feedback received, it
> > feels as if I'm the only supporter of such "defensive" programming
> > paradigms?

Any thoughts about that,
<http://news.gmane.org/find-root.php?message_id=%3C874m8xqh5w.fsf%40hertz.schwinge.homeip.net%3E>?

> >     [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c

> Ok, [...]

As posted, committed to trunk in r237386:

commit be2a5a8e8ffd13c099d372c4fcc363d5cd3c83c2
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Mon Jun 13 16:37:29 2016 +0000

    [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c
    
    	gcc/
    	PR middle-end/71373
    	* tree-nested.c (convert_nonlocal_omp_clauses)
    	(convert_local_omp_clauses): Document missing OMP_CLAUSE_*.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@237386 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog     |  4 ++++
 gcc/tree-nested.c | 60 ++++++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 46 insertions(+), 18 deletions(-)

diff --git gcc/ChangeLog gcc/ChangeLog
index ff685b1..89098e7 100644
--- gcc/ChangeLog
+++ gcc/ChangeLog
@@ -1,5 +1,9 @@
 2016-06-13  Thomas Schwinge  <thomas@codesourcery.com>
 
+	PR middle-end/71373
+	* tree-nested.c (convert_nonlocal_omp_clauses)
+	(convert_local_omp_clauses): Document missing OMP_CLAUSE_*.
+
 	* tree-cfg.c (edge_to_cases_cleanup): Fix CASE_CHAIN typo.
 	* tree.def (CASE_LABEL_EXPR): Likewise.
 
diff --git gcc/tree-nested.c gcc/tree-nested.c
index 812f619..62cb01f 100644
--- gcc/tree-nested.c
+++ gcc/tree-nested.c
@@ -1203,17 +1203,29 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_AUTO:
 	  break;
 
+	  /* OpenACC tile clauses are discarded during gimplification.  */
 	case OMP_CLAUSE_TILE:
-	  /* OpenACC tile clauses are discarded during gimplification, so we
-	     don't expect to see anything here.  */
-	  gcc_unreachable ();
-
+	  /* The following clause belongs to the OpenACC cache directive, which
+	     is discarded during gimplification.  */
 	case OMP_CLAUSE__CACHE_:
-	  /* These clauses belong to the OpenACC cache directive, which is
-	     discarded during gimplification, so we don't expect to see
-	     anything here.  */
-	  gcc_unreachable ();
-
+	  /* The following clauses are only allowed in the OpenMP declare simd
+	     directive, so not seen here.  */
+	case OMP_CLAUSE_UNIFORM:
+	case OMP_CLAUSE_INBRANCH:
+	case OMP_CLAUSE_NOTINBRANCH:
+	  /* The following clauses are only allowed on OpenMP cancel and
+	     cancellation point directives, which at this point have already
+	     been lowered into a function call.  */
+	case OMP_CLAUSE_FOR:
+	case OMP_CLAUSE_PARALLEL:
+	case OMP_CLAUSE_SECTIONS:
+	case OMP_CLAUSE_TASKGROUP:
+	  /* The following clauses are only added during OMP lowering; nested
+	     function decomposition happens before that.  */
+	case OMP_CLAUSE__LOOPTEMP_:
+	case OMP_CLAUSE__SIMDUID_:
+	case OMP_CLAUSE__GRIDDIM_:
+	  /* Anything else.  */
 	default:
 	  gcc_unreachable ();
 	}
@@ -1899,17 +1911,29 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi)
 	case OMP_CLAUSE_AUTO:
 	  break;
 
+	  /* OpenACC tile clauses are discarded during gimplification.  */
 	case OMP_CLAUSE_TILE:
-	  /* OpenACC tile clauses are discarded during gimplification, so we
-	     don't expect to see anything here.  */
-	  gcc_unreachable ();
-
+	  /* The following clause belongs to the OpenACC cache directive, which
+	     is discarded during gimplification.  */
 	case OMP_CLAUSE__CACHE_:
-	  /* These clauses belong to the OpenACC cache directive, which is
-	     discarded during gimplification, so we don't expect to see
-	     anything here.  */
-	  gcc_unreachable ();
-
+	  /* The following clauses are only allowed in the OpenMP declare simd
+	     directive, so not seen here.  */
+	case OMP_CLAUSE_UNIFORM:
+	case OMP_CLAUSE_INBRANCH:
+	case OMP_CLAUSE_NOTINBRANCH:
+	  /* The following clauses are only allowed on OpenMP cancel and
+	     cancellation point directives, which at this point have already
+	     been lowered into a function call.  */
+	case OMP_CLAUSE_FOR:
+	case OMP_CLAUSE_PARALLEL:
+	case OMP_CLAUSE_SECTIONS:
+	case OMP_CLAUSE_TASKGROUP:
+	  /* The following clauses are only added during OMP lowering; nested
+	     function decomposition happens before that.  */
+	case OMP_CLAUSE__LOOPTEMP_:
+	case OMP_CLAUSE__SIMDUID_:
+	case OMP_CLAUSE__GRIDDIM_:
+	  /* Anything else.  */
 	default:
 	  gcc_unreachable ();
 	}


Grüße
 Thomas

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2016-06-13 16:41 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-01 15:07 [PR middle-end/71373] Handle more OMP_CLAUSE_* in nested function decomposition Thomas Schwinge
2016-06-01 15:12 ` Jakub Jelinek
2016-06-02 16:21   ` Thomas Schwinge
2016-06-02 16:25     ` Jakub Jelinek
2016-06-10 10:36       ` Thomas Schwinge
2016-06-13 14:43 ` [PR middle-end/71373] Document missing OMP_CLAUSE_* in gcc/tree-nested.c Thomas Schwinge
2016-06-13 14:48   ` Thomas Schwinge
2016-06-13 14:49   ` Jakub Jelinek
2016-06-13 16:41     ` Thomas Schwinge
  -- strict thread matches above, loose matches on Subject: below --
2014-07-18 21:41 [patch,gomp-4_0-branch] acc nested function support Cesar Philippidis
2014-07-29  8:30 ` Thomas Schwinge
2014-11-05  0:45 ` Cesar Philippidis
2014-11-05 15:24   ` David Malcolm

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).