* [gomp4] check for compatible parallelism with acc routines
@ 2015-08-28 15:56 Cesar Philippidis
2015-08-28 17:37 ` Nathan Sidwell
0 siblings, 1 reply; 2+ messages in thread
From: Cesar Philippidis @ 2015-08-28 15:56 UTC (permalink / raw)
To: gcc-patches; +Cc: Nathan Sidwell, Julian Brown
[-- Attachment #1: Type: text/plain, Size: 1440 bytes --]
This patch teaches omplower to report any incompatible parallelism when
using routines. I also fixed a minor bug involving reductions inside
routines and removed a dead variable inside execute_oacc_transform which
caused a build warning.
There are two scenarios involving acc routines that need checking:
1. calls to routines
2. acc loops inside routines
For both of these cases, I'm utilizing the routine dimensions associated
with the 'oacc function' attribute.
A couple of libgomp test cases were clearly bogus. E.g., you cannot have
a gang loop inside a worker routine, nor can you call a vector routine
from a vector loop. This patch corrects those tests, too.
I encountered one ambiguity in the spec involving the seq loop clause.
The spec says that seq loops are supposed to be executed sequentially by
a single thread. I'm not sure whether that implies that a seq loop
cannot be embedded into a gang/worker/vector loop, or if a gwv loop can
nest inside a seq loop. E.g.
#pragma acc loop gang
for (...)
{
#pragma acc loop seq
for (...)
}
and
#pragma acc loop seq
for (...)
{
#pragma acc loop gang
for (...)
}
Right now, gcc is permitting both of these loops. I.e., only the seq
loop itself is executing in a non-partitioned mode. Julian inquired
about this in the openacc technical list a while ago, but I don't think
he got a response.
This patch has been applied to gomp-4_0-branch.
Cesar
[-- Attachment #2: routines-error-handling.diff --]
[-- Type: text/x-patch, Size: 16815 bytes --]
2015-08-28 Cesar Philippidis <cesar@codesourcery.com>
gcc/
* omp-low.c (extract_oacc_routine_gwv): New function.
(build_outer_var_ref): Handle refs inside acc routines.
(scan_omp_for): Check nested parallelism inside acc routines.
(scan_omp_1_stmt): Check for compatible parallelism when calling
routines.
(execute_oacc_transform): Remove dead variable.
gcc/testsuite/
* c-c++-common/goacc/routine-6.c: New test.
* c-c++-common/goacc/routine-7.c: New test.
* gfortran.dg/goacc/routine-4.f90: New test.
* gfortran.dg/goacc/routine-5.f90: New test.
libgomp/
* testsuite/libgomp.oacc-c-c++-common/routine-4.c: Fix calls to
acc routines.
* testsuite/libgomp.oacc-fortran/routine-7.f90: Likewise.
* testsuite/libgomp.oacc-fortran/vector-routine.f90: Likewise.
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 4312a60..e8d7513 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -415,6 +415,35 @@ is_combined_parallel (struct omp_region *region)
return region->is_combined_parallel;
}
+/* Return the gang, worker and vector attributes associated with
+ FNDECL. Returns a GOMP_DIM for the lowest level of parallelism beginning
+ with GOMP_DIM_GANG, or -1 if the routine is a SEQ. Otherwise, return 0 if
+ the FNDECL is not an acc routine.
+*/
+
+static int
+extract_oacc_routine_gwv (tree fndecl)
+{
+ tree attrs = get_oacc_fn_attrib (fndecl);
+ tree pos;
+ unsigned gwv = 0;
+ int i;
+ int ret = 0;
+
+ if (attrs != NULL_TREE)
+ {
+ for (i = 0, pos = TREE_VALUE (attrs);
+ gwv == 0 && i != GOMP_DIM_MAX;
+ i++, pos = TREE_CHAIN (pos))
+ if (TREE_PURPOSE (pos) != boolean_false_node)
+ return 1 << i;
+
+ ret = -1;
+ }
+
+ return ret;
+}
+
/* Extract the header elements of parallel loop FOR_STMT and store
them into *FD. */
@@ -1227,7 +1256,8 @@ build_outer_var_ref (tree var, omp_context *ctx)
else
x = lookup_decl (var, ctx->outer);
}
- else if (is_reference (var) || is_oacc_parallel (ctx))
+ else if (is_reference (var) || is_oacc_parallel (ctx)
+ || extract_oacc_routine_gwv (current_function_decl) != 0)
/* This can happen with orphaned constructs. If var is reference, it is
possible it is shared and as such valid. */
x = var;
@@ -2578,9 +2608,16 @@ scan_omp_for (gomp_for *stmt, omp_context *outer_ctx)
bool gwv_clause = false;
bool auto_clause = false;
bool seq_clause = false;
+ int gwv_routine = 0;
if (outer_ctx)
outer_type = gimple_code (outer_ctx->stmt);
+ else
+ {
+ gwv_routine = extract_oacc_routine_gwv (current_function_decl);
+ if (gwv_routine > 0)
+ gwv_routine = gwv_routine >> 1;
+ }
ctx = new_omp_context (stmt, outer_ctx);
@@ -2699,6 +2736,12 @@ scan_omp_for (gomp_for *stmt, omp_context *outer_ctx)
&& ctx->gwv_this > ctx->gwv_below)
error_at (gimple_location (stmt),
"gang, worker and vector must occur in this order in a loop nest");
+ else if (!outer_ctx && ctx->gwv_this != 0 && gwv_routine != 0
+ && ((ffs (ctx->gwv_this) <= gwv_routine)
+ || gwv_routine < 0))
+ error_at (gimple_location (stmt),
+ "invalid parallelism inside acc routine");
+
if (outer_ctx && outer_type == GIMPLE_OMP_FOR)
outer_ctx->gwv_below |= ctx->gwv_below;
}
@@ -3287,6 +3330,16 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
default:
break;
}
+ else if (ctx && is_gimple_omp_oacc (ctx->stmt)
+ && !is_oacc_parallel (ctx))
+ {
+ /* Is this a call to an acc routine? */
+ int gwv = extract_oacc_routine_gwv (fndecl);
+
+ if (gwv > 0 && ffs (ctx->gwv_this) >= ffs (gwv))
+ error_at (gimple_location (stmt),
+ "incompatible parallelism with acc routine");
+ }
}
}
if (remove)
@@ -3352,6 +3405,7 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
insert_decl_map (&ctx->cb, var, var);
}
break;
+
default:
*handled_ops_p = false;
break;
@@ -14815,7 +14869,6 @@ execute_oacc_transform ()
else if (gimple_call_internal_p (stmt))
{
unsigned ifn_code = gimple_call_internal_fn (stmt);
- int retval = 0;
switch (ifn_code)
{
default: break;
diff --git a/gcc/testsuite/c-c++-common/goacc/routine-6.c b/gcc/testsuite/c-c++-common/goacc/routine-6.c
new file mode 100644
index 0000000..72f035f
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/routine-6.c
@@ -0,0 +1,105 @@
+/* Test invalid calls to routines. */
+/* { dg-do compile } */
+
+#pragma acc routine gang
+int
+gang ()
+{
+ return 1;
+}
+
+#pragma acc routine worker
+int
+worker ()
+{
+ return 1;
+}
+
+#pragma acc routine vector
+int
+vector ()
+{
+ return 1;
+}
+
+#pragma acc routine seq
+int
+seq ()
+{
+ return 1;
+}
+
+int
+main ()
+{
+ int red = 0;
+#pragma acc parallel copy (red)
+ {
+ /* Independent/seq loop tests. */
+#pragma acc loop reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red += gang ();
+
+#pragma acc loop reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red += worker ();
+
+#pragma acc loop reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red += vector ();
+
+ /* Gang routine tests. */
+#pragma acc loop gang reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red += gang (); // { dg-error "incompatible parallelism with acc routine" }
+
+#pragma acc loop worker reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red += gang (); // { dg-error "incompatible parallelism with acc routine" }
+
+#pragma acc loop vector reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red += gang (); // { dg-error "incompatible parallelism with acc routine" }
+
+ /* Worker routine tests. */
+#pragma acc loop gang reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red += worker ();
+
+#pragma acc loop worker reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red += worker (); // { dg-error "incompatible parallelism with acc routine" }
+
+#pragma acc loop vector reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red += worker (); // { dg-error "incompatible parallelism with acc routine" }
+
+ /* Vector routine tests. */
+#pragma acc loop gang reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red += vector ();
+
+#pragma acc loop worker reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red += vector ();
+
+#pragma acc loop vector reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red += vector (); // { dg-error "incompatible parallelism with acc routine" }
+
+ /* Seq routine tests. */
+#pragma acc loop gang reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red += seq ();
+
+#pragma acc loop worker reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red += seq ();
+
+#pragma acc loop vector reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red += seq ();
+ }
+
+ return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/goacc/routine-7.c b/gcc/testsuite/c-c++-common/goacc/routine-7.c
new file mode 100644
index 0000000..96f3e3a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/routine-7.c
@@ -0,0 +1,94 @@
+/* Test invalid intra-routine parallelism. */
+/* { dg-do compile } */
+
+#pragma acc routine gang
+int
+gang (int red)
+{
+#pragma acc loop reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red ++;
+
+#pragma acc loop gang reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red ++;
+
+#pragma acc loop worker reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red ++;
+
+#pragma acc loop vector reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red ++;
+
+ return 1;
+}
+
+#pragma acc routine worker
+int
+worker (int red)
+{
+#pragma acc loop reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red ++;
+
+#pragma acc loop gang reduction (+:red) // { dg-error "invalid parallelism inside acc routine" }
+ for (int i = 0; i < 10; i++)
+ red ++;
+
+#pragma acc loop worker reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red ++;
+
+#pragma acc loop vector reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red ++;
+
+ return 1;
+}
+
+#pragma acc routine vector
+int
+vector (int red)
+{
+#pragma acc loop reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red ++;
+
+#pragma acc loop gang reduction (+:red) // { dg-error "invalid parallelism inside acc routine" }
+ for (int i = 0; i < 10; i++)
+ red ++;
+
+#pragma acc loop worker reduction (+:red) // { dg-error "invalid parallelism inside acc routine" }
+ for (int i = 0; i < 10; i++)
+ red ++;
+
+#pragma acc loop vector reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red ++;
+
+ return 1;
+}
+
+#pragma acc routine seq
+int
+seq (int red)
+{
+#pragma acc loop reduction (+:red)
+ for (int i = 0; i < 10; i++)
+ red ++;
+
+#pragma acc loop gang reduction (+:red) // { dg-error "invalid parallelism inside acc routine" }
+ for (int i = 0; i < 10; i++)
+ red ++;
+
+#pragma acc loop worker reduction (+:red) // { dg-error "invalid parallelism inside acc routine" }
+ for (int i = 0; i < 10; i++)
+ red ++;
+
+#pragma acc loop vector reduction (+:red) // { dg-error "invalid parallelism inside acc routine" }
+ for (int i = 0; i < 10; i++)
+ red ++;
+
+ return 1;
+}
diff --git a/gcc/testsuite/gfortran.dg/goacc/routine-4.f90 b/gcc/testsuite/gfortran.dg/goacc/routine-4.f90
new file mode 100644
index 0000000..2587ffd
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/routine-4.f90
@@ -0,0 +1,160 @@
+! Test invalid calls to routines.
+
+module param
+ integer, parameter :: N = 32
+end module param
+
+program main
+ use param
+ integer :: i
+ integer :: a(N)
+
+ do i = 1, N
+ a(i) = i
+ end do
+
+ !
+ ! Seq routine tests.
+ !
+
+ !$acc parallel copy (a)
+ !$acc loop
+ do i = 1, N
+ call seq (a)
+ end do
+
+ !$acc loop gang
+ do i = 1, N
+ call seq (a)
+ end do
+
+ !$acc loop worker
+ do i = 1, N
+ call seq (a)
+ end do
+
+ !$acc loop vector
+ do i = 1, N
+ call seq (a)
+ end do
+ !$acc end parallel
+
+ !
+ ! Gang routines loops.
+ !
+
+ !$acc parallel copy (a)
+ !$acc loop
+ do i = 1, N
+ call gang (a)
+ end do
+
+ !$acc loop gang
+ do i = 1, N
+ call gang (a) ! { dg-error "incompatible parallelism with acc routine" }
+ end do
+
+ !$acc loop worker
+ do i = 1, N
+ call gang (a) ! { dg-error "incompatible parallelism with acc routine" }
+ end do
+
+ !$acc loop vector
+ do i = 1, N
+ call gang (a) ! { dg-error "incompatible parallelism with acc routine" }
+ end do
+ !$acc end parallel
+
+ !
+ ! Worker routines loops.
+ !
+
+ !$acc parallel copy (a)
+ !$acc loop
+ do i = 1, N
+ call worker (a)
+ end do
+
+ !$acc loop gang
+ do i = 1, N
+ call worker (a)
+ end do
+
+ !$acc loop worker
+ do i = 1, N
+ call worker (a) ! { dg-error "incompatible parallelism with acc routine" }
+ end do
+
+ !$acc loop vector
+ do i = 1, N
+ call worker (a) ! { dg-error "incompatible parallelism with acc routine" }
+ end do
+ !$acc end parallel
+
+ !
+ ! Vector routines loops.
+ !
+
+ !$acc parallel copy (a)
+ !$acc loop
+ do i = 1, N
+ call vector (a)
+ end do
+
+ !$acc loop gang
+ do i = 1, N
+ call vector (a)
+ end do
+
+ !$acc loop worker
+ do i = 1, N
+ call vector (a)
+ end do
+
+ !$acc loop vector
+ do i = 1, N
+ call vector (a) ! { dg-error "incompatible parallelism with acc routine" }
+ end do
+ !$acc end parallel
+contains
+
+ subroutine gang (a)
+ !$acc routine gang
+ integer, intent (inout) :: a(N)
+ integer :: i
+
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+ end subroutine gang
+
+ subroutine worker (a)
+ !$acc routine worker
+ integer, intent (inout) :: a(N)
+ integer :: i
+
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+ end subroutine worker
+
+ subroutine vector (a)
+ !$acc routine vector
+ integer, intent (inout) :: a(N)
+ integer :: i
+
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+ end subroutine vector
+
+ subroutine seq (a)
+ !$acc routine seq
+ integer, intent (inout) :: a(N)
+ integer :: i
+
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+ end subroutine seq
+end program main
diff --git a/gcc/testsuite/gfortran.dg/goacc/routine-5.f90 b/gcc/testsuite/gfortran.dg/goacc/routine-5.f90
new file mode 100644
index 0000000..f769334
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/routine-5.f90
@@ -0,0 +1,109 @@
+! Test invalid intra-routine parallelism.
+
+module param
+ integer, parameter :: N = 32
+end module param
+
+subroutine gang (a)
+ !$acc routine gang
+ integer, intent (inout) :: a(N)
+ integer :: i
+
+ !$acc loop
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+
+ !$acc loop gang
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+
+ !$acc loop worker
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+
+ !$acc loop vector
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+end subroutine gang
+
+subroutine worker (a)
+ !$acc routine worker
+ integer, intent (inout) :: a(N)
+ integer :: i
+
+ !$acc loop
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+
+ !$acc loop gang ! { dg-error "invalid parallelism inside acc routine" }
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+
+ !$acc loop worker
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+
+ !$acc loop vector
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+end subroutine worker
+
+subroutine vector (a)
+ !$acc routine vector
+ integer, intent (inout) :: a(N)
+ integer :: i
+
+ !$acc loop
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+
+ !$acc loop gang ! { dg-error "invalid parallelism inside acc routine" }
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+
+ !$acc loop worker ! { dg-error "invalid parallelism inside acc routine" }
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+
+ !$acc loop vector
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+end subroutine vector
+
+subroutine seq (a)
+ !$acc routine seq
+ integer, intent (inout) :: a(N)
+ integer :: i
+
+ !$acc loop
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+
+ !$acc loop gang ! { dg-error "invalid parallelism inside acc routine" }
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+
+ !$acc loop worker ! { dg-error "invalid parallelism inside acc routine" }
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+
+ !$acc loop vector ! { dg-error "invalid parallelism inside acc routine" }
+ do i = 1, N
+ a(i) = a(i) - a(i)
+ end do
+end subroutine seq
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-4.c
index c73400c..e9cf4c4 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-4.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-4.c
@@ -23,10 +23,10 @@ worker (int *b)
{
int i, j;
-#pragma acc loop gang
+#pragma acc loop worker
for (i = 0; i < N; i++)
{
-#pragma acc loop worker
+#pragma acc loop vector
for (j = 0; j < M; j++)
b[i * M + j] += b[i * M + j];
}
@@ -112,7 +112,7 @@ main(int argc, char **argv)
#pragma acc parallel copy (a[0:N])
{
-#pragma acc loop vector
+#pragma acc loop
for (i = 0; i < N; i++)
vector (&a[0]);
}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-7.f90
index 354784e..7fc8169 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/routine-7.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-7.f90
@@ -41,7 +41,7 @@ program main
end do
!$acc parallel copy (b)
- !$acc loop worker
+ !$acc loop
do i = 1, N
call worker (b)
end do
@@ -56,7 +56,7 @@ program main
end do
!$acc parallel copy (a)
- !$acc loop vector
+ !$acc loop
do i = 1, N
call vector (a)
end do
@@ -85,9 +85,9 @@ subroutine worker (b)
integer, intent (inout) :: b(M*N)
integer :: i, j
- !$acc loop gang
- do i = 1, N
!$acc loop worker
+ do i = 1, N
+ !$acc loop vector
do j = 1, M
b(j + ((i - 1) * M)) = b(j + ((i - 1) * M)) + 1
end do
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/vector-routine.f90 b/libgomp/testsuite/libgomp.oacc-fortran/vector-routine.f90
index a8d078a..1edcee4 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/vector-routine.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/vector-routine.f90
@@ -13,14 +13,8 @@ program main
a(i) = i
end do
- !
- ! Appears there's two bugs...
- ! 1) loop with vector
- ! 2) loop without vector
- !
-
!$acc parallel copy (a)
- !$acc loop vector
+ !$acc loop worker
do i = 1, N
call vector (a)
end do
@@ -37,6 +31,7 @@ contains
integer, intent (inout) :: a(N)
integer :: i
+ !$acc loop vector
do i = 1, N
a(i) = a(i) - a(i)
end do
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2015-08-28 17:30 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-08-28 15:56 [gomp4] check for compatible parallelism with acc routines Cesar Philippidis
2015-08-28 17:37 ` Nathan Sidwell
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).