2015-11-04 Nathan Sidwell gcc/ * config/nvptx/nvptx.c (nvptx_goacc_validate_dims): Add checking. libgomp/ * testsuite/libgomp.oacc-fortran/reduction-1.f90: Fix dimensions and reduction copy. * testsuite/libgomp.oacc-fortran/reduction-2.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-3.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-4.f90: Likewise. * testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise. * testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-3.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/collapse-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-initial-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: New. Index: gcc/config/nvptx/nvptx.c =================================================================== --- gcc/config/nvptx/nvptx.c (revision 229771) +++ gcc/config/nvptx/nvptx.c (working copy) @@ -3472,8 +3472,29 @@ nvptx_goacc_validate_dims (tree ARG_UNUS { bool changed = false; - /* TODO: Leave dimensions unaltered. Reductions need - porting before filtering dimensions makes sense. */ + /* The vector size must be 32, unless this is a SEQ routine. */ + if (fn_level <= GOMP_DIM_VECTOR + && dims[GOMP_DIM_VECTOR] != PTX_VECTOR_LENGTH) + { + if (dims[GOMP_DIM_VECTOR] >= 0 && fn_level < 0) + warning_at (DECL_SOURCE_LOCATION (decl), 0, + dims[GOMP_DIM_VECTOR] + ? "using vector_length (%d), ignoring %d" + : "using vector_length (%d), ignoring runtime setting", + PTX_VECTOR_LENGTH, dims[GOMP_DIM_VECTOR]); + dims[GOMP_DIM_VECTOR] = PTX_VECTOR_LENGTH; + changed = true; + } + + /* Check the num workers is not too large. */ + if (dims[GOMP_DIM_WORKER] > PTX_WORKER_LENGTH) + { + warning_at (DECL_SOURCE_LOCATION (decl), 0, + "using num_workers (%d), ignoring %d", + PTX_WORKER_LENGTH, dims[GOMP_DIM_WORKER]); + dims[GOMP_DIM_WORKER] = PTX_WORKER_LENGTH; + changed = true; + } return changed; } Index: libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 =================================================================== --- libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 (revision 229771) +++ libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 (working copy) @@ -5,7 +5,7 @@ program reduction_1 implicit none - integer, parameter :: n = 10, vl = 2 + integer, parameter :: n = 10, vl = 32 integer :: i, vresult, result logical :: lresult, lvresult integer, dimension (n) :: array @@ -19,7 +19,7 @@ program reduction_1 ! '+' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(result) !$acc loop reduction(+:result) do i = 1, n result = result + array(i) @@ -38,7 +38,7 @@ program reduction_1 ! '*' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(result) !$acc loop reduction(*:result) do i = 1, n result = result * array(i) @@ -57,7 +57,7 @@ program reduction_1 ! 'max' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(result) !$acc loop reduction(max:result) do i = 1, n result = max (result, array(i)) @@ -76,7 +76,7 @@ program reduction_1 ! 'min' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(result) !$acc loop reduction(min:result) do i = 1, n result = min (result, array(i)) @@ -95,7 +95,7 @@ program reduction_1 ! 'iand' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(result) !$acc loop reduction(iand:result) do i = 1, n result = iand (result, array(i)) @@ -114,7 +114,7 @@ program reduction_1 ! 'ior' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(result) !$acc loop reduction(ior:result) do i = 1, n result = ior (result, array(i)) @@ -133,7 +133,7 @@ program reduction_1 ! 'ieor' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(result) !$acc loop reduction(ieor:result) do i = 1, n result = ieor (result, array(i)) @@ -152,7 +152,7 @@ program reduction_1 ! '.and.' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) !$acc loop reduction(.and.:lresult) do i = 1, n lresult = lresult .and. (array(i) .ge. 5) @@ -171,7 +171,7 @@ program reduction_1 ! '.or.' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) !$acc loop reduction(.or.:lresult) do i = 1, n lresult = lresult .or. (array(i) .ge. 5) @@ -190,7 +190,7 @@ program reduction_1 ! '.eqv.' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) !$acc loop reduction(.eqv.:lresult) do i = 1, n lresult = lresult .eqv. (array(i) .ge. 5) @@ -209,7 +209,7 @@ program reduction_1 ! '.neqv.' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) !$acc loop reduction(.neqv.:lresult) do i = 1, n lresult = lresult .neqv. (array(i) .ge. 5) Index: libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 =================================================================== --- libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 (revision 229771) +++ libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 (working copy) @@ -5,7 +5,7 @@ program reduction_2 implicit none - integer, parameter :: n = 10, vl = 2 + integer, parameter :: n = 10, vl = 32 integer :: i real, parameter :: e = .001 real :: vresult, result @@ -21,7 +21,7 @@ program reduction_2 ! '+' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(result) !$acc loop reduction(+:result) do i = 1, n result = result + array(i) @@ -40,7 +40,7 @@ program reduction_2 ! '*' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(result) !$acc loop reduction(*:result) do i = 1, n result = result * array(i) @@ -59,7 +59,7 @@ program reduction_2 ! 'max' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(result) !$acc loop reduction(max:result) do i = 1, n result = max (result, array(i)) @@ -78,7 +78,7 @@ program reduction_2 ! 'min' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(result) !$acc loop reduction(min:result) do i = 1, n result = min (result, array(i)) @@ -97,7 +97,7 @@ program reduction_2 ! '.and.' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) !$acc loop reduction(.and.:lresult) do i = 1, n lresult = lresult .and. (array(i) .ge. 5) @@ -116,7 +116,7 @@ program reduction_2 ! '.or.' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) !$acc loop reduction(.or.:lresult) do i = 1, n lresult = lresult .or. (array(i) .ge. 5) @@ -135,7 +135,7 @@ program reduction_2 ! '.eqv.' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) !$acc loop reduction(.eqv.:lresult) do i = 1, n lresult = lresult .eqv. (array(i) .ge. 5) @@ -154,7 +154,7 @@ program reduction_2 ! '.neqv.' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) !$acc loop reduction(.neqv.:lresult) do i = 1, n lresult = lresult .neqv. (array(i) .ge. 5) Index: libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 =================================================================== --- libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 (revision 229771) +++ libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 (working copy) @@ -5,7 +5,7 @@ program reduction_3 implicit none - integer, parameter :: n = 10, vl = 2 + integer, parameter :: n = 10, vl = 32 integer :: i double precision, parameter :: e = .001 double precision :: vresult, result @@ -21,7 +21,7 @@ program reduction_3 ! '+' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(result) !$acc loop reduction(+:result) do i = 1, n result = result + array(i) @@ -40,7 +40,7 @@ program reduction_3 ! '*' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(result) !$acc loop reduction(*:result) do i = 1, n result = result * array(i) @@ -59,7 +59,7 @@ program reduction_3 ! 'max' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(result) !$acc loop reduction(max:result) do i = 1, n result = max (result, array(i)) @@ -78,7 +78,7 @@ program reduction_3 ! 'min' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(result) !$acc loop reduction(min:result) do i = 1, n result = min (result, array(i)) @@ -97,7 +97,7 @@ program reduction_3 ! '.and.' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) !$acc loop reduction(.and.:lresult) do i = 1, n lresult = lresult .and. (array(i) .ge. 5) @@ -116,7 +116,7 @@ program reduction_3 ! '.or.' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) !$acc loop reduction(.or.:lresult) do i = 1, n lresult = lresult .or. (array(i) .ge. 5) @@ -135,7 +135,7 @@ program reduction_3 ! '.eqv.' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) !$acc loop reduction(.eqv.:lresult) do i = 1, n lresult = lresult .eqv. (array(i) .ge. 5) @@ -154,7 +154,7 @@ program reduction_3 ! '.neqv.' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) !$acc loop reduction(.neqv.:lresult) do i = 1, n lresult = lresult .neqv. (array(i) .ge. 5) Index: libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 =================================================================== --- libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 (revision 229771) +++ libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 (working copy) @@ -19,7 +19,7 @@ program reduction_4 ! '+' reductions - !$acc parallel vector_length(vl) num_gangs(1) + !$acc parallel vector_length(vl) num_gangs(1) copy(result) !$acc loop reduction(+:result) do i = 1, n result = result + array(i) Index: libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 =================================================================== --- libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 (revision 229771) +++ libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 (working copy) @@ -11,7 +11,7 @@ program reduction vs1 = 0 vs2 = 0 - !$acc parallel vector_length (32) + !$acc parallel vector_length (32) copy(s1, s2) !$acc loop reduction(+:s1, s2) do i = 1, n s1 = s1 + 1 Index: libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c (revision 229771) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c (working copy) @@ -10,8 +10,7 @@ main (int argc, char *argv[]) #else # define GANGS 256 #endif - #pragma acc parallel num_gangs(GANGS) num_workers(1) vector_length(1) \ - copy(res2) + #pragma acc parallel num_gangs(GANGS) copy(res2) { #pragma acc atomic res2 += 5; @@ -28,8 +27,7 @@ main (int argc, char *argv[]) #else # define GANGS 8 #endif - #pragma acc parallel num_gangs(GANGS) num_workers(1) vector_length(1) \ - copy(res2) + #pragma acc parallel num_gangs(GANGS) copy(res2) { #pragma acc atomic res2 *= 5; Index: libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c (revision 229771) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c (working copy) @@ -23,7 +23,7 @@ main(void) vresult = 0; /* '+' reductions. */ -#pragma acc parallel vector_length (vl) +#pragma acc parallel vector_length (vl) copy(result) #pragma acc loop reduction (+:result) for (i = 0; i < n; i++) result += array[i]; @@ -39,7 +39,7 @@ main(void) vresult = 0; /* '*' reductions. */ -#pragma acc parallel vector_length (vl) +#pragma acc parallel vector_length (vl) copy(result) #pragma acc loop reduction (*:result) for (i = 0; i < n; i++) result *= array[i]; @@ -91,7 +91,7 @@ main(void) lvresult = false; /* '&&' reductions. */ -#pragma acc parallel vector_length (vl) +#pragma acc parallel vector_length (vl) copy(lresult) #pragma acc loop reduction (&&:lresult) for (i = 0; i < n; i++) lresult = lresult && (result > array[i]); @@ -110,7 +110,7 @@ main(void) lvresult = false; /* '||' reductions. */ -#pragma acc parallel vector_length (vl) +#pragma acc parallel vector_length (vl) copy(lresult) #pragma acc loop reduction (||:lresult) for (i = 0; i < n; i++) lresult = lresult || (result > array[i]); Index: libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c (revision 229771) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c (working copy) @@ -8,7 +8,7 @@ main (void) int i, j, k, l = 0, f = 0, x = 0; int m1 = 4, m2 = -5, m3 = 17; - #pragma acc parallel +#pragma acc parallel copy(l) #pragma acc loop collapse(3) reduction(+:l) for (i = -2; i < m1; i++) for (j = m2; j < -2; j++) Index: libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c (revision 229771) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c (working copy) @@ -11,8 +11,7 @@ main (int argc, char *argv[]) #else # define GANGS 256 #endif - #pragma acc parallel num_gangs(GANGS) num_workers(1) vector_length(1) \ - copy(res2) async(1) + #pragma acc parallel num_gangs(GANGS) copy(res2) async(1) { #pragma acc atomic res2 += 5; @@ -31,8 +30,7 @@ main (int argc, char *argv[]) #else # define GANGS 8 #endif - #pragma acc parallel num_gangs(GANGS) num_workers(1) vector_length(1) \ - copy(res2) async(1) + #pragma acc parallel num_gangs(GANGS) copy(res2) async(1) { #pragma acc atomic res2 *= 5; Index: libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c (revision 229771) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c (working copy) @@ -24,7 +24,7 @@ main(void) vresult = 0; /* '+' reductions. */ -#pragma acc parallel vector_length (vl) +#pragma acc parallel vector_length (vl) copy(result) #pragma acc loop reduction (+:result) for (i = 0; i < n; i++) result += array[i]; @@ -94,7 +94,7 @@ main(void) lvresult = false; /* '&&' reductions. */ -#pragma acc parallel vector_length (vl) +#pragma acc parallel vector_length (vl) copy(lresult) #pragma acc loop reduction (&&:lresult) for (i = 0; i < n; i++) lresult = lresult && (creal(result) > creal(array[i])); @@ -113,7 +113,7 @@ main(void) lvresult = false; /* '||' reductions. */ -#pragma acc parallel vector_length (vl) +#pragma acc parallel vector_length (vl) copy(lresult) #pragma acc loop reduction (||:lresult) for (i = 0; i < n; i++) lresult = lresult || (creal(result) > creal(array[i])); Index: libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-initial-1.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-initial-1.c (revision 229771) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-initial-1.c (working copy) @@ -4,13 +4,13 @@ int main(void) { #define I 5 -#define N 11 +#define N 32 #define A 8 int a = A; int s = I; -#pragma acc parallel vector_length(N) +#pragma acc parallel vector_length(N) copy(s) { int i; #pragma acc loop reduction(+:s) Index: libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c (revision 229771) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c (working copy) @@ -13,7 +13,7 @@ { \ type res, vres; \ res = (init); \ -DO_PRAGMA (acc parallel vector_length (vl))\ + DO_PRAGMA (acc parallel vector_length (vl) copy(res)) \ DO_PRAGMA (acc loop reduction (op:res))\ for (i = 0; i < n; i++) \ res = res op (b); \ @@ -63,7 +63,7 @@ test_reductions_bool (void) { \ type res, vres; \ res = (init); \ -DO_PRAGMA (acc parallel vector_length (vl))\ +DO_PRAGMA (acc parallel vector_length (vl) copy(res))\ DO_PRAGMA (acc loop reduction (op:res))\ for (i = 0; i < n; i++) \ res = op (res, (b)); \ Index: libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c (revision 229771) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c (working copy) @@ -8,7 +8,7 @@ main (void) int n = 100; int i; -#pragma acc parallel vector_length (32) +#pragma acc parallel vector_length (32) copy(s1,s2) #pragma acc loop reduction (+:s1, s2) for (i = 0; i < n; i++) { Index: libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c (revision 229771) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c (working copy) @@ -23,7 +23,7 @@ main(void) vresult = 0; /* '+' reductions. */ -#pragma acc parallel vector_length (vl) +#pragma acc parallel vector_length (vl) copy(result) #pragma acc loop reduction (+:result) for (i = 0; i < n; i++) result += array[i]; @@ -39,7 +39,7 @@ main(void) vresult = 0; /* '*' reductions. */ -#pragma acc parallel vector_length (vl) +#pragma acc parallel vector_length (vl) copy(result) #pragma acc loop reduction (*:result) for (i = 0; i < n; i++) result *= array[i]; @@ -91,7 +91,7 @@ main(void) lvresult = false; /* '&&' reductions. */ -#pragma acc parallel vector_length (vl) +#pragma acc parallel vector_length (vl) copy(lresult) #pragma acc loop reduction (&&:lresult) for (i = 0; i < n; i++) lresult = lresult && (result > array[i]); @@ -110,7 +110,7 @@ main(void) lvresult = false; /* '||' reductions. */ -#pragma acc parallel vector_length (vl) +#pragma acc parallel vector_length (vl) copy(lresult) #pragma acc loop reduction (||:lresult) for (i = 0; i < n; i++) lresult = lresult || (result > array[i]); Index: libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c (revision 0) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c (working copy) @@ -0,0 +1,17 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ + +/* Worker and vector size checks. Picked an outrageously large + value. */ + +int main () +{ +#pragma acc parallel num_workers (2<<20) /* { dg-error "using num_workers" } */ + { + } + +#pragma acc parallel vector_length (2<<20) /* { dg-error "using vector_length" } */ + { + } + + return 0; +}