From: Nathan Sidwell <nathan@acm.org>
To: Jakub Jelinek <jakub@redhat.com>, GCC Patches <gcc-patches@gcc.gnu.org>
Cc: Cesar Philippidis <cesar_philippidis@mentor.com>
Subject: Re: [3/3] OpenACC reductions
Date: Mon, 02 Nov 2015 16:38:00 -0000 [thread overview]
Message-ID: <56379197.8020309@acm.org> (raw)
In-Reply-To: <56378AE9.2020905@acm.org>
[-- Attachment #1: Type: text/plain, Size: 663 bytes --]
This patch is the initial set of tests. The libgomp tests use an idiom of
summing thread identifiers and then checking that the expected set of threads
participated. They are all derived from the loop tests I recently added for the
execution model itself.
The Fortran test was duplicated in both the gfortran testsuite and the libgomp
testsuite. I deleted it from the former. It was slightly bogus as it asked
for a vector-length of 40, and appeared to be working by accident by not
actually partitioning the loop. I fixed that up and reworked it to avoid
needing a reduction on a reference variable. Reference handling will be a later
patch.
nathan
[-- Attachment #2: 03-trunk-reductions-tests-1102.patch --]
[-- Type: text/x-patch, Size: 11411 bytes --]
2015-11-02 Nathan Sidwell <nathan@codesourcery.com>
libgomp/
* libgomp.oacc-c-c++-common/loop-red-g-1.c: New.
* libgomp.oacc-c-c++-common/loop-red-gwv-1.c: New.
* libgomp.oacc-c-c++-common/loop-red-v-1.c: New.
* libgomp.oacc-c-c++-common/loop-red-v-2.c: New.
* libgomp.oacc-c-c++-common/loop-red-w-1.c: New.
* libgomp.oacc-c-c++-common/loop-red-w-2.c: New.
* libgomp.oacc-c-c++-common/loop-red-wv-1.c: New.
* libgomp.oacc-fortran/reduction-5.f90: Avoid reference var.
gcc/testsuite/
* gfortran.dg/goacc/reduction-2.f95: Delete.
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c (revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c (working copy)
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main () /* Gang-loop reduction test: returns 0 if the region's sum t matches the host-computed h, else 1.  */
+{
+ int ix;
+ int ondev = 0; /* Set to 1 by the loop body when the on-device branch is taken.  */
+ int t = 0, h = 0; /* t: reduction result of the acc loop; h: expected value computed below.  */
+
+#pragma acc parallel num_gangs(32) vector_length(32) copy(t) copy(ondev)
+ {
+#pragma acc loop gang reduction (+:t)
+ for (unsigned ix = 0; ix < N; ix++)
+ {
+ int val = ix; /* Contribution when the on-device branch is not taken: the iteration index.  */
+
+ if (__builtin_acc_on_device (5)) /* NOTE(review): 5 is presumably the acc_device_nvidia value -- confirm against openacc.h.  */
+ {
+ int g = 0, w = 0, v = 0;
+
+ __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); /* g <- %ctaid.x  */
+ __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); /* w <- %tid.y  */
+ __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); /* v <- %tid.x  */
+ val = (g << 16) | (w << 8) | v; /* Pack the three ids into one value: g at bit 16, w at bit 8, v at bit 0.  */
+ ondev = 1;
+ }
+ t += val;
+ }
+ }
+
+ for (ix = 0; ix < N; ix++) /* Host-side recomputation of the expected sum.  */
+ {
+ int val = ix;
+ if(ondev)
+ {
+ int g = ix / ((N + 31) / 32); /* Expected g: iterations in 32 contiguous chunks of ceil(N/32).  */
+ int w = 0; /* Expected w and v are 0 for every iteration in this test.  */
+ int v = 0;
+
+ val = (g << 16) | (w << 8) | v; /* Same packing as in the acc loop.  */
+ }
+ h += val;
+ }
+ if (t != h)
+ {
+ printf ("t=%x expected %x\n", t, h);
+ return 1;
+ }
+
+ return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c (revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c (working copy)
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main () /* Gang+worker+vector loop reduction test: returns 0 if the region's sum t matches the host-computed h, else 1.  */
+{
+ int ix;
+ int ondev = 0; /* Set to 1 by the loop body when the on-device branch is taken.  */
+ int t = 0, h = 0; /* t: reduction result of the acc loop; h: expected value computed below.  */
+
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(t) copy(ondev)
+ {
+#pragma acc loop gang worker vector reduction(+:t)
+ for (unsigned ix = 0; ix < N; ix++)
+ {
+ int val = ix; /* Contribution when the on-device branch is not taken: the iteration index.  */
+
+ if (__builtin_acc_on_device (5)) /* NOTE(review): 5 is presumably the acc_device_nvidia value -- confirm against openacc.h.  */
+ {
+ int g = 0, w = 0, v = 0;
+
+ __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); /* g <- %ctaid.x  */
+ __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); /* w <- %tid.y  */
+ __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); /* v <- %tid.x  */
+ val = (g << 16) | (w << 8) | v; /* Pack the three ids into one value: g at bit 16, w at bit 8, v at bit 0.  */
+ ondev = 1;
+ }
+ t += val;
+ }
+ }
+
+ for (ix = 0; ix < N; ix++) /* Host-side recomputation of the expected sum.  */
+ {
+ int val = ix;
+ if(ondev)
+ {
+ int chunk_size = (N + 32*32*32 - 1) / (32*32*32); /* ceil(N / (32*32*32)).  */
+
+ int g = ix / (chunk_size * 32 * 32); /* Expected g: each g covers chunk_size*32*32 consecutive iterations.  */
+ int w = ix / 32 % 32; /* Expected w: advances every 32 iterations, wraps at 32.  */
+ int v = ix % 32; /* Expected v: low five bits of the iteration index.  */
+
+ val = (g << 16) | (w << 8) | v; /* Same packing as in the acc loop.  */
+ }
+ h += val;
+ }
+ if (t != h)
+ {
+ printf ("t=%x expected %x\n", t, h);
+ return 1;
+ }
+
+ return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c (revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c (working copy)
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+
+int main () /* Vector-loop reduction test: returns 0 if the region's sum t matches the host-computed h, else 1.  */
+{
+ int ix;
+ int ondev = 0; /* Set to 1 by the loop body when the on-device branch is taken.  */
+ int t = 0, h = 0; /* t: reduction result of the acc loop; h: expected value computed below.  */
+
+#pragma acc parallel vector_length(32) copy(t) copy(ondev)
+ {
+#pragma acc loop vector reduction (+:t)
+ for (unsigned ix = 0; ix < N; ix++)
+ {
+ int val = ix; /* Contribution when the on-device branch is not taken: the iteration index.  */
+
+ if (__builtin_acc_on_device (5)) /* NOTE(review): 5 is presumably the acc_device_nvidia value -- confirm against openacc.h.  */
+ {
+ int g = 0, w = 0, v = 0;
+
+ __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); /* g <- %ctaid.x  */
+ __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); /* w <- %tid.y  */
+ __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); /* v <- %tid.x  */
+ val = (g << 16) | (w << 8) | v; /* Pack the three ids into one value: g at bit 16, w at bit 8, v at bit 0.  */
+ ondev = 1;
+ }
+ t += val;
+ }
+ }
+
+ for (ix = 0; ix < N; ix++) /* Host-side recomputation of the expected sum.  */
+ {
+ int val = ix;
+ if (ondev)
+ {
+ int g = 0; /* Expected g and w are 0 for every iteration in this test.  */
+ int w = 0;
+ int v = ix % 32; /* Expected v: iteration index modulo 32.  */
+
+ val = (g << 16) | (w << 8) | v; /* Same packing as in the acc loop.  */
+ }
+ h += val;
+ }
+
+ if (t != h)
+ {
+ printf ("t=%x expected %x\n", t, h);
+ return 1;
+ }
+
+ return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c (revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c (working copy)
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+
+int main () /* Vector-loop reduction on a region-local variable: returns 0 if q matches the host-computed h, else 1.  */
+{
+ int ix;
+ int ondev = 0; /* Set to 1 by the loop body when the on-device branch is taken.  */
+ int q = 0, h = 0; /* q: value copied in and out of the region; h: expected value computed below.  */
+
+#pragma acc parallel vector_length(32) copy(q) copy(ondev)
+ {
+ int t = q; /* Reduction variable declared inside the parallel region, seeded from q.  */
+
+#pragma acc loop vector reduction (+:t)
+ for (unsigned ix = 0; ix < N; ix++)
+ {
+ int val = ix; /* Contribution when the on-device branch is not taken: the iteration index.  */
+
+ if (__builtin_acc_on_device (5)) /* NOTE(review): 5 is presumably the acc_device_nvidia value -- confirm against openacc.h.  */
+ {
+ int g = 0, w = 0, v = 0;
+
+ __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); /* g <- %ctaid.x  */
+ __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); /* w <- %tid.y  */
+ __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); /* v <- %tid.x  */
+ val = (g << 16) | (w << 8) | v; /* Pack the three ids into one value: g at bit 16, w at bit 8, v at bit 0.  */
+ ondev = 1;
+ }
+ t += val;
+ }
+ q = t; /* Publish the reduced value through the copied variable.  */
+ }
+
+ for (ix = 0; ix < N; ix++) /* Host-side recomputation of the expected sum.  */
+ {
+ int val = ix;
+ if (ondev)
+ {
+ int g = 0; /* Expected g and w are 0 for every iteration in this test.  */
+ int w = 0;
+ int v = ix % 32; /* Expected v: iteration index modulo 32.  */
+
+ val = (g << 16) | (w << 8) | v; /* Same packing as in the acc loop.  */
+ }
+ h += val;
+ }
+
+ if (q != h)
+ {
+ printf ("t=%x expected %x\n", q, h); /* The label reads "t" but the value printed is q.  */
+ return 1;
+ }
+
+ return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c (revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c (working copy)
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main () /* Worker-loop reduction test: returns 0 if the region's sum t matches the host-computed h, else 1.  */
+{
+ int ix;
+ int ondev = 0; /* Set to 1 by the loop body when the on-device branch is taken.  */
+ int t = 0, h = 0; /* t: reduction result of the acc loop; h: expected value computed below.  */
+
+#pragma acc parallel num_workers(32) vector_length(32) copy(t) copy(ondev)
+ {
+#pragma acc loop worker reduction(+:t)
+ for (unsigned ix = 0; ix < N; ix++)
+ {
+ int val = ix; /* Contribution when the on-device branch is not taken: the iteration index.  */
+
+ if (__builtin_acc_on_device (5)) /* NOTE(review): 5 is presumably the acc_device_nvidia value -- confirm against openacc.h.  */
+ {
+ int g = 0, w = 0, v = 0;
+
+ __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); /* g <- %ctaid.x  */
+ __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); /* w <- %tid.y  */
+ __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); /* v <- %tid.x  */
+ val = (g << 16) | (w << 8) | v; /* Pack the three ids into one value: g at bit 16, w at bit 8, v at bit 0.  */
+ ondev = 1;
+ }
+ t += val;
+ }
+ }
+
+ for (ix = 0; ix < N; ix++) /* Host-side recomputation of the expected sum.  */
+ {
+ int val = ix;
+ if(ondev)
+ {
+ int g = 0; /* Expected g and v are 0 for every iteration in this test.  */
+ int w = ix % 32; /* Expected w: iteration index modulo 32.  */
+ int v = 0;
+
+ val = (g << 16) | (w << 8) | v; /* Same packing as in the acc loop.  */
+ }
+ h += val;
+ }
+ if (t != h)
+ {
+ printf ("t=%x expected %x\n", t, h);
+ return 1;
+ }
+
+ return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c (revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c (working copy)
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main () /* Worker-loop reduction on a region-local variable: returns 0 if q matches the host-computed h, else 1.  */
+{
+ int ix;
+ int ondev = 0; /* Set to 1 by the loop body when the on-device branch is taken.  */
+ int q = 0, h = 0; /* q: value copied in and out of the region; h: expected value computed below.  */
+
+#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev)
+ {
+ int t = q; /* Reduction variable declared inside the parallel region, seeded from q.  */
+
+#pragma acc loop worker reduction(+:t)
+ for (unsigned ix = 0; ix < N; ix++)
+ {
+ int val = ix; /* Contribution when the on-device branch is not taken: the iteration index.  */
+
+ if (__builtin_acc_on_device (5)) /* NOTE(review): 5 is presumably the acc_device_nvidia value -- confirm against openacc.h.  */
+ {
+ int g = 0, w = 0, v = 0;
+
+ __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); /* g <- %ctaid.x  */
+ __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); /* w <- %tid.y  */
+ __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); /* v <- %tid.x  */
+ val = (g << 16) | (w << 8) | v; /* Pack the three ids into one value: g at bit 16, w at bit 8, v at bit 0.  */
+ ondev = 1;
+ }
+ t += val;
+ }
+ q = t; /* Publish the reduced value through the copied variable.  */
+ }
+
+ for (ix = 0; ix < N; ix++) /* Host-side recomputation of the expected sum.  */
+ {
+ int val = ix;
+ if(ondev)
+ {
+ int g = 0; /* Expected g and v are 0 for every iteration in this test.  */
+ int w = ix % 32; /* Expected w: iteration index modulo 32.  */
+ int v = 0;
+
+ val = (g << 16) | (w << 8) | v; /* Same packing as in the acc loop.  */
+ }
+ h += val;
+ }
+ if (q != h)
+ {
+ printf ("t=%x expected %x\n", q, h); /* The label reads "t" but the value printed is q.  */
+ return 1;
+ }
+
+ return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c (revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c (working copy)
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main () /* Worker+vector loop reduction test: returns 0 if the region's sum t matches the host-computed h, else 1.  */
+{
+ int ix;
+ int ondev = 0; /* Set to 1 by the loop body when the on-device branch is taken.  */
+ int t = 0, h = 0; /* t: reduction result of the acc loop; h: expected value computed below.  */
+
+#pragma acc parallel num_workers(32) vector_length(32) copy(t) copy(ondev)
+ {
+#pragma acc loop worker vector reduction (+:t)
+ for (unsigned ix = 0; ix < N; ix++)
+ {
+ int val = ix; /* Contribution when the on-device branch is not taken: the iteration index.  */
+
+ if (__builtin_acc_on_device (5)) /* NOTE(review): 5 is presumably the acc_device_nvidia value -- confirm against openacc.h.  */
+ {
+ int g = 0, w = 0, v = 0;
+
+ __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); /* g <- %ctaid.x  */
+ __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); /* w <- %tid.y  */
+ __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); /* v <- %tid.x  */
+ val = (g << 16) | (w << 8) | v; /* Pack the three ids into one value: g at bit 16, w at bit 8, v at bit 0.  */
+ ondev = 1;
+ }
+ t += val;
+ }
+ }
+
+ for (ix = 0; ix < N; ix++) /* Host-side recomputation of the expected sum.  */
+ {
+ int val = ix;
+ if(ondev)
+ {
+ int g = 0; /* Expected g is 0 for every iteration in this test.  */
+ int w = (ix / 32) % 32; /* Expected w: advances every 32 iterations, wraps at 32.  */
+ int v = ix % 32; /* Expected v: iteration index modulo 32.  */
+
+ val = (g << 16) | (w << 8) | v; /* Same packing as in the acc loop.  */
+ }
+ h += val;
+ }
+ if (t != h)
+ {
+ printf ("t=%x expected %x\n", t, h);
+ return 1;
+ }
+
+ return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90
===================================================================
--- libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 (revision 229667)
+++ libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 (working copy)
@@ -21,12 +21,15 @@ end program reduction
subroutine redsub(sum, n, c)
integer :: sum, n, c
- sum = 0
+ integer :: s
+ s = 0
- !$acc parallel vector_length(n) copyin (n, c) num_gangs(1)
- !$acc loop reduction(+:sum)
+ !$acc parallel vector_length(32) copyin (n, c) copy (s) num_gangs(1)
+ !$acc loop reduction(+:s)
do i = 1, n
- sum = sum + c
+ s = s + c
end do
!$acc end parallel
+
+ sum = s
end subroutine redsub
Index: gcc/testsuite/gfortran.dg/goacc/reduction-2.f95
===================================================================
--- gcc/testsuite/gfortran.dg/goacc/reduction-2.f95 (revision 229667)
+++ gcc/testsuite/gfortran.dg/goacc/reduction-2.f95 (working copy)
@@ -1,21 +0,0 @@
-! { dg-do compile }
-
-program reduction
- integer, parameter :: n = 40, c = 10
- integer :: i, sum
-
- call redsub (sum, n, c)
-end program reduction
-
-subroutine redsub(sum, n, c)
- integer :: sum, n, c
-
- sum = 0
-
- !$acc parallel vector_length(n) copyin (n, c)
- !$acc loop reduction(+:sum)
- do i = 1, n
- sum = sum + c
- end do
- !$acc end parallel
-end subroutine redsub
next prev parent reply other threads:[~2015-11-02 16:38 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-11-02 16:10 [0/3] " Nathan Sidwell
2015-10-18 23:20 ` [gomp4] fortran testcase Nathan Sidwell
2015-11-02 16:18 ` [1/3] OpenACC reductions Nathan Sidwell
2015-11-03 15:46 ` Jakub Jelinek
2015-11-03 16:02 ` Nathan Sidwell
2015-11-04 10:31 ` Jakub Jelinek
2015-11-04 13:58 ` Nathan Sidwell
2015-11-04 14:08 ` Jakub Jelinek
2015-11-04 9:59 ` Jakub Jelinek
2015-11-06 10:47 ` [gomp4] " Thomas Schwinge
2016-01-07 3:55 ` [gomp4] private reductions Cesar Philippidis
2016-01-07 16:53 ` Cesar Philippidis
2016-01-09 1:14 ` Cesar Philippidis
2016-01-11 12:10 ` Thomas Schwinge
2016-01-11 14:55 ` Cesar Philippidis
2021-08-09 11:37 ` [1/3] OpenACC reductions Thomas Schwinge
2015-11-02 16:35 ` [2/3] " Nathan Sidwell
2015-11-04 10:01 ` Jakub Jelinek
2015-11-04 13:57 ` Nathan Sidwell
2015-11-04 13:27 ` Bernd Schmidt
2015-11-04 14:09 ` Nathan Sidwell
2015-11-04 16:59 ` Nathan Sidwell
2015-11-06 10:48 ` [gomp4] " Thomas Schwinge
2015-11-02 16:38 ` Nathan Sidwell [this message]
2015-11-04 10:03 ` [3/3] " Jakub Jelinek
2015-11-06 10:49 ` [gomp4] " Thomas Schwinge
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=56379197.8020309@acm.org \
--to=nathan@acm.org \
--cc=cesar_philippidis@mentor.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=jakub@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).