Re: [3/3] OpenACC reductions

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: Nathan Sidwell <nathan@acm.org>
To: Jakub Jelinek <jakub@redhat.com>, GCC Patches <gcc-patches@gcc.gnu.org>
Cc: Cesar Philippidis <cesar_philippidis@mentor.com>
Subject: Re: [3/3] OpenACC reductions
Date: Mon, 02 Nov 2015 16:38:00 -0000	[thread overview]
Message-ID: <56379197.8020309@acm.org> (raw)
In-Reply-To: <56378AE9.2020905@acm.org>

[-- Attachment #1: Type: text/plain, Size: 663 bytes --]

This patch is the initial set of tests.  The libgomp tests use an idiom of
summing thread identifiers and then checking that the expected set of threads
participated.  They are all derived from the loop tests I recently added for the
execution model itself.

The Fortran test was duplicated in both the gfortran testsuite and the libgomp
testsuite.  I deleted it from the former.  It was slightly bogus, as it asked
for a vector length of 40 and appeared to be working by accident by not
actually partitioning the loop.  I fixed that up and reworked it to avoid
needing a reduction on a reference variable.  Reference handling will be a later
patch.

nathan

[-- Attachment #2: 03-trunk-reductions-tests-1102.patch --]
[-- Type: text/x-patch, Size: 11411 bytes --]

2015-11-02  Nathan Sidwell  <nathan@codesourcery.com>

	libgomp/
	* libgomp.oacc-c-c++-common/loop-red-g-1.c: New.
	* libgomp.oacc-c-c++-common/loop-red-gwv-1.c: New.
	* libgomp.oacc-c-c++-common/loop-red-v-1.c: New.
	* libgomp.oacc-c-c++-common/loop-red-v-2.c: New.
	* libgomp.oacc-c-c++-common/loop-red-w-1.c: New.
	* libgomp.oacc-c-c++-common/loop-red-w-2.c: New.
	* libgomp.oacc-c-c++-common/loop-red-wv-1.c: New.
	* libgomp.oacc-fortran/reduction-5.f90: Avoid reference var.

	gcc/testsuite/
	* gfortran.dg/goacc/reduction-2.f95: Delete.

Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c	(working copy)
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main ()
+{
+  int ix;
+  int ondev = 0;
+  int t = 0, h = 0;
+  
+#pragma acc parallel num_gangs(32) vector_length(32) copy(t) copy(ondev)
+  {
+#pragma acc loop gang  reduction (+:t)
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	int val = ix;
+	
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    val = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	t += val;
+      }
+  }
+
+  for (ix = 0; ix < N; ix++)
+    {
+      int val = ix;
+      if(ondev)
+	{
+	  int g = ix / ((N + 31) / 32);
+	  int w = 0;
+	  int v = 0;
+
+	  val = (g << 16) | (w << 8) | v;
+	}
+      h += val;
+    }
+  if (t != h)
+    {
+      printf ("t=%x expected %x\n", t, h);
+      return 1;
+    }
+  
+  return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c	(working copy)
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main ()
+{
+  int ix;
+  int ondev = 0;
+  int t = 0, h = 0;
+  
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(t) copy(ondev)
+  {
+#pragma acc loop gang worker vector reduction(+:t)
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	int val = ix;
+	
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    val = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	t += val;
+      }
+  }
+
+  for (ix = 0; ix < N; ix++)
+    {
+      int val = ix;
+      if(ondev)
+	{
+	  int chunk_size = (N + 32*32*32 - 1) / (32*32*32);
+	  
+	  int g = ix / (chunk_size * 32 * 32);
+	  int w = ix / 32 % 32;
+	  int v = ix % 32;
+
+	  val = (g << 16) | (w << 8) | v;
+	}
+      h += val;
+    }
+  if (t != h)
+    {
+      printf ("t=%x expected %x\n", t, h);
+      return 1;
+    }
+  
+  return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c	(working copy)
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+
+int main ()
+{
+  int ix;
+  int ondev = 0;
+  int t = 0,  h = 0;
+
+#pragma acc parallel vector_length(32) copy(t) copy(ondev)
+  {
+#pragma acc loop vector reduction (+:t)
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	int val = ix;
+	
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    val = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	t += val;
+      }
+  }
+
+  for (ix = 0; ix < N; ix++)
+    {
+      int val = ix;
+      if (ondev)
+	{
+	  int g = 0;
+	  int w = 0;
+	  int v = ix % 32;
+
+	  val = (g << 16) | (w << 8) | v;
+	}
+      h += val;
+    }
+
+  if (t != h)
+    {
+      printf ("t=%x expected %x\n", t, h);
+      return 1;
+    }
+  
+  return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c	(working copy)
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+
+int main ()
+{
+  int ix;
+  int ondev = 0;
+  int q = 0,  h = 0;
+
+#pragma acc parallel vector_length(32) copy(q) copy(ondev)
+  {
+    int t = q;
+    
+#pragma acc loop vector reduction (+:t)
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	int val = ix;
+	
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    val = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	t += val;
+      }
+    q = t;
+  }
+
+  for (ix = 0; ix < N; ix++)
+    {
+      int val = ix;
+      if (ondev)
+	{
+	  int g = 0;
+	  int w = 0;
+	  int v = ix % 32;
+
+	  val = (g << 16) | (w << 8) | v;
+	}
+      h += val;
+    }
+
+  if (q != h)
+    {
+      printf ("t=%x expected %x\n", q, h);
+      return 1;
+    }
+  
+  return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c	(working copy)
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main ()
+{
+  int ix;
+  int ondev = 0;
+  int t = 0,  h = 0;
+
+#pragma acc parallel num_workers(32) vector_length(32) copy(t) copy(ondev)
+  {
+#pragma acc loop worker reduction(+:t)
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	int val = ix;
+	
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    val = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	t += val;
+      }
+  }
+
+  for (ix = 0; ix < N; ix++)
+    {
+      int val = ix;
+      if(ondev)
+	{
+	  int g = 0;
+	  int w = ix % 32;
+	  int v = 0;
+
+	  val = (g << 16) | (w << 8) | v;
+	}
+      h += val;
+    }
+  if (t != h)
+    {
+      printf ("t=%x expected %x\n", t, h);
+      return 1;
+    }
+  
+  return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c	(working copy)
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main ()
+{
+  int ix;
+  int ondev = 0;
+  int q = 0,  h = 0;
+
+#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev)
+  {
+    int t = q;
+    
+#pragma acc loop worker reduction(+:t)
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	int val = ix;
+	
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    val = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	t += val;
+      }
+    q = t;
+  }
+
+  for (ix = 0; ix < N; ix++)
+    {
+      int val = ix;
+      if(ondev)
+	{
+	  int g = 0;
+	  int w = ix % 32;
+	  int v = 0;
+
+	  val = (g << 16) | (w << 8) | v;
+	}
+      h += val;
+    }
+  if (q != h)
+    {
+      printf ("t=%x expected %x\n", q, h);
+      return 1;
+    }
+  
+  return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c	(working copy)
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main ()
+{
+  int ix;
+  int ondev = 0;
+  int t = 0, h = 0;
+  
+#pragma acc parallel num_workers(32) vector_length(32) copy(t) copy(ondev)
+  {
+#pragma acc loop worker vector reduction (+:t)
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	int val = ix;
+	
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    val = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	t += val;
+      }
+  }
+
+  for (ix = 0; ix < N; ix++)
+    {
+      int val = ix;
+      if(ondev)
+	{
+	  int g = 0;
+	  int w = (ix / 32) % 32;
+	  int v = ix % 32;
+
+	  val = (g << 16) | (w << 8) | v;
+	}
+      h += val;
+    }
+  if (t != h)
+    {
+      printf ("t=%x expected %x\n", t, h);
+      return 1;
+    }
+  
+  return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90
===================================================================
--- libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90	(revision 229667)
+++ libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90	(working copy)
@@ -21,12 +21,15 @@ end program reduction
 subroutine redsub(sum, n, c)
   integer :: sum, n, c
 
-  sum = 0
+  integer :: s
+  s = 0
 
-  !$acc parallel vector_length(n) copyin (n, c) num_gangs(1)
-  !$acc loop reduction(+:sum)
+  !$acc parallel vector_length(32) copyin (n, c) copy (s) num_gangs(1)
+  !$acc loop reduction(+:s)
   do i = 1, n
-     sum = sum + c
+     s = s + c
   end do
   !$acc end parallel
+
+  sum = s
 end subroutine redsub
Index: gcc/testsuite/gfortran.dg/goacc/reduction-2.f95
===================================================================
--- gcc/testsuite/gfortran.dg/goacc/reduction-2.f95	(revision 229667)
+++ gcc/testsuite/gfortran.dg/goacc/reduction-2.f95	(working copy)
@@ -1,21 +0,0 @@
-! { dg-do compile }
-
-program reduction
-  integer, parameter    :: n = 40, c = 10
-  integer               :: i, sum
-
-  call redsub (sum, n, c)
-end program reduction
-
-subroutine redsub(sum, n, c)
-  integer :: sum, n, c
-
-  sum = 0
-
-  !$acc parallel vector_length(n) copyin (n, c)
-  !$acc loop reduction(+:sum)
-  do i = 1, n
-     sum = sum + c
-  end do
-  !$acc end parallel
-end subroutine redsub

next prev parent reply	other threads:[~2015-11-02 16:38 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-02 16:10 [0/3] " Nathan Sidwell
2015-10-18 23:20 ` [gomp4] fortran testcase Nathan Sidwell
2015-11-02 16:18 ` [1/3] OpenACC reductions Nathan Sidwell
2015-11-03 15:46   ` Jakub Jelinek
2015-11-03 16:02     ` Nathan Sidwell
2015-11-04 10:31       ` Jakub Jelinek
2015-11-04 13:58         ` Nathan Sidwell
2015-11-04 14:08           ` Jakub Jelinek
2015-11-04  9:59   ` Jakub Jelinek
2015-11-06 10:47   ` [gomp4] " Thomas Schwinge
2016-01-07  3:55     ` [gomp4] private reductions Cesar Philippidis
2016-01-07 16:53       ` Cesar Philippidis
2016-01-09  1:14       ` Cesar Philippidis
2016-01-11 12:10       ` Thomas Schwinge
2016-01-11 14:55         ` Cesar Philippidis
2021-08-09 11:37   ` [1/3] OpenACC reductions Thomas Schwinge
2015-11-02 16:35 ` [2/3] " Nathan Sidwell
2015-11-04 10:01   ` Jakub Jelinek
2015-11-04 13:57     ` Nathan Sidwell
2015-11-04 13:27   ` Bernd Schmidt
2015-11-04 14:09     ` Nathan Sidwell
2015-11-04 16:59     ` Nathan Sidwell
2015-11-06 10:48       ` [gomp4] " Thomas Schwinge
2015-11-02 16:38 ` Nathan Sidwell [this message]
2015-11-04 10:03   ` [3/3] " Jakub Jelinek
2015-11-06 10:49   ` [gomp4] " Thomas Schwinge

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=56379197.8020309@acm.org \
    --to=nathan@acm.org \
    --cc=cesar_philippidis@mentor.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=jakub@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).