public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [OpenACC] Enable firstprivate OpenACC reductions
@ 2018-09-05 20:08 Cesar Philippidis
  2018-12-04 12:25 ` Jakub Jelinek
  0 siblings, 1 reply; 2+ messages in thread
From: Cesar Philippidis @ 2018-09-05 20:08 UTC (permalink / raw)
  To: gcc-patches, Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 828 bytes --]

This patch teaches the gimplifier how to pass certain OpenACC reduction
variables as firstprivate, and not with an implicit copy directive. This
is matches the default behavior for the implicit data mappings of scalar
variables inside OpenACC parallel regions. It should be noted that the
gimplifier will still implicitly map reduction variables on loops
immediately enclosed inside a parallel regions, like so

  #pragma acc parallel
  #pragma acc loop reduction(+:sum)

as copy. This change only impacts reductions variables inside nested acc
loops like

  #pragma acc parallel
  #pragma acc loop
  for (...)
  {
    #pragma acc loop reduction(+:s2)

Here s2 will be transferred into the accelerator as firstprivate instead
of copy.

Is this OK for trunk? I regtested and bootstrapped for x86_64 with nvptx
offloading.

Cesar

[-- Attachment #2: 0001-OpenACC-Enable-firstprivate-OpenACC-reductions.patch --]
[-- Type: text/x-patch, Size: 6348 bytes --]

[OpenACC] Enable firstprivate OpenACC reductions

2018-XX-YY  Cesar Philippidis  <cesar@codesourcery.com>
	    Chung-Lin Tang  <cltang@codesourcery.com>

	gcc/
	* gimplify.c (omp_add_variable): Enable firstprivate reduction
	variables.

	gcc/testsuite/
	* c-c++-common/goacc/reduction-8.c: New test.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/privatize-reduction-1.c: New
	test.
	* testsuite/libgomp.oacc-c-c++-common/privatize-reduction-2.c: New
	test.


diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index dbd0f0ebd0c..4d954e20788 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -6823,20 +6823,27 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
   else
     splay_tree_insert (ctx->variables, (splay_tree_key)decl, flags);
 
-  /* For reductions clauses in OpenACC loop directives, by default create a
-     copy clause on the enclosing parallel construct for carrying back the
-     results.  */
+  /* For OpenACC loop directives, when a reduction clause is placed on
+     the outermost acc loop within an acc parallel or kernels
+     construct, it must have an implied copy data mapping. E.g.
+
+       #pragma acc parallel
+	 {
+	   #pragma acc loop reduction (+:sum)
+
+     a copy clause for sum should be added on the enclosing parallel
+     construct for carrying back the results.  */
   if (ctx->region_type == ORT_ACC && (flags & GOVD_REDUCTION))
     {
       struct gimplify_omp_ctx *outer_ctx = ctx->outer_context;
-      while (outer_ctx)
+      if (outer_ctx)
 	{
 	  n = splay_tree_lookup (outer_ctx->variables, (splay_tree_key)decl);
 	  if (n != NULL)
 	    {
 	      /* Ignore local variables and explicitly declared clauses.  */
 	      if (n->value & (GOVD_LOCAL | GOVD_EXPLICIT))
-		break;
+		;
 	      else if (outer_ctx->region_type == ORT_ACC_KERNELS)
 		{
 		  /* According to the OpenACC spec, such a reduction variable
@@ -6856,9 +6863,7 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
 	    {
 	      splay_tree_insert (outer_ctx->variables, (splay_tree_key)decl,
 				 GOVD_MAP | GOVD_SEEN);
-	      break;
 	    }
-	  outer_ctx = outer_ctx->outer_context;
 	}
     }
 }
diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-8.c b/gcc/testsuite/c-c++-common/goacc/reduction-8.c
new file mode 100644
index 00000000000..8a0283f4ac3
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/reduction-8.c
@@ -0,0 +1,94 @@
+/* { dg-additional-options "-fdump-tree-gimple" } */
+
+#define n 1000
+
+int
+main(void)
+{
+  int i, j;
+  int result, array[n];
+
+#pragma acc parallel loop reduction (+:result)
+  for (i = 0; i < n; i++)
+    result ++;
+
+#pragma acc parallel
+#pragma acc loop reduction (+:result)
+  for (i = 0; i < n; i++)
+    result ++;
+
+#pragma acc parallel
+#pragma acc loop
+  for (i = 0; i < n; i++)
+    {
+      result = i;
+
+#pragma acc loop reduction(+:result)
+      for (j = 0; j < n; j++)
+	result ++;
+
+      array[i] = result;
+    }
+
+#pragma acc parallel
+#pragma acc loop
+  for (i = 0; i < n; i++)
+    {
+      result = i;
+
+#pragma acc loop worker vector reduction(+:result)
+      for (j = 0; j < n; j++)
+	result ++;
+
+      array[i] = result;
+    }
+
+#pragma acc parallel
+#pragma acc loop // { dg-warning "insufficient partitioning" }
+  for (i = 0; i < n; i++)
+    {
+      result = i;
+
+#pragma acc loop gang reduction(+:result)
+      for (j = 0; j < n; j++)
+	result ++;
+
+      array[i] = result;
+    }
+
+#pragma acc parallel copy(result)
+#pragma acc loop // { dg-warning "insufficient partitioning" }
+  for (i = 0; i < n; i++)
+    {
+      result = i;
+
+#pragma acc loop gang reduction(+:result)
+      for (j = 0; j < n; j++)
+	result ++;
+
+      array[i] = result;
+    }
+  
+#pragma acc kernels
+#pragma acc loop
+  for (i = 0; i < n; i++)
+    {
+      result = i;
+
+#pragma acc loop reduction(+:result)
+      for (j = 0; j < n; j++)
+	result ++;
+
+      array[i] = result;
+    }
+
+  return 0;
+}
+
+/* Check that default copy maps are generated for loop reductions.  */
+/* { dg-final { scan-tree-dump-times "reduction..:result. map.tofrom:result .len: 4.." 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "oacc_parallel map.tofrom:result .len: 4.." 2 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map.tofrom:array .len: 4000.. firstprivate.result." 3 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map.tofrom:result .len: 4.. map.tofrom:array .len: 4000.." 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map.tofrom:array .len: 4000.. map.force_tofrom:result .len: 4.." 1 "gimple" } } */
+
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/privatize-reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/privatize-reduction-1.c
new file mode 100644
index 00000000000..206e66fec79
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/privatize-reduction-1.c
@@ -0,0 +1,41 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (int argc, char *argv[])
+{
+#define N 100
+  int n = N;
+  int i, j, tmp;
+  int input[N*N], output[N], houtput[N];
+
+  for (i = 0; i < n * n; i++)
+    input[i] = i;
+
+  for (i = 0; i < n; i++)
+    {
+      tmp = 0;
+      for (j = 0; j < n; j++)
+	tmp += input[i * n + j];
+      houtput[i] = tmp;
+    }
+
+  #pragma acc parallel loop gang
+  for (i = 0; i < n; i++)
+    {
+      tmp = 0;
+
+      #pragma acc loop worker reduction(+:tmp)
+      for (j = 0; j < n; j++)
+	tmp += input[i * n + j];
+
+      output[i] = tmp;
+    }
+
+  /* Test if every worker-level reduction had correct private result.  */
+  for (i = 0; i < n; i++)
+    if (houtput[i] != output[i])
+      abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/privatize-reduction-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/privatize-reduction-2.c
new file mode 100644
index 00000000000..0c317dcf8a6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/privatize-reduction-2.c
@@ -0,0 +1,23 @@
+#include <assert.h>
+
+int
+main ()
+{
+  const int n = 1000;
+  int i, j, temp, a[n];
+
+#pragma acc parallel loop
+  for (i = 0; i < n; i++)
+    {
+      temp = i;
+#pragma acc loop reduction (+:temp)
+      for (j = 0; j < n; j++)
+	temp ++;
+      a[i] = temp;
+    }
+
+  for (i = 0; i < n; i++)
+    assert (a[i] == i+n);
+
+  return 0;
+}
-- 
2.17.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [OpenACC] Enable firstprivate OpenACC reductions
  2018-09-05 20:08 [OpenACC] Enable firstprivate OpenACC reductions Cesar Philippidis
@ 2018-12-04 12:25 ` Jakub Jelinek
  0 siblings, 0 replies; 2+ messages in thread
From: Jakub Jelinek @ 2018-12-04 12:25 UTC (permalink / raw)
  To: Cesar Philippidis, Thomas Schwinge; +Cc: gcc-patches

On Wed, Sep 05, 2018 at 01:08:21PM -0700, Cesar Philippidis wrote:
> 2018-XX-YY  Cesar Philippidis  <cesar@codesourcery.com>
> 	    Chung-Lin Tang  <cltang@codesourcery.com>
> 
> 	gcc/
> 	* gimplify.c (omp_add_variable): Enable firstprivate reduction
> 	variables.
> 
> 	gcc/testsuite/
> 	* c-c++-common/goacc/reduction-8.c: New test.
> 
> 	libgomp/
> 	* testsuite/libgomp.oacc-c-c++-common/privatize-reduction-1.c: New
> 	test.
> 	* testsuite/libgomp.oacc-c-c++-common/privatize-reduction-2.c: New
> 	test.

This is all in ORT_ACC guarded code and I don't see anything wrong with it,
so if Thomas is ok with it, it is ok for trunk.

> --- a/gcc/gimplify.c
> +++ b/gcc/gimplify.c
> @@ -6823,20 +6823,27 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
>    else
>      splay_tree_insert (ctx->variables, (splay_tree_key)decl, flags);
>  
> -  /* For reductions clauses in OpenACC loop directives, by default create a
> -     copy clause on the enclosing parallel construct for carrying back the
> -     results.  */
> +  /* For OpenACC loop directives, when a reduction clause is placed on
> +     the outermost acc loop within an acc parallel or kernels
> +     construct, it must have an implied copy data mapping. E.g.
> +
> +       #pragma acc parallel
> +	 {
> +	   #pragma acc loop reduction (+:sum)
> +
> +     a copy clause for sum should be added on the enclosing parallel
> +     construct for carrying back the results.  */
>    if (ctx->region_type == ORT_ACC && (flags & GOVD_REDUCTION))
>      {
>        struct gimplify_omp_ctx *outer_ctx = ctx->outer_context;
> -      while (outer_ctx)
> +      if (outer_ctx)
>  	{
>  	  n = splay_tree_lookup (outer_ctx->variables, (splay_tree_key)decl);
>  	  if (n != NULL)
>  	    {
>  	      /* Ignore local variables and explicitly declared clauses.  */
>  	      if (n->value & (GOVD_LOCAL | GOVD_EXPLICIT))
> -		break;
> +		;
>  	      else if (outer_ctx->region_type == ORT_ACC_KERNELS)
>  		{
>  		  /* According to the OpenACC spec, such a reduction variable
> @@ -6856,9 +6863,7 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
>  	    {
>  	      splay_tree_insert (outer_ctx->variables, (splay_tree_key)decl,
>  				 GOVD_MAP | GOVD_SEEN);
> -	      break;
>  	    }
> -	  outer_ctx = outer_ctx->outer_context;
>  	}
>      }
>  }

	Jakub

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2018-12-04 12:25 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-05 20:08 [OpenACC] Enable firstprivate OpenACC reductions Cesar Philippidis
2018-12-04 12:25 ` Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).