public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Julian Brown <julian@codesourcery.com>
To: <gcc-patches@gcc.gnu.org>
Cc: Andrew Stubbs <andrew_stubbs@mentor.com>
Subject: [PATCH 5/6] [og9] Reference reduction localization
Date: Thu, 05 Sep 2019 01:47:00 -0000	[thread overview]
Message-ID: <a86b272418e2650df15b17bfcf52ad1189e5105c.1567644180.git.julian@codesourcery.com> (raw)
In-Reply-To: <cover.1567644180.git.julian@codesourcery.com>

This is a version of Cesar's patch to rewrite OpenACC reference reductions
(e.g. Fortran function arguments) to use temporary scalar variables. This
is necessary at present to avoid stack-slot clashes between multiple
workers on AMD GCN.

Julian

ChangeLog

	gcc/
	* gimplify.c (privatize_reduction): New struct.
	(localize_reductions_r, localize_reductions): New functions.
	(gimplify_omp_for): Call localize_reductions.
	(gimplify_omp_workshare): Likewise.
	* omp-low.c (lower_oacc_reductions): Handle localized reductions.
	Create fewer temp vars.
	* tree-core.h (omp_clause_code): Add OMP_CLAUSE_REDUCTION_PRIVATE_DECL
	documentation.
	* tree.c (omp_clause_num_ops): Bump number of ops for
	OMP_CLAUSE_REDUCTION to 6.
	(walk_tree_1): Adjust accordingly.
	* tree.h (OMP_CLAUSE_REDUCTION_PRIVATE_DECL): Add macro.
---
 gcc/ChangeLog.openacc |  16 +++++++
 gcc/gimplify.c        | 102 ++++++++++++++++++++++++++++++++++++++++++
 gcc/omp-low.c         |  47 ++++++-------------
 gcc/tree-core.h       |   4 +-
 gcc/tree.c            |  11 +++--
 gcc/tree.h            |   2 +
 6 files changed, 145 insertions(+), 37 deletions(-)

diff --git a/gcc/ChangeLog.openacc b/gcc/ChangeLog.openacc
index 0d068ac8ae2..2b7f616810d 100644
--- a/gcc/ChangeLog.openacc
+++ b/gcc/ChangeLog.openacc
@@ -1,3 +1,19 @@
+2019-09-05  Cesar Philippidis  <cesar@codesourcery.com>
+	    Julian Brown  <julian@codesourcery.com>
+
+	* gimplify.c (privatize_reduction): New struct.
+	(localize_reductions_r, localize_reductions): New functions.
+	(gimplify_omp_for): Call localize_reductions.
+	(gimplify_omp_workshare): Likewise.
+	* omp-low.c (lower_oacc_reductions): Handle localized reductions.
+	Create fewer temp vars.
+	* tree-core.h (omp_clause_code): Add OMP_CLAUSE_REDUCTION_PRIVATE_DECL
+	documentation.
+	* tree.c (omp_clause_num_ops): Bump number of ops for
+	OMP_CLAUSE_REDUCTION to 6.
+	(walk_tree_1): Adjust accordingly.
+	* tree.h (OMP_CLAUSE_REDUCTION_PRIVATE_DECL): Add macro.
+
 2019-09-05  Julian Brown  <julian@codesourcery.com>
 
 	* config/gcn/gcn-protos.h (gcn_goacc_adjust_propagation_record): Rename
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 58142c9eb90..685db1763e0 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -234,6 +234,11 @@ struct gimplify_omp_ctx
   hash_map<tree, oacc_array_mapping_info> *decl_data_clause;
 };
 
+struct privatize_reduction
+{
+  tree ref_var, local_var;
+};
+
 static struct gimplify_ctx *gimplify_ctxp;
 static struct gimplify_omp_ctx *gimplify_omp_ctxp;
 
@@ -10804,6 +10809,80 @@ find_combined_omp_for (tree *tp, int *walk_subtrees, void *data)
   return NULL_TREE;
 }
 
+/* Helper function for localize_reductions.  Replace all uses of REF_VAR with
+   LOCAL_VAR.  */
+
+static tree
+localize_reductions_r (tree *tp, int *walk_subtrees, void *data)
+{
+  enum tree_code tc = TREE_CODE (*tp);
+  struct privatize_reduction *pr = (struct privatize_reduction *) data;
+
+  if (TYPE_P (*tp))
+    *walk_subtrees = 0;
+
+  switch (tc)
+    {
+    case INDIRECT_REF:
+    case MEM_REF:
+      if (TREE_OPERAND (*tp, 0) == pr->ref_var)
+	*tp = pr->local_var;
+
+      *walk_subtrees = 0;
+      break;
+
+    case VAR_DECL:
+    case PARM_DECL:
+    case RESULT_DECL:
+      if (*tp == pr->ref_var)
+	*tp = pr->local_var;
+
+      *walk_subtrees = 0;
+      break;
+
+    default:
+      break;
+    }
+
+  return NULL_TREE;
+}
+
+/* OpenACC worker and vector loop state propagation requires reductions
+   to be inside local variables.  This function replaces all reference-type
+   reductions variables associated with the loop with a local copy.  It is
+   also used to create private copies of reduction variables for those
+   which are not associated with acc loops.  */
+
+static void
+localize_reductions (tree clauses, tree body)
+{
+  tree c, var, type, new_var;
+  struct privatize_reduction pr;
+
+  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
+      {
+	var = OMP_CLAUSE_DECL (c);
+
+	if (!lang_hooks.decls.omp_privatize_by_reference (var))
+	  {
+	    OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c) = NULL;
+	    continue;
+	  }
+
+	type = TREE_TYPE (TREE_TYPE (var));
+	new_var = create_tmp_var (type, IDENTIFIER_POINTER (DECL_NAME (var)));
+
+	pr.ref_var = var;
+	pr.local_var = new_var;
+
+	walk_tree (&body, localize_reductions_r, &pr, NULL);
+
+	OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c) = new_var;
+      }
+}
+
+
 /* Gimplify the gross structure of an OMP_FOR statement.  */
 
 static enum gimplify_status
@@ -10989,6 +11068,23 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
       gcc_unreachable ();
     }
 
+  if (ort == ORT_ACC)
+    {
+      gimplify_omp_ctx *outer = gimplify_omp_ctxp;
+
+      while (outer
+	     && outer->region_type != ORT_ACC_PARALLEL
+	     && outer->region_type != ORT_ACC_KERNELS)
+	outer = outer->outer_context;
+
+      /* FIXME: Reductions only work in parallel regions at present.  We avoid
+	 doing the reduction localization transformation in kernels regions
+	 here, because the code to remove reductions in kernels regions cannot
+	 handle that.  */
+      if (outer && outer->region_type == ORT_ACC_PARALLEL)
+	localize_reductions (OMP_FOR_CLAUSES (*expr_p), OMP_FOR_BODY (*expr_p));
+    }
+
   /* Set OMP_CLAUSE_LINEAR_NO_COPYIN flag on explicit linear
      clause for the IV.  */
   if (ort == ORT_SIMD && TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) == 1)
@@ -12154,6 +12250,12 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p)
       || (ort & ORT_HOST_TEAMS) == ORT_HOST_TEAMS)
     {
       push_gimplify_context ();
+
+      /* FIXME: Reductions are not supported in kernels regions yet.  */
+      if (/*ort == ORT_ACC_KERNELS ||*/ ort == ORT_ACC_PARALLEL)
+        localize_reductions (OMP_TARGET_CLAUSES (*expr_p),
+			     OMP_TARGET_BODY (*expr_p));
+
       gimple *g = gimplify_and_return_first (OMP_BODY (expr), &body);
       if (gimple_code (g) == GIMPLE_BIND)
 	pop_gimplify_context (g);
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index fe911599142..4b21769a9a7 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -6129,9 +6129,9 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
     if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
       {
 	tree orig = OMP_CLAUSE_DECL (c);
-	tree var = maybe_lookup_decl (orig, ctx);
+	tree var;
 	tree ref_to_res = NULL_TREE;
-	tree incoming, outgoing, v1, v2, v3;
+	tree incoming, outgoing;
 	bool is_private = false;
 	bool is_fpp = false;
 
@@ -6144,6 +6144,9 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
 	  rcode = BIT_IOR_EXPR;
 	tree op = build_int_cst (unsigned_type_node, rcode);
 
+	var = OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c);
+	if (!var)
+	  var = maybe_lookup_decl (orig, ctx);
 	if (!var)
 	  var = orig;
 
@@ -6255,36 +6258,13 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
 	if (!ref_to_res)
 	  ref_to_res = integer_zero_node;
 
-	if (omp_is_reference (orig))
+	if (omp_is_reference (outgoing))
 	  {
-	    tree type = TREE_TYPE (var);
-	    const char *id = IDENTIFIER_POINTER (DECL_NAME (var));
-
-	    if (!inner)
-	      {
-		tree x = create_tmp_var (TREE_TYPE (type), id);
-		gimplify_assign (var, build_fold_addr_expr (x), fork_seq);
-	      }
-
-	    v1 = create_tmp_var (type, id);
-	    v2 = create_tmp_var (type, id);
-	    v3 = create_tmp_var (type, id);
-
-	    gimplify_assign (v1, var, fork_seq);
-	    gimplify_assign (v2, var, fork_seq);
-	    gimplify_assign (v3, var, fork_seq);
-
-	    var = build_simple_mem_ref (var);
-	    v1 = build_simple_mem_ref (v1);
-	    v2 = build_simple_mem_ref (v2);
-	    v3 = build_simple_mem_ref (v3);
 	    outgoing = build_simple_mem_ref (outgoing);
 
 	    if (!TREE_CONSTANT (incoming))
 	      incoming = build_simple_mem_ref (incoming);
 	  }
-	else
-	  v1 = v2 = v3 = var;
 
 	/* Determine position in reduction buffer, which may be used
 	   by target.  The parser has ensured that this is not a
@@ -6317,20 +6297,21 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
 	  = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
 					  TREE_TYPE (var), 6, init_code,
 					  unshare_expr (ref_to_res),
-					  v1, level, op, off);
+					  var, level, op, off);
 	tree fini_call
 	  = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
 					  TREE_TYPE (var), 6, fini_code,
 					  unshare_expr (ref_to_res),
-					  v2, level, op, off);
+					  var, level, op, off);
 	tree teardown_call
 	  = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
-					  TREE_TYPE (var), 6, teardown_code,
-					  ref_to_res, v3, level, op, off);
+					  TREE_TYPE (var), 6,
+					  teardown_code, ref_to_res, var,
+					  level, op, off);
 
-	gimplify_assign (v1, setup_call, &before_fork);
-	gimplify_assign (v2, init_call, &after_fork);
-	gimplify_assign (v3, fini_call, &before_join);
+	gimplify_assign (var, setup_call, &before_fork);
+	gimplify_assign (var, init_call, &after_fork);
+	gimplify_assign (var, fini_call, &before_join);
 	gimplify_assign (outgoing, teardown_call, &after_join);
       }
 
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index 5bcecc160bc..94a2582d49c 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -255,7 +255,9 @@ enum omp_clause_code {
                 placeholder used in OMP_CLAUSE_REDUCTION_{INIT,MERGE}.
      Operand 4: OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER: Another dummy
 		VAR_DECL placeholder, used like the above for C/C++ array
-		reductions.  */
+		reductions.
+     Operand 5: OMP_CLAUSE_REDUCTION_PRIVATE_DECL: A private VAR_DECL of
+                the original DECL associated with the reduction clause.  */
   OMP_CLAUSE_REDUCTION,
 
   /* OpenMP clause: task_reduction (operator:variable_list).  */
diff --git a/gcc/tree.c b/gcc/tree.c
index 7c891dcbf91..089e2418747 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -283,7 +283,7 @@ unsigned const char omp_clause_num_ops[] =
   1, /* OMP_CLAUSE_SHARED  */
   1, /* OMP_CLAUSE_FIRSTPRIVATE  */
   2, /* OMP_CLAUSE_LASTPRIVATE  */
-  5, /* OMP_CLAUSE_REDUCTION  */
+  6, /* OMP_CLAUSE_REDUCTION  */
   5, /* OMP_CLAUSE_TASK_REDUCTION  */
   5, /* OMP_CLAUSE_IN_REDUCTION  */
   1, /* OMP_CLAUSE_COPYIN  */
@@ -12361,11 +12361,16 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data,
 	  WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
 
 	case OMP_CLAUSE_REDUCTION:
+	  {
+	    for (int i = 0; i < 6; i++)
+	      WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, i));
+	    WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
+	  }
+
 	case OMP_CLAUSE_TASK_REDUCTION:
 	case OMP_CLAUSE_IN_REDUCTION:
 	  {
-	    int i;
-	    for (i = 0; i < 5; i++)
+	    for (int i = 0; i < 5; i++)
 	      WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, i));
 	    WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
 	  }
diff --git a/gcc/tree.h b/gcc/tree.h
index 2f2f109451a..ece8a6496b0 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1629,6 +1629,8 @@ class auto_suppress_location_wrappers
 #define OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER(NODE) \
   OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (NODE, OMP_CLAUSE_REDUCTION, \
 					      OMP_CLAUSE_IN_REDUCTION), 4)
+#define OMP_CLAUSE_REDUCTION_PRIVATE_DECL(NODE) \
+  OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_REDUCTION), 5)
 
 /* True if a REDUCTION clause may reference the original list item (omp_orig)
    in its OMP_CLAUSE_REDUCTION_{,GIMPLE_}INIT.  */
-- 
2.22.0

  parent reply	other threads:[~2019-09-05  1:47 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-05  1:46 [PATCH 0/6] [og9] OpenACC worker partitioning in middle end (AMD GCN) Julian Brown
2019-09-05  1:46 ` [PATCH 1/6] [og9] Target-dependent gang-private variable decl rewriting Julian Brown
2019-09-05  1:46 ` [PATCH 4/6] [og9] Fix up tests for oaccdevlow pass splitting Julian Brown
2019-09-05  1:46 ` [PATCH 3/6] [og9] AMD GCN adjustments for middle-end worker partitioning Julian Brown
2019-09-05  1:46 ` [PATCH 2/6] [og9] OpenACC middle-end worker-partitioning support Julian Brown
2019-09-05 13:52   ` Chung-Lin Tang
2019-09-05 15:01     ` Julian Brown
2019-09-06 12:32       ` Julian Brown
2019-09-05  1:47 ` Julian Brown [this message]
2019-09-05  1:47 ` [PATCH 6/6] [og9] Enable worker partitioning for AMD GCN Julian Brown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a86b272418e2650df15b17bfcf52ad1189e5105c.1567644180.git.julian@codesourcery.com \
    --to=julian@codesourcery.com \
    --cc=andrew_stubbs@mentor.com \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).