From: Julian Brown <julian@codesourcery.com>
To: <gcc-patches@gcc.gnu.org>
Cc: Andrew Stubbs <andrew_stubbs@mentor.com>
Subject: [PATCH 1/6] [og9] Target-dependent gang-private variable decl rewriting
Date: Thu, 05 Sep 2019 01:46:00 -0000 [thread overview]
Message-ID: <2c432092fae99930879687f88f2e8e97d29c786d.1567644180.git.julian@codesourcery.com> (raw)
In-Reply-To: <cover.1567644180.git.julian@codesourcery.com>
This patch adds support for rewriting variables marked up with the "oacc
gangprivate" attribute in a target-dependent way in the oaccdevlow pass
of the offload compiler.
This behaviour is controlled by a new target hook,
TARGET_GOACC_ADJUST_GANGPRIVATE_DECL. This is conceptually similar to
the existing TARGET_GOACC_EXPAND_ACCEL_VAR hook, but that one works too
late in the compilation process for AMD GCN.
The patch to set the "oacc gangprivate" attribute was posted upstream here:
https://gcc.gnu.org/ml/gcc-patches/2018-08/msg00749.html
A version of that is already present on the og9 branch.
Julian
ChangeLog
gcc/
* omp-offload.c (convert.h): Include.
(struct addr_expr_rewrite_info): Add struct.
(rewrite_addr_expr): New function.
(is_sync_builtin_call): New function.
(execute_oacc_device_lower): Support rewriting gang-private variables
using target hook, and fix up addr_expr nodes afterwards.
* target.def (adjust_gangprivate_decl): New target hook.
* doc/tm.texi.in (TARGET_GOACC_ADJUST_GANGPRIVATE_DECL): Document new
target hook.
* doc/tm.texi: Regenerate.
---
gcc/ChangeLog.openacc | 13 +++++
gcc/doc/tm.texi | 4 ++
gcc/doc/tm.texi.in | 2 +
gcc/omp-offload.c | 133 ++++++++++++++++++++++++++++++++++++++++++
gcc/target.def | 6 ++
5 files changed, 158 insertions(+)
diff --git a/gcc/ChangeLog.openacc b/gcc/ChangeLog.openacc
index a22f07c817c..b1c627b394c 100644
--- a/gcc/ChangeLog.openacc
+++ b/gcc/ChangeLog.openacc
@@ -1,3 +1,16 @@
+2019-09-05 Julian Brown <julian@codesourcery.com>
+
+ * omp-offload.c (convert.h): Include.
+ (struct addr_expr_rewrite_info): Add struct.
+ (rewrite_addr_expr): New function.
+ (is_sync_builtin_call): New function.
+ (execute_oacc_device_lower): Support rewriting gang-private variables
+ using target hook, and fix up addr_expr nodes afterwards.
+ * target.def (adjust_gangprivate_decl): New target hook.
+ * doc/tm.texi.in (TARGET_GOACC_ADJUST_GANGPRIVATE_DECL): Document new
+ target hook.
+ * doc/tm.texi: Regenerate.
+
2019-08-13 Julian Brown <julian@codesourcery.com>
* omp-oacc-kernels.c (add_wait): New function, split out of...
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 9b88498eb95..f3707c6abe3 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6162,6 +6162,10 @@ memories. A return value of NULL indicates that the target does not
handle this VAR_DECL, and normal RTL expanding is resumed.
@end deftypefn
+@deftypefn {Target Hook} void TARGET_GOACC_ADJUST_GANGPRIVATE_DECL (tree @var{var})
+Tweak variable declaration for a gang-private variable.
+@end deftypefn
+
@deftypefn {Target Hook} bool TARGET_GOACC_EXPLODE_ARGS (void)
Define this hook to TRUE if arguments to offload regions should be
exploded, i.e. passed as true arguments rather than in an argument array.
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index c9c4341a35f..cebadf4a502 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4210,6 +4210,8 @@ address; but often a machine-dependent strategy can generate better code.
@hook TARGET_GOACC_EXPAND_ACCEL_VAR
+@hook TARGET_GOACC_ADJUST_GANGPRIVATE_DECL
+
@hook TARGET_GOACC_EXPLODE_ARGS
@node Anchored Addresses
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index 1129b00511e..c94dc956d7e 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -52,6 +52,7 @@ along with GCC; see the file COPYING3. If not see
#include "stringpool.h"
#include "attribs.h"
#include "cfgloop.h"
+#include "convert.h"
/* Describe the OpenACC looping structure of a function. The entire
function is held in a 'NULL' loop. */
@@ -1570,6 +1571,78 @@ maybe_discard_oacc_function (tree decl)
return false;
}
+struct addr_expr_rewrite_info /* State handed to rewrite_addr_expr through walk_stmt_info::info.  */
+{
+ gimple *stmt; /* Statement being walked; replacement statements are inserted before it.  */
+ hash_set<tree> *adjusted_vars; /* Trees whose ADDR_EXPRs must be rewritten.  */
+ bool avoid_pointer_conversion; /* If true, rebuild the ADDR_EXPR in place instead of converting back to the original pointer type.  */
+ bool modified; /* Set to true by rewrite_addr_expr whenever it replaces an operand.  */
+};
+
+static tree
+rewrite_addr_expr (tree *tp, int *walk_subtrees, void *data) /* Operand-walk callback: rewrite ADDR_EXPRs whose operand is in INFO->adjusted_vars.  */
+{
+ walk_stmt_info *wi = (walk_stmt_info *) data;
+ addr_expr_rewrite_info *info = (addr_expr_rewrite_info *) wi->info; /* Filled in by the caller before the walk.  */
+
+ if (TREE_CODE (*tp) == ADDR_EXPR)
+ {
+ tree arg = TREE_OPERAND (*tp, 0);
+
+ if (info->adjusted_vars->contains (arg)) /* Matches only when the operand itself is in the set.  */
+ {
+ if (info->avoid_pointer_conversion)
+ {
+ *tp = build_fold_addr_expr (arg); /* Rebuild the address in place; no extra statements are emitted.  */
+ info->modified = true;
+ *walk_subtrees = 0; /* Do not descend below the node just replaced.  */
+ }
+ else
+ {
+ gimple_stmt_iterator gsi = gsi_for_stmt (info->stmt); /* Insertion point: the statement being walked.  */
+ tree repl = build_fold_addr_expr (arg);
+ gimple *stmt1 /* Take the rebuilt address into a fresh SSA name of its own type.  */
+ = gimple_build_assign (make_ssa_name (TREE_TYPE (repl)), repl);
+ tree conv = convert_to_pointer (TREE_TYPE (*tp), /* Convert back to the type of the original ADDR_EXPR.  */
+ gimple_assign_lhs (stmt1));
+ gimple *stmt2 /* Hold the converted pointer in a second fresh SSA name.  */
+ = gimple_build_assign (make_ssa_name (TREE_TYPE (*tp)), conv);
+ gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT); /* stmt1 then stmt2, both ahead of INFO->stmt.  */
+ gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT);
+ *tp = gimple_assign_lhs (stmt2); /* The operand now refers to the converted SSA name.  */
+ info->modified = true;
+ *walk_subtrees = 0; /* Do not descend below the node just replaced.  */
+ }
+ }
+ }
+
+ return NULL_TREE; /* This callback never returns a non-null tree.  */
+}
+
+/* Return TRUE if CALL is a call to a builtin atomic/sync operation. */
+
+static bool
+is_sync_builtin_call (gcall *call)
+{
+ tree callee = gimple_call_fndecl (call); /* A NULL_TREE result is rejected by the test below.  */
+
+ if (callee != NULL_TREE
+ && gimple_call_builtin_p (call, BUILT_IN_NORMAL))
+ switch (DECL_FUNCTION_CODE (callee))
+ {
+#undef DEF_SYNC_BUILTIN
+#define DEF_SYNC_BUILTIN(ENUM, NAME, TYPE, ATTRS) case ENUM:
+#include "sync-builtins.def" /* Each DEF_SYNC_BUILTIN entry in the .def file becomes a case label here.  */
+#undef DEF_SYNC_BUILTIN
+ return true; /* Shared body for all of the generated case labels.  */
+
+ default:
+ ; /* Any other function code falls out to the return below.  */
+ }
+
+ return false;
+}
+
/* Main entry point for oacc transformations which run on the device
compiler after LTO, so we know what the target device is at this
point (including the host fallback). */
@@ -1815,6 +1888,66 @@ execute_oacc_device_lower ()
gsi_next (&gsi);
}
+ /* Make adjustments to gang-private local variables if required by the
+ target, e.g. forcing them into a particular address space. Afterwards,
+ ADDR_EXPR nodes which have adjusted variables as their argument need to
+ be modified in one of two ways:
+
+ 1. They can be recreated, making a pointer to the variable in the new
+ address space, or
+
+ 2. The address of the variable in the new address space can be taken,
+ converted to the default (original) address space, and the result of
+ that conversion substituted in place of the original ADDR_EXPR node.
+
+ Which of these is done depends on the gimple statement being processed.
+ At present atomic operations and inline asms use (1), and everything else
+ uses (2). At least on AMD GCN, there are atomic operations that work
+ directly in the LDS address space. */
+
+ if (targetm.goacc.adjust_gangprivate_decl)
+ {
+ tree var;
+ unsigned i;
+ hash_set<tree> adjusted_vars;
+
+ FOR_EACH_LOCAL_DECL (cfun, i, var)
+ {
+ if (!VAR_P (var)
+ || !lookup_attribute ("oacc gangprivate", DECL_ATTRIBUTES (var)))
+ continue;
+
+ targetm.goacc.adjust_gangprivate_decl (var);
+ adjusted_vars.add (var);
+ }
+
+ FOR_ALL_BB_FN (bb, cfun)
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ !gsi_end_p (gsi);
+ gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ walk_stmt_info wi;
+ addr_expr_rewrite_info info;
+
+ info.avoid_pointer_conversion
+ = (is_gimple_call (stmt)
+ && is_sync_builtin_call (as_a <gcall *> (stmt)))
+ || gimple_code (stmt) == GIMPLE_ASM;
+ info.stmt = stmt;
+ info.modified = false;
+ info.adjusted_vars = &adjusted_vars;
+
+ memset (&wi, 0, sizeof (wi));
+ wi.info = &info;
+
+ walk_gimple_op (stmt, rewrite_addr_expr, &wi);
+
+ if (info.modified)
+ update_stmt (stmt);
+ }
+ }
+
free_oacc_loop (loops);
return 0;
diff --git a/gcc/target.def b/gcc/target.def
index d26b888a485..d82db232e40 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1729,6 +1729,12 @@ handle this VAR_DECL, and normal RTL expanding is resumed.",
rtx, (tree var),
NULL)
+DEFHOOK
+(adjust_gangprivate_decl,
+"Tweak variable declaration for a gang-private variable.",
+void, (tree var),
+NULL)
+
DEFHOOK
(explode_args,
"Define this hook to TRUE if arguments to offload regions should be\n\
--
2.22.0
next prev parent reply other threads:[~2019-09-05 1:46 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-09-05 1:46 [PATCH 0/6] [og9] OpenACC worker partitioning in middle end (AMD GCN) Julian Brown
2019-09-05 1:46 ` [PATCH 3/6] [og9] AMD GCN adjustments for middle-end worker partitioning Julian Brown
2019-09-05 1:46 ` [PATCH 2/6] [og9] OpenACC middle-end worker-partitioning support Julian Brown
2019-09-05 13:52 ` Chung-Lin Tang
2019-09-05 15:01 ` Julian Brown
2019-09-06 12:32 ` Julian Brown
2019-09-05 1:46 ` Julian Brown [this message]
2019-09-05 1:46 ` [PATCH 4/6] [og9] Fix up tests for oaccdevlow pass splitting Julian Brown
2019-09-05 1:47 ` [PATCH 6/6] [og9] Enable worker partitioning for AMD GCN Julian Brown
2019-09-05 1:47 ` [PATCH 5/6] [og9] Reference reduction localization Julian Brown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=2c432092fae99930879687f88f2e8e97d29c786d.1567644180.git.julian@codesourcery.com \
--to=julian@codesourcery.com \
--cc=andrew_stubbs@mentor.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).