From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 8748 invoked by alias); 5 Sep 2019 01:47:19 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 8567 invoked by uid 89); 5 Sep 2019 01:47:16 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-19.6 required=5.0 tests=AWL,BAYES_00,GIT_PATCH_0,GIT_PATCH_1,GIT_PATCH_2,GIT_PATCH_3,RCVD_IN_DNSWL_NONE autolearn=ham version=3.3.1 spammy= X-HELO: esa3.mentor.iphmx.com Received: from esa3.mentor.iphmx.com (HELO esa3.mentor.iphmx.com) (68.232.137.180) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Thu, 05 Sep 2019 01:47:12 +0000 IronPort-SDR: 1ONYaEmT2O7pBK67f4zjrXReqVOA69/g0+VBl+iNtd8MpBuB3b+/w6SWIxOZPh2aPJSVYPSaSI kMnmAf5dkUEafjz30hDSzeVsFSdqgC5v7zI9bEg8MbIxH7zkJPKPOMcISZeAJk1wYtLfUXEh9D 7aq0tZtlvcUHnLVKzzS4JSMjpZdUn2XF0mZa8mQOQZ4pjZpnqy8yKGtdBqvq5o52J4nYcda+aB e1O55+Tter4re57FPEWw4YNV8GmuRr9UnlLkkJzrGPA1dNVFZoC5T+ap5f8ZFyWmZBJ/oaXbnN UfU= Received: from orw-gwy-02-in.mentorg.com ([192.94.38.167]) by esa3.mentor.iphmx.com with ESMTP; 04 Sep 2019 17:47:10 -0800 IronPort-SDR: TX3aiPwsfmUoDWtqcBs9TckwlknUkijoubEf14ZzNhTc610O2FP1UQUftJCS/jYEMgHKn7EvLD VnigPHWyGZf0DreiF0lcWWDYy4nb68D1Jo2302+rXZBAXR6r7Bl+ZvCOjmv+Uvdhj46XzeQMFu kcD9RXNArMc6cTGuM0eKYNneX52FrnWNTUeGlBAwwNcUcDvACnLSc6wbALsUHLCsyMeEdGQU/X JFr4cDohbPLXixpMgRUO+ZmutJyVzEOY4JGGWlm0OoTMYveFFjPIsMT6kL0UPRe7qbmgLRn1/n 4iI= From: Julian Brown To: CC: Andrew Stubbs Subject: [PATCH 5/6] [og9] Reference reduction localization Date: Thu, 05 Sep 2019 01:47:00 -0000 Message-ID: In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain Return-Path: julian@codesourcery.com X-IsSubscribed: yes X-SW-Source: 2019-09/txt/msg00234.txt.bz2 This is a version of Cesar's patch to rewrite OpenACC reference reductions (e.g. Fortran function arguments) to use temporary scalar variables. This is necessary at present to avoid stack-slot clashes between multiple workers on AMD GCN. Julian ChangeLog gcc/ * gimplify.c (privatize_reduction): New struct. (localize_reductions_r, localize_reductions): New functions. (gimplify_omp_for): Call localize_reductions. (gimplify_omp_workshare): Likewise. * omp-low.c (lower_oacc_reductions): Handle localized reductions. Create fewer temp vars. * tree-core.h (omp_clause_code): Add OMP_CLAUSE_REDUCTION_PRIVATE_DECL documentation. * tree.c (omp_clause_num_ops): Bump number of ops for OMP_CLAUSE_REDUCTION to 6. (walk_tree_1): Adjust accordingly. * tree.h (OMP_CLAUSE_REDUCTION_PRIVATE_DECL): Add macro. --- gcc/ChangeLog.openacc | 16 +++++++ gcc/gimplify.c | 102 ++++++++++++++++++++++++++++++++++++++++++ gcc/omp-low.c | 47 ++++++------------- gcc/tree-core.h | 4 +- gcc/tree.c | 11 +++-- gcc/tree.h | 2 + 6 files changed, 145 insertions(+), 37 deletions(-) diff --git a/gcc/ChangeLog.openacc b/gcc/ChangeLog.openacc index 0d068ac8ae2..2b7f616810d 100644 --- a/gcc/ChangeLog.openacc +++ b/gcc/ChangeLog.openacc @@ -1,3 +1,19 @@ +2019-09-05 Cesar Philippidis + Julian Brown + + * gimplify.c (privatize_reduction): New struct. + (localize_reductions_r, localize_reductions): New functions. + (gimplify_omp_for): Call localize_reductions. + (gimplify_omp_workshare): Likewise. + * omp-low.c (lower_oacc_reductions): Handle localized reductions. + Create fewer temp vars. + * tree-core.h (omp_clause_code): Add OMP_CLAUSE_REDUCTION_PRIVATE_DECL + documentation. + * tree.c (omp_clause_num_ops): Bump number of ops for + OMP_CLAUSE_REDUCTION to 6. + (walk_tree_1): Adjust accordingly. + * tree.h (OMP_CLAUSE_REDUCTION_PRIVATE_DECL): Add macro. + 2019-09-05 Julian Brown * config/gcn/gcn-protos.h (gcn_goacc_adjust_propagation_record): Rename diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 58142c9eb90..685db1763e0 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -234,6 +234,11 @@ struct gimplify_omp_ctx hash_map *decl_data_clause; }; +struct privatize_reduction +{ + tree ref_var, local_var; +}; + static struct gimplify_ctx *gimplify_ctxp; static struct gimplify_omp_ctx *gimplify_omp_ctxp; @@ -10804,6 +10809,80 @@ find_combined_omp_for (tree *tp, int *walk_subtrees, void *data) return NULL_TREE; } +/* Helper function for localize_reductions. Replace all uses of REF_VAR with + LOCAL_VAR. */ + +static tree +localize_reductions_r (tree *tp, int *walk_subtrees, void *data) +{ + enum tree_code tc = TREE_CODE (*tp); + struct privatize_reduction *pr = (struct privatize_reduction *) data; + + if (TYPE_P (*tp)) + *walk_subtrees = 0; + + switch (tc) + { + case INDIRECT_REF: + case MEM_REF: + if (TREE_OPERAND (*tp, 0) == pr->ref_var) + *tp = pr->local_var; + + *walk_subtrees = 0; + break; + + case VAR_DECL: + case PARM_DECL: + case RESULT_DECL: + if (*tp == pr->ref_var) + *tp = pr->local_var; + + *walk_subtrees = 0; + break; + + default: + break; + } + + return NULL_TREE; +} + +/* OpenACC worker and vector loop state propagation requires reductions + to be inside local variables. This function replaces all reference-type + reductions variables associated with the loop with a local copy. It is + also used to create private copies of reduction variables for those + which are not associated with acc loops. */ + +static void +localize_reductions (tree clauses, tree body) +{ + tree c, var, type, new_var; + struct privatize_reduction pr; + + for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION) + { + var = OMP_CLAUSE_DECL (c); + + if (!lang_hooks.decls.omp_privatize_by_reference (var)) + { + OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c) = NULL; + continue; + } + + type = TREE_TYPE (TREE_TYPE (var)); + new_var = create_tmp_var (type, IDENTIFIER_POINTER (DECL_NAME (var))); + + pr.ref_var = var; + pr.local_var = new_var; + + walk_tree (&body, localize_reductions_r, &pr, NULL); + + OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c) = new_var; + } +} + + /* Gimplify the gross structure of an OMP_FOR statement. */ static enum gimplify_status @@ -10989,6 +11068,23 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p) gcc_unreachable (); } + if (ort == ORT_ACC) + { + gimplify_omp_ctx *outer = gimplify_omp_ctxp; + + while (outer + && outer->region_type != ORT_ACC_PARALLEL + && outer->region_type != ORT_ACC_KERNELS) + outer = outer->outer_context; + + /* FIXME: Reductions only work in parallel regions at present. We avoid + doing the reduction localization transformation in kernels regions + here, because the code to remove reductions in kernels regions cannot + handle that. */ + if (outer && outer->region_type == ORT_ACC_PARALLEL) + localize_reductions (OMP_FOR_CLAUSES (*expr_p), OMP_FOR_BODY (*expr_p)); + } + /* Set OMP_CLAUSE_LINEAR_NO_COPYIN flag on explicit linear clause for the IV. */ if (ort == ORT_SIMD && TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) == 1) @@ -12154,6 +12250,12 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) || (ort & ORT_HOST_TEAMS) == ORT_HOST_TEAMS) { push_gimplify_context (); + + /* FIXME: Reductions are not supported in kernels regions yet. */ + if (/*ort == ORT_ACC_KERNELS ||*/ ort == ORT_ACC_PARALLEL) + localize_reductions (OMP_TARGET_CLAUSES (*expr_p), + OMP_TARGET_BODY (*expr_p)); + gimple *g = gimplify_and_return_first (OMP_BODY (expr), &body); if (gimple_code (g) == GIMPLE_BIND) pop_gimplify_context (g); diff --git a/gcc/omp-low.c b/gcc/omp-low.c index fe911599142..4b21769a9a7 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -6129,9 +6129,9 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION) { tree orig = OMP_CLAUSE_DECL (c); - tree var = maybe_lookup_decl (orig, ctx); + tree var; tree ref_to_res = NULL_TREE; - tree incoming, outgoing, v1, v2, v3; + tree incoming, outgoing; bool is_private = false; bool is_fpp = false; @@ -6144,6 +6144,9 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, rcode = BIT_IOR_EXPR; tree op = build_int_cst (unsigned_type_node, rcode); + var = OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c); + if (!var) + var = maybe_lookup_decl (orig, ctx); if (!var) var = orig; @@ -6255,36 +6258,13 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, if (!ref_to_res) ref_to_res = integer_zero_node; - if (omp_is_reference (orig)) + if (omp_is_reference (outgoing)) { - tree type = TREE_TYPE (var); - const char *id = IDENTIFIER_POINTER (DECL_NAME (var)); - - if (!inner) - { - tree x = create_tmp_var (TREE_TYPE (type), id); - gimplify_assign (var, build_fold_addr_expr (x), fork_seq); - } - - v1 = create_tmp_var (type, id); - v2 = create_tmp_var (type, id); - v3 = create_tmp_var (type, id); - - gimplify_assign (v1, var, fork_seq); - gimplify_assign (v2, var, fork_seq); - gimplify_assign (v3, var, fork_seq); - - var = build_simple_mem_ref (var); - v1 = build_simple_mem_ref (v1); - v2 = build_simple_mem_ref (v2); - v3 = build_simple_mem_ref (v3); outgoing = build_simple_mem_ref (outgoing); if (!TREE_CONSTANT (incoming)) incoming = build_simple_mem_ref (incoming); } - else - v1 = v2 = v3 = var; /* Determine position in reduction buffer, which may be used by target. The parser has ensured that this is not a @@ -6317,20 +6297,21 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION, TREE_TYPE (var), 6, init_code, unshare_expr (ref_to_res), - v1, level, op, off); + var, level, op, off); tree fini_call = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION, TREE_TYPE (var), 6, fini_code, unshare_expr (ref_to_res), - v2, level, op, off); + var, level, op, off); tree teardown_call = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION, - TREE_TYPE (var), 6, teardown_code, - ref_to_res, v3, level, op, off); + TREE_TYPE (var), 6, + teardown_code, ref_to_res, var, + level, op, off); - gimplify_assign (v1, setup_call, &before_fork); - gimplify_assign (v2, init_call, &after_fork); - gimplify_assign (v3, fini_call, &before_join); + gimplify_assign (var, setup_call, &before_fork); + gimplify_assign (var, init_call, &after_fork); + gimplify_assign (var, fini_call, &before_join); gimplify_assign (outgoing, teardown_call, &after_join); } diff --git a/gcc/tree-core.h b/gcc/tree-core.h index 5bcecc160bc..94a2582d49c 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -255,7 +255,9 @@ enum omp_clause_code { placeholder used in OMP_CLAUSE_REDUCTION_{INIT,MERGE}. Operand 4: OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER: Another dummy VAR_DECL placeholder, used like the above for C/C++ array - reductions. */ + reductions. + Operand 5: OMP_CLAUSE_REDUCTION_PRIVATE_DECL: A private VAR_DECL of + the original DECL associated with the reduction clause. */ OMP_CLAUSE_REDUCTION, /* OpenMP clause: task_reduction (operator:variable_list). */ diff --git a/gcc/tree.c b/gcc/tree.c index 7c891dcbf91..089e2418747 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -283,7 +283,7 @@ unsigned const char omp_clause_num_ops[] = 1, /* OMP_CLAUSE_SHARED */ 1, /* OMP_CLAUSE_FIRSTPRIVATE */ 2, /* OMP_CLAUSE_LASTPRIVATE */ - 5, /* OMP_CLAUSE_REDUCTION */ + 6, /* OMP_CLAUSE_REDUCTION */ 5, /* OMP_CLAUSE_TASK_REDUCTION */ 5, /* OMP_CLAUSE_IN_REDUCTION */ 1, /* OMP_CLAUSE_COPYIN */ @@ -12361,11 +12361,16 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data, WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp)); case OMP_CLAUSE_REDUCTION: + { + for (int i = 0; i < 6; i++) + WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, i)); + WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp)); + } + case OMP_CLAUSE_TASK_REDUCTION: case OMP_CLAUSE_IN_REDUCTION: { - int i; - for (i = 0; i < 5; i++) + for (int i = 0; i < 5; i++) WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, i)); WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp)); } diff --git a/gcc/tree.h b/gcc/tree.h index 2f2f109451a..ece8a6496b0 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -1629,6 +1629,8 @@ class auto_suppress_location_wrappers #define OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER(NODE) \ OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (NODE, OMP_CLAUSE_REDUCTION, \ OMP_CLAUSE_IN_REDUCTION), 4) +#define OMP_CLAUSE_REDUCTION_PRIVATE_DECL(NODE) \ + OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_REDUCTION), 5) /* True if a REDUCTION clause may reference the original list item (omp_orig) in its OMP_CLAUSE_REDUCTION_{,GIMPLE_}INIT. */ -- 2.22.0