From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: (qmail 37193 invoked by alias); 3 Nov 2015 14:25:58 -0000
Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id:
List-Archive:
List-Post:
List-Help:
Sender: gcc-patches-owner@gcc.gnu.org
Received: (qmail 37184 invoked by uid 89); 3 Nov 2015 14:25:58 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=-0.3 required=5.0 tests=AWL,BAYES_50,KAM_LAZY_DOMAIN_SECURITY,RCVD_IN_DNSWL_LOW,T_RP_MATCHES_RCVD autolearn=no version=3.3.2
X-HELO: smtp.ispras.ru
Received: from smtp.ispras.ru (HELO smtp.ispras.ru) (83.149.199.79) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 03 Nov 2015 14:25:56 +0000
Received: from [10.10.3.121] (unknown [83.149.199.91]) by smtp.ispras.ru (Postfix) with ESMTP id 2665A204FF; Tue, 3 Nov 2015 17:25:53 +0300 (MSK)
Date: Tue, 03 Nov 2015 14:25:00 -0000
From: Alexander Monakov
To: gcc-patches@gcc.gnu.org
cc: Jakub Jelinek , Dmitry Melnik
Subject: Re: [gomp4 06/14] omp-low: copy omp_data_o to shared memory on NVPTX
In-Reply-To: <1445366076-16082-7-git-send-email-amonakov@ispras.ru>
Message-ID:
References: <1445366076-16082-1-git-send-email-amonakov@ispras.ru> <1445366076-16082-7-git-send-email-amonakov@ispras.ru>
User-Agent: Alpine 2.20 (LNX 67 2015-01-07)
MIME-Version: 1.0
Content-Type: text/plain; charset=US-ASCII
X-SW-Source: 2015-11/txt/msg00217.txt.bz2

Hello,

Here's an alternative patch that does not depend on exposure of shared-memory
address space, and does not try to use pass_late_lower_omp.
It's based on Bernd's suggestion to transform

  (use .omp_data_o)
  GOMP_parallel (fn, &.omp_data_o, ...);
  .omp_data_o = {CLOBBER};

to

  .omp_data_o_ptr = __internal_omp_alloc_shared (&.omp_data_o, sizeof ...);
  (use (*.omp_data_o_ptr) instead of .omp_data_o)
  GOMP_parallel (fn, .omp_data_o_ptr, ...);
  __internal_omp_free_shared (.omp_data_o_ptr);
  .omp_data_o = {CLOBBER};

Every target except nvptx can lower free_shared to nothing and alloc_shared to
just returning the first argument, and nvptx can select storage in shared
memory or global memory.  For now it simply uses malloc/free.

Sanity-checked by running the libgomp testsuite.  I realize the #ifdef in
internal-fn.c is not appropriate: it's there to make the patch smaller; I'll
replace it with a target hook if this approach is otherwise OK.

Thanks.
Alexander

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index bf0f23e..3145a8d 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -175,6 +175,38 @@ expand_GOMP_SIMD_LAST_LANE (gcall *)
   gcc_unreachable ();
 }
 
+static void
+expand_GOMP_ALLOC_SHARED (gcall *stmt)
+{
+  tree lhs = gimple_call_lhs (stmt);
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+
+  /* XXX PoC only, needs to be a target hook.  */
+#ifdef GCC_NVPTX_H
+  tree fndecl = builtin_decl_explicit (BUILT_IN_MALLOC);
+  tree t = build_call_expr (fndecl, 1, gimple_call_arg (stmt, 1));
+
+  expand_call (t, target, 0);
+#else
+  tree rhs = gimple_call_arg (stmt, 0);
+
+  rtx src = expand_normal (rhs);
+
+  emit_move_insn (target, src);
+#endif
+}
+
+static void
+expand_GOMP_FREE_SHARED (gcall *stmt)
+{
+#ifdef GCC_NVPTX_H
+  tree fndecl = builtin_decl_explicit (BUILT_IN_FREE);
+  tree t = build_call_expr (fndecl, 1, gimple_call_arg (stmt, 0));
+
+  expand_call (t, NULL_RTX, 1);
+#endif
+}
+
 /* This should get expanded in the sanopt pass.  */
 
 static void
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 0db03f1..0c8e76a 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -44,6 +44,8 @@ DEF_INTERNAL_FN (STORE_LANES, ECF_CONST | ECF_LEAF, NULL)
 DEF_INTERNAL_FN (GOMP_SIMD_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMD_VF, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMD_LAST_LANE, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_ALLOC_SHARED, ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_FREE_SHARED, ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (LOOP_VECTORIZED, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (MASK_LOAD, ECF_PURE | ECF_LEAF, NULL)
 DEF_INTERNAL_FN (MASK_STORE, ECF_LEAF, NULL)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 696889d..225bf20 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -5870,7 +5870,8 @@ expand_omp_taskreg (struct omp_region *region)
          a function call that has been inlined, the original PARM_DECL
          .OMP_DATA_I may have been converted into a different local
          variable.  In which case, we need to keep the assignment.  */
-      if (gimple_omp_taskreg_data_arg (entry_stmt))
+      tree data_arg = gimple_omp_taskreg_data_arg (entry_stmt);
+      if (data_arg)
         {
           basic_block entry_succ_bb
             = single_succ_p (entry_bb) ? single_succ (entry_bb)
@@ -5894,9 +5895,10 @@ expand_omp_taskreg (struct omp_region *region)
                   /* We're ignore the subcode because we're
                      effectively doing a STRIP_NOPS.  */
 
-                  if (TREE_CODE (arg) == ADDR_EXPR
-                      && TREE_OPERAND (arg, 0)
-                        == gimple_omp_taskreg_data_arg (entry_stmt))
+                  if ((TREE_CODE (arg) == ADDR_EXPR
+                       && TREE_OPERAND (arg, 0) == data_arg)
+                      || (TREE_CODE (data_arg) == INDIRECT_REF
+                          && TREE_OPERAND (data_arg, 0) == arg))
                     {
                       parcopy_stmt = stmt;
                       break;
@@ -11835,27 +11837,44 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx)
   record_vars_into (ctx->block_vars, child_fn);
   record_vars_into (gimple_bind_vars (par_bind), child_fn);
 
+  ilist = NULL;
+  tree sender_decl = NULL_TREE;
+
   if (ctx->record_type)
     {
-      ctx->sender_decl
+      sender_decl
         = create_tmp_var (ctx->srecord_type ? ctx->srecord_type
                           : ctx->record_type, ".omp_data_o");
-      DECL_NAMELESS (ctx->sender_decl) = 1;
-      TREE_ADDRESSABLE (ctx->sender_decl) = 1;
+      DECL_NAMELESS (sender_decl) = 1;
+      TREE_ADDRESSABLE (sender_decl) = 1;
+
+      /* Instead of using the automatic variable .omp_data_o directly, build
+         .omp_data_o_ptr = GOMP_ALLOC_SHARED (&.omp_data_o, sizeof .omp_data_o)
+         ... and replace SENDER_DECL with indirect ref *.omp_data_o_ptr.  */
+      tree ae = build_fold_addr_expr (sender_decl);
+      tree sz = TYPE_SIZE_UNIT (TREE_TYPE (sender_decl));
+      gimple g = gimple_build_call_internal (IFN_GOMP_ALLOC_SHARED, 2, ae, sz);
+      gimple_seq_add_stmt (&ilist, g);
+      tree result = create_tmp_var (TREE_TYPE (ae), ".omp_data_o_ptr");
+      gimple_call_set_lhs (g, result);
+      ctx->sender_decl = build_fold_indirect_ref (result);
       gimple_omp_taskreg_set_data_arg (stmt, ctx->sender_decl);
     }
 
   olist = NULL;
-  ilist = NULL;
   lower_send_clauses (clauses, &ilist, &olist, ctx);
   lower_send_shared_vars (&ilist, &olist, ctx);
 
   if (ctx->record_type)
     {
-      tree clobber = build_constructor (TREE_TYPE (ctx->sender_decl), NULL);
+      /* GOMP_FREE_SHARED (.omp_data_o_ptr).  */
+      tree ae = build_fold_addr_expr (ctx->sender_decl);
+      gimple g = gimple_build_call_internal (IFN_GOMP_FREE_SHARED, 1, ae);
+      gimple_seq_add_stmt (&olist, g);
+      /* Clobber the original stack variable.  */
+      tree clobber = build_constructor (TREE_TYPE (sender_decl), NULL);
       TREE_THIS_VOLATILE (clobber) = 1;
-      gimple_seq_add_stmt (&olist, gimple_build_assign (ctx->sender_decl,
-                                                        clobber));
+      gimple_seq_add_stmt (&olist, gimple_build_assign (sender_decl, clobber));
     }
 
   /* Once all the expansions are done, sequence all the different