* [OpenACC 0/7] host_data construct @ 2015-10-22 19:14 James Norris 2015-10-22 19:15 ` [OpenACC 2/7] host_data construct (C FE) James Norris ` (8 more replies) 0 siblings, 9 replies; 33+ messages in thread From: James Norris @ 2015-10-22 19:14 UTC (permalink / raw) To: GCC Patches; +Cc: Joseph S. Myers, Nathan Sidwell, Jakub Jelinek Hi! This patch series adds the handling of OpenACC host_data construct in the C and C++ front-ends, gimple, and supporting functions in libgomp. Commentary on the changes is included in each individual patch. All of the code is in the gomp-4_0-branch. Regtested on x86_64-linux. Thanks! Jim ^ permalink raw reply [flat|nested] 33+ messages in thread
* [OpenACC 2/7] host_data construct (C FE) 2015-10-22 19:14 [OpenACC 0/7] host_data construct James Norris @ 2015-10-22 19:15 ` James Norris 2015-10-22 19:15 ` [OpenACC 1/7] host_data construct (C/C++ common) James Norris ` (7 subsequent siblings) 8 siblings, 0 replies; 33+ messages in thread From: James Norris @ 2015-10-22 19:15 UTC (permalink / raw) To: GCC Patches; +Cc: Joseph S. Myers, Nathan Sidwell, Jakub Jelinek [-- Attachment #1: Type: text/plain, Size: 710 bytes --] gcc/c/c-parser.c b/gcc/c/c-parser.c - Add handling of use_device clause in c_parser_omp_clause_name(). - Add new function c_parser_oacc_clause_use_device() to handle use_device clause with host_data. - Add handling of use_device clause in c_parser_oacc_all_clauses(). - Add new macro OACC_HOST_DATA_CLAUSE_MASK. - Add new function c_parser_oacc_host_data() to handle host_data. - Add handling of host_data pragma to c_parser_omp_construct(). gcc/c/c-tree.h b/gcc/c/c-tree.h - Add definition for c_finish_oacc_host_data(). gcc/c/c-typeck.c b/gcc/c/c-typeck.c - Add new function c_finish_oacc_host_data(). 
[-- Attachment #2: p2.patch --] [-- Type: text/x-patch, Size: 4133 bytes --] diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 704ebc6..ead98b9 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -10116,6 +10116,8 @@ c_parser_omp_clause_name (c_parser *parser) result = PRAGMA_OMP_CLAUSE_UNTIED; else if (!strcmp ("use_device_ptr", p)) result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; + else if (!strcmp ("use_device", p)) + result = PRAGMA_OACC_CLAUSE_USE_DEVICE; break; case 'v': if (!strcmp ("vector", p)) @@ -11219,6 +11221,15 @@ c_parser_oacc_clause_async (c_parser *parser, tree list) return list; } +/* OpenACC 2.0: + use_device ( variable-list ) */ + +static tree +c_parser_oacc_clause_use_device (c_parser *parser, tree list) +{ + return c_parser_omp_var_list_parens (parser, OMP_CLAUSE_USE_DEVICE, list); +} + /* OpenACC: wait ( int-expr-list ) */ @@ -12474,6 +12485,10 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); c_name = "self"; break; + case PRAGMA_OACC_CLAUSE_USE_DEVICE: + clauses = c_parser_oacc_clause_use_device (parser, clauses); + c_name = "use_device"; + break; case PRAGMA_OACC_CLAUSE_VECTOR_LENGTH: clauses = c_parser_omp_clause_vector_length (parser, clauses); c_name = "vector_length"; @@ -13003,6 +13018,29 @@ c_parser_oacc_enter_exit_data (c_parser *parser, bool enter) /* OpenACC 2.0: + # pragma acc host_data oacc-data-clause[optseq] new-line + structured-block +*/ + +#define OACC_HOST_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) ) + +static tree +c_parser_oacc_host_data (location_t loc, c_parser *parser) +{ + tree stmt, clauses, block; + + clauses = c_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK, + "#pragma acc host_data"); + + block = c_begin_omp_parallel (); + add_stmt (c_parser_omp_structured_block (parser)); + stmt = c_finish_oacc_host_data (loc, clauses, block); + return stmt; +} + + +/* OpenACC 2.0: # pragma acc 
loop oacc-loop-clause[optseq] new-line structured-block @@ -16075,6 +16113,9 @@ c_parser_omp_construct (c_parser *parser) case PRAGMA_OACC_DATA: stmt = c_parser_oacc_data (loc, parser); break; + case PRAGMA_OACC_HOST_DATA: + stmt = c_parser_oacc_host_data (loc, parser); + break; case PRAGMA_OACC_KERNELS: strcpy (p_name, "#pragma acc"); stmt = c_parser_oacc_kernels (loc, parser, p_name); diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h index bee03d3..a9c5975 100644 --- a/gcc/c/c-tree.h +++ b/gcc/c/c-tree.h @@ -643,6 +643,7 @@ extern tree c_expr_to_decl (tree, bool *, bool *); extern tree c_finish_oacc_parallel (location_t, tree, tree); extern tree c_finish_oacc_kernels (location_t, tree, tree); extern tree c_finish_oacc_data (location_t, tree, tree); +extern tree c_finish_oacc_host_data (location_t, tree, tree); extern tree c_begin_omp_parallel (void); extern tree c_finish_omp_parallel (location_t, tree, tree); extern tree c_begin_omp_task (void); diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c index bc43602..a5e2a4a 100644 --- a/gcc/c/c-typeck.c +++ b/gcc/c/c-typeck.c @@ -11510,6 +11510,25 @@ c_finish_oacc_data (location_t loc, tree clauses, tree block) return add_stmt (stmt); } +/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound + statement. LOC is the location of the OACC_HOST_DATA. */ + +tree +c_finish_oacc_host_data (location_t loc, tree clauses, tree block) +{ + tree stmt; + + block = c_end_compound_stmt (loc, block, true); + + stmt = make_node (OACC_HOST_DATA); + TREE_TYPE (stmt) = void_type_node; + OACC_HOST_DATA_CLAUSES (stmt) = clauses; + OACC_HOST_DATA_BODY (stmt) = block; + SET_EXPR_LOCATION (stmt, loc); + + return add_stmt (stmt); +} + /* Like c_begin_compound_stmt, except force the retention of the BLOCK. 
*/ tree @@ -12942,6 +12961,7 @@ c_finish_omp_clauses (tree clauses, bool is_omp, bool declare_simd) case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: + case OMP_CLAUSE_USE_DEVICE: pc = &OMP_CLAUSE_CHAIN (c); continue; ^ permalink raw reply [flat|nested] 33+ messages in thread
* [OpenACC 1/7] host_data construct (C/C++ common) 2015-10-22 19:14 [OpenACC 0/7] host_data construct James Norris 2015-10-22 19:15 ` [OpenACC 2/7] host_data construct (C FE) James Norris @ 2015-10-22 19:15 ` James Norris 2015-10-22 19:16 ` [OpenACC 3/7] host_data construct (C front-end) James Norris ` (6 subsequent siblings) 8 siblings, 0 replies; 33+ messages in thread From: James Norris @ 2015-10-22 19:15 UTC (permalink / raw) To: GCC Patches; +Cc: Joseph S. Myers, Nathan Sidwell, Jakub Jelinek [-- Attachment #1: Type: text/plain, Size: 318 bytes --] gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c - Add host_data pragma definition to list in oacc_pragma[]. gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h - Add host_data pragma enum to list in pragma_kind[]. - Add use_device clause enum to list in pragma_omp_clause[]. [-- Attachment #2: p1.patch --] [-- Type: text/x-patch, Size: 1126 bytes --] diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c index 834a916..b748e2f 100644 --- a/gcc/c-family/c-pragma.c +++ b/gcc/c-family/c-pragma.c @@ -1214,6 +1214,7 @@ static const struct omp_pragma_def oacc_pragmas[] = { { "data", PRAGMA_OACC_DATA }, { "enter", PRAGMA_OACC_ENTER_DATA }, { "exit", PRAGMA_OACC_EXIT_DATA }, + { "host_data", PRAGMA_OACC_HOST_DATA }, { "kernels", PRAGMA_OACC_KERNELS }, { "loop", PRAGMA_OACC_LOOP }, { "parallel", PRAGMA_OACC_PARALLEL }, diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h index cec920f..23a72a3 100644 --- a/gcc/c-family/c-pragma.h +++ b/gcc/c-family/c-pragma.h @@ -31,6 +31,7 @@ enum pragma_kind { PRAGMA_OACC_DATA, PRAGMA_OACC_ENTER_DATA, PRAGMA_OACC_EXIT_DATA, + PRAGMA_OACC_HOST_DATA, PRAGMA_OACC_KERNELS, PRAGMA_OACC_LOOP, PRAGMA_OACC_PARALLEL, @@ -161,6 +162,7 @@ enum pragma_omp_clause { PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE, PRAGMA_OACC_CLAUSE_SELF, PRAGMA_OACC_CLAUSE_SEQ, + PRAGMA_OACC_CLAUSE_USE_DEVICE, PRAGMA_OACC_CLAUSE_VECTOR, PRAGMA_OACC_CLAUSE_VECTOR_LENGTH, PRAGMA_OACC_CLAUSE_WAIT, ^ permalink raw 
reply [flat|nested] 33+ messages in thread
* [OpenACC 3/7] host_data construct (C front-end) 2015-10-22 19:14 [OpenACC 0/7] host_data construct James Norris 2015-10-22 19:15 ` [OpenACC 2/7] host_data construct (C FE) James Norris 2015-10-22 19:15 ` [OpenACC 1/7] host_data construct (C/C++ common) James Norris @ 2015-10-22 19:16 ` James Norris 2015-10-22 19:18 ` [OpenACC 4/7] host_data construct (middle end) James Norris ` (5 subsequent siblings) 8 siblings, 0 replies; 33+ messages in thread From: James Norris @ 2015-10-22 19:16 UTC (permalink / raw) To: GCC Patches; +Cc: Joseph S. Myers, Nathan Sidwell, Jakub Jelinek [-- Attachment #1: Type: text/plain, Size: 738 bytes --] gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h - Add declaration for finish_oacc_host_data(). gcc/cp/parser.c b/gcc/cp/parser.c - Add handling of use_device clause in cp_parser_omp_clause_name(). - Add handling of use_device clause in cp_parser_oacc_all_clauses(). - Add new macro OACC_HOST_DATA_CLAUSE_MASK. - Add new function cp_parser_oacc_host_data() to handle host_data. - Add handling of host_data pragma to cp_parser_omp_construct(). - Add handling of host_data pragma to cp_parser_pragma(). gcc/cp/semantics.c b/gcc/cp/semantics.c - Add handling of use_device clause to finish_omp_clauses(). - Add new function finish_oacc_host_data(). 
[-- Attachment #2: p3.patch --] [-- Type: text/x-patch, Size: 4175 bytes --] diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 16db41f..76ece42 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -6318,6 +6318,7 @@ extern void finish_omp_threadprivate (tree); extern tree begin_omp_structured_block (void); extern tree finish_omp_structured_block (tree); extern tree finish_oacc_data (tree, tree); +extern tree finish_oacc_host_data (tree, tree); extern tree finish_oacc_kernels (tree, tree); extern tree finish_oacc_parallel (tree, tree); extern tree begin_omp_parallel (void); diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index f07a5e4..714e69c 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -29235,6 +29235,8 @@ cp_parser_omp_clause_name (cp_parser *parser) result = PRAGMA_OMP_CLAUSE_UNTIED; else if (!strcmp ("use_device_ptr", p)) result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; + else if (!strcmp ("use_device", p)) + result = PRAGMA_OACC_CLAUSE_USE_DEVICE; break; case 'v': if (!strcmp ("vector_length", p)) @@ -31381,6 +31383,11 @@ cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask, clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); c_name = "self"; break; + case PRAGMA_OACC_CLAUSE_USE_DEVICE: + clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_USE_DEVICE, + clauses); + c_name = "use_device"; + break; case PRAGMA_OACC_CLAUSE_VECTOR_LENGTH: clauses = cp_parser_oacc_clause_vector_length (parser, clauses); c_name = "vector_length"; @@ -34221,6 +34228,30 @@ cp_parser_oacc_data (cp_parser *parser, cp_token *pragma_tok) return stmt; } +#define OACC_HOST_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) ) + +/* OpenACC 2.0: + # pragma acc host_data <clauses> new-line + structured-block */ + +static tree +cp_parser_oacc_host_data (cp_parser *parser, cp_token *pragma_tok) +{ + tree stmt, clauses, block; + unsigned int save; + + clauses = cp_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK, + "#pragma 
acc host_data", pragma_tok); + + block = begin_omp_parallel (); + save = cp_parser_begin_omp_structured_block (parser); + cp_parser_statement (parser, NULL_TREE, false, NULL); + cp_parser_end_omp_structured_block (parser, save); + stmt = finish_oacc_host_data (clauses, block); + return stmt; +} + /* OpenACC 2.0: # pragma acc enter data oacc-enter-data-clause[optseq] new-line @@ -35288,6 +35319,9 @@ cp_parser_omp_construct (cp_parser *parser, cp_token *pragma_tok) case PRAGMA_OACC_EXIT_DATA: stmt = cp_parser_oacc_enter_exit_data (parser, pragma_tok, false); break; + case PRAGMA_OACC_HOST_DATA: + stmt = cp_parser_oacc_host_data (parser, pragma_tok); + break; case PRAGMA_OACC_KERNELS: stmt = cp_parser_oacc_kernels (parser, pragma_tok); break; @@ -35856,6 +35890,7 @@ cp_parser_pragma (cp_parser *parser, enum pragma_context context) case PRAGMA_OACC_DATA: case PRAGMA_OACC_ENTER_DATA: case PRAGMA_OACC_EXIT_DATA: + case PRAGMA_OACC_HOST_DATA: case PRAGMA_OACC_KERNELS: case PRAGMA_OACC_PARALLEL: case PRAGMA_OACC_LOOP: diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index c0a8b32..25482e7 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -6689,6 +6689,7 @@ finish_omp_clauses (tree clauses, bool allow_fields, bool declare_simd) case OMP_CLAUSE_SIMD: case OMP_CLAUSE_DEFAULTMAP: case OMP_CLAUSE__CILK_FOR_COUNT_: + case OMP_CLAUSE_USE_DEVICE: break; case OMP_CLAUSE_INBRANCH: @@ -7119,6 +7120,24 @@ finish_oacc_data (tree clauses, tree block) return add_stmt (stmt); } +/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound + statement. */ + +tree +finish_oacc_host_data (tree clauses, tree block) +{ + tree stmt; + + block = finish_omp_structured_block (block); + + stmt = make_node (OACC_HOST_DATA); + TREE_TYPE (stmt) = void_type_node; + OACC_HOST_DATA_CLAUSES (stmt) = clauses; + OACC_HOST_DATA_BODY (stmt) = block; + + return add_stmt (stmt); +} + /* Generate OACC_KERNELS, with CLAUSES and BLOCK as its compound statement. 
LOC is the location of the OACC_KERNELS. */ ^ permalink raw reply [flat|nested] 33+ messages in thread
* [OpenACC 4/7] host_data construct (middle end) 2015-10-22 19:14 [OpenACC 0/7] host_data construct James Norris ` (2 preceding siblings ...) 2015-10-22 19:16 ` [OpenACC 3/7] host_data construct (C front-end) James Norris @ 2015-10-22 19:18 ` James Norris 2015-10-22 19:19 ` [OpenACC 5/7] host_data construct (gcc tests) James Norris ` (4 subsequent siblings) 8 siblings, 0 replies; 33+ messages in thread From: James Norris @ 2015-10-22 19:18 UTC (permalink / raw) To: GCC Patches; +Cc: Joseph S. Myers, Nathan Sidwell, Jakub Jelinek [-- Attachment #1: Type: text/plain, Size: 498 bytes --] gcc/gimplify.c b/gcc/gimplify.c - Add new enum for use_device clause handling to gimplify_omp_var_data. - Add new enum for host_data regions to omp_region_type. - Move handling of use_device clause in gimplify_scan_omp_clauses(). - Add new functions gimplify_oacc_host_data() and gimplify_oacc_host_data_1(). - Add handling of host_data to gimplify_expr(). gcc/omp-builtins.def b/gcc/omp-builtins.def - Add builtin for GOACC_deviceptr(). [-- Attachment #2: p4.patch --] [-- Type: text/x-patch, Size: 5398 bytes --] diff --git a/gcc/gimplify.c b/gcc/gimplify.c index ab9e540..0c32219 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -93,6 +93,8 @@ enum gimplify_omp_var_data GOVD_MAP_0LEN_ARRAY = 32768, + GOVD_USE_DEVICE = 65536, + GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR | GOVD_LOCAL) @@ -116,7 +118,9 @@ enum omp_region_type ORT_COMBINED_TARGET = 33, /* Dummy OpenMP region, used to disable expansion of DECL_VALUE_EXPRs in taskloop pre body. */ - ORT_NONE = 64 + ORT_NONE = 64, + /* An OpenACC host-data region. */ + ORT_HOST_DATA = 128 }; /* Gimplify hashtable helper. 
*/ @@ -6338,6 +6342,10 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, decl = TREE_OPERAND (decl, 0); } goto do_add_decl; + case OMP_CLAUSE_USE_DEVICE: + flags = GOVD_USE_DEVICE | GOVD_EXPLICIT; + check_non_private = "use_device"; + goto do_add; case OMP_CLAUSE_LINEAR: if (gimplify_expr (&OMP_CLAUSE_LINEAR_STEP (c), pre_p, NULL, is_gimple_val, fb_rvalue) == GS_ERROR) @@ -7005,7 +7013,6 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, break; case OMP_CLAUSE_DEVICE_RESIDENT: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_INDEPENDENT: remove = true; break; @@ -7529,6 +7536,127 @@ gimplify_oacc_cache (tree *expr_p, gimple_seq *pre_p) *expr_p = NULL_TREE; } +static tree +gimplify_oacc_host_data_1 (tree *tp, int *walk_subtrees, + void *data ATTRIBUTE_UNUSED) +{ + splay_tree_node n = NULL; + location_t loc = EXPR_LOCATION (*tp); + + switch (TREE_CODE (*tp)) + { + case ADDR_EXPR: + { + tree decl = TREE_OPERAND (*tp, 0); + + switch (TREE_CODE (decl)) + { + case ARRAY_REF: + case ARRAY_RANGE_REF: + case COMPONENT_REF: + case VIEW_CONVERT_EXPR: + case REALPART_EXPR: + case IMAGPART_EXPR: + if (TREE_CODE (TREE_OPERAND (decl, 0)) == VAR_DECL) + n = splay_tree_lookup (gimplify_omp_ctxp->variables, + (splay_tree_key) TREE_OPERAND (decl, 0)); + break; + + case VAR_DECL: + n = splay_tree_lookup (gimplify_omp_ctxp->variables, + (splay_tree_key) decl); + break; + + default: + ; + } + + if (n != NULL && (n->value & GOVD_USE_DEVICE) != 0) + { + tree t = builtin_decl_explicit (BUILT_IN_GOACC_DEVICEPTR); + *tp = build_call_expr_loc (loc, t, 1, *tp); + } + + *walk_subtrees = 0; + } + break; + + case VAR_DECL: + { + tree decl = *tp; + + n = splay_tree_lookup (gimplify_omp_ctxp->variables, + (splay_tree_key) decl); + + if (n != NULL && (n->value & GOVD_USE_DEVICE) != 0) + { + if (!POINTER_TYPE_P (TREE_TYPE (decl))) + return decl; + + tree t = builtin_decl_explicit (BUILT_IN_GOACC_DEVICEPTR); + *tp = build_call_expr_loc (loc, t, 1, *tp); + *walk_subtrees = 0; + } 
+ } + break; + + case OACC_PARALLEL: + case OACC_KERNELS: + case OACC_LOOP: + *walk_subtrees = 0; + break; + + default: + ; + } + + return NULL_TREE; +} + +static enum gimplify_status +gimplify_oacc_host_data (tree *expr_p, gimple_seq *pre_p) +{ + tree expr = *expr_p, orig_body; + gimple_seq body = NULL; + + gimplify_scan_omp_clauses (&OACC_HOST_DATA_CLAUSES (expr), pre_p, + ORT_HOST_DATA, OACC_HOST_DATA); + + orig_body = OACC_HOST_DATA_BODY (expr); + + /* Perform a pre-pass over the host_data region's body, inserting calls to + GOACC_deviceptr where appropriate. */ + + tree ret = walk_tree_without_duplicates (&orig_body, + &gimplify_oacc_host_data_1, 0); + + if (ret) + { + error_at (EXPR_LOCATION (expr), + "undefined use of variable %qE in host_data region", + DECL_NAME (ret)); + gimplify_adjust_omp_clauses (pre_p, &OACC_HOST_DATA_CLAUSES (expr), + OACC_HOST_DATA); + return GS_ERROR; + } + + push_gimplify_context (); + + gimple *g = gimplify_and_return_first (orig_body, &body); + + if (gimple_code (g) == GIMPLE_BIND) + pop_gimplify_context (g); + else + pop_gimplify_context (NULL); + + gimplify_adjust_omp_clauses (pre_p, &OACC_HOST_DATA_CLAUSES (expr), + OACC_HOST_DATA); + + gimplify_seq_add_stmt (pre_p, g); + + return GS_ALL_DONE; +} + /* Gimplify the contents of an OMP_PARALLEL statement. This involves gimplification of the body, as well as scanning the body for used variables. 
We need to do this scan now, because variable-sized @@ -9595,6 +9723,9 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, break; case OACC_HOST_DATA: + ret = gimplify_oacc_host_data (expr_p, pre_p); + break; + case OACC_DECLARE: sorry ("directive not yet implemented"); ret = GS_ALL_DONE; diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index ea9cf0d..9ed075f 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -47,6 +47,8 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update", DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait", BT_FN_VOID_INT_INT_VAR, ATTR_NOTHROW_LIST) +DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DEVICEPTR, "GOACC_deviceptr", + BT_FN_PTR_PTR, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_THREAD_NUM, "GOACC_get_thread_num", BT_FN_INT, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_NUM_THREADS, "GOACC_get_num_threads", ^ permalink raw reply [flat|nested] 33+ messages in thread
* [OpenACC 5/7] host_data construct (gcc tests) 2015-10-22 19:14 [OpenACC 0/7] host_data construct James Norris ` (3 preceding siblings ...) 2015-10-22 19:18 ` [OpenACC 4/7] host_data construct (middle end) James Norris @ 2015-10-22 19:19 ` James Norris 2015-10-22 19:20 ` [OpenACC 6/7] host_data construct James Norris ` (3 subsequent siblings) 8 siblings, 0 replies; 33+ messages in thread From: James Norris @ 2015-10-22 19:19 UTC (permalink / raw) To: GCC Patches; +Cc: Joseph S. Myers, Nathan Sidwell, Jakub Jelinek [-- Attachment #1: Type: text/plain, Size: 461 bytes --] gcc/testsuite/c-c++-common/goacc/host_data-1.c b/gcc/testsuite/c-c++-common/goacc/host_data-1.c gcc/testsuite/c-c++-common/goacc/host_data-2.c b/gcc/testsuite/c-c++-common/goacc/host_data-2.c gcc/testsuite/c-c++-common/goacc/host_data-3.c b/gcc/testsuite/c-c++-common/goacc/host_data-3.c gcc/testsuite/c-c++-common/goacc/host_data-4.c b/gcc/testsuite/c-c++-common/goacc/host_data-4.c - New compile time tests for host_data. [-- Attachment #2: p5.patch --] [-- Type: text/x-patch, Size: 2220 bytes --] diff --git a/gcc/testsuite/c-c++-common/goacc/host_data-1.c b/gcc/testsuite/c-c++-common/goacc/host_data-1.c new file mode 100644 index 0000000..521c854 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/host_data-1.c @@ -0,0 +1,13 @@ +/* Test valid use of host_data directive. */ +/* { dg-do compile } */ + +int v0; +int v1[3][3]; + +void +f (void) +{ + int v2 = 3; +#pragma acc host_data use_device(v2, v0, v1) + ; +} diff --git a/gcc/testsuite/c-c++-common/goacc/host_data-2.c b/gcc/testsuite/c-c++-common/goacc/host_data-2.c new file mode 100644 index 0000000..e5213a0 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/host_data-2.c @@ -0,0 +1,13 @@ +/* Test invalid use of host_data directive. 
*/ +/* { dg-do compile } */ + +int v0; +#pragma acc host_data use_device(v0) /* { dg-error "expected" } */ + +void +f (void) +{ + int v2 = 3; +#pragma acc host_data copy(v2) /* { dg-error "not valid for" } */ + ; +} diff --git a/gcc/testsuite/c-c++-common/goacc/host_data-3.c b/gcc/testsuite/c-c++-common/goacc/host_data-3.c new file mode 100644 index 0000000..f9621c9 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/host_data-3.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ + +int main (int argc, char* argv[]) +{ + int x = 5, y; + + #pragma acc enter data copyin (x) + /* It's not clear what attempts to use non-pointer variables "directly" + (rather than merely taking their address) should do in host_data regions. + We choose to make it an error. */ + #pragma acc host_data use_device (x) /* TODO { dg-error "" } */ + { + y = x; + } + #pragma acc exit data delete (x) + + return y - 5; +} diff --git a/gcc/testsuite/c-c++-common/goacc/host_data-4.c b/gcc/testsuite/c-c++-common/goacc/host_data-4.c new file mode 100644 index 0000000..3dac5f3 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/host_data-4.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ + +int main (int argc, char* argv[]) +{ + int x[100]; + + #pragma acc enter data copyin (x) + /* Specifying an array index is not valid for host_data/use_device. */ + #pragma acc host_data use_device (x[4]) /* { dg-error "expected '\\\)' before '\\\[' token" } */ + ; + #pragma acc exit data delete (x) + + return 0; +} ^ permalink raw reply [flat|nested] 33+ messages in thread
* [OpenACC 6/7] host_data construct 2015-10-22 19:14 [OpenACC 0/7] host_data construct James Norris ` (4 preceding siblings ...) 2015-10-22 19:19 ` [OpenACC 5/7] host_data construct (gcc tests) James Norris @ 2015-10-22 19:20 ` James Norris 2015-10-22 19:22 ` [OpenACC 7/7] host_data construct (runtime tests) James Norris ` (2 subsequent siblings) 8 siblings, 0 replies; 33+ messages in thread From: James Norris @ 2015-10-22 19:20 UTC (permalink / raw) To: GCC Patches; +Cc: Joseph S. Myers, Nathan Sidwell, Jakub Jelinek [-- Attachment #1: Type: text/plain, Size: 242 bytes --] libgomp/libgomp.map b/libgomp/libgomp.map - Add new symbol GOACC_deviceptr. libgomp/oacc-mem.c b/libgomp/oacc-mem.c - Add new function GOACC_deviceptr() to handle pointer lookup for host_data regions. [-- Attachment #2: p6.patch --] [-- Type: text/x-patch, Size: 1251 bytes --] diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 2153661..2a43a8c 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -378,6 +378,7 @@ GOACC_2.0 { GOACC_wait; GOACC_get_thread_num; GOACC_get_num_threads; + GOACC_deviceptr; }; GOACC_2.0.1 { diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index af067d6..497ab92 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -204,6 +204,38 @@ acc_deviceptr (void *h) return d; } +/* This function is used as a helper in generated code to implement pointer + lookup in host_data regions. Unlike acc_deviceptr, it returns its argument + unchanged on a shared-memory system (e.g. the host). 
*/ + +void * +GOACC_deviceptr (void *h) +{ + splay_tree_key n; + void *d; + void *offset; + + goacc_lazy_initialize (); + + struct goacc_thread *thr = goacc_thread (); + + if ((thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) == 0) + { + n = lookup_host (thr->dev, h, 1); + + if (!n) + return NULL; + + offset = h - n->host_start; + + d = n->tgt->tgt_start + n->tgt_offset + offset; + + return d; + } + else + return h; +} + /* Return the host pointer that corresponds to device data D. Or NULL if no mapping. */ ^ permalink raw reply [flat|nested] 33+ messages in thread
* [OpenACC 7/7] host_data construct (runtime tests) 2015-10-22 19:14 [OpenACC 0/7] host_data construct James Norris ` (5 preceding siblings ...) 2015-10-22 19:20 ` [OpenACC 6/7] host_data construct James Norris @ 2015-10-22 19:22 ` James Norris 2015-10-22 20:42 ` [OpenACC 0/7] host_data construct Joseph Myers 2015-10-23 16:01 ` [Bulk] " James Norris 8 siblings, 0 replies; 33+ messages in thread From: James Norris @ 2015-10-22 19:22 UTC (permalink / raw) To: GCC Patches; +Cc: Joseph S. Myers, Nathan Sidwell, Jakub Jelinek [-- Attachment #1: Type: text/plain, Size: 295 bytes --] libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c - New runtime tests for host_data. [-- Attachment #2: p7.patch --] [-- Type: text/x-patch, Size: 4123 bytes --] diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c new file mode 100644 index 0000000..15ccb27 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c @@ -0,0 +1,125 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <openacc.h> +#include <cuda.h> +#include <cuda_runtime_api.h> +#include <cublas_v2.h> + +void +saxpy_host (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + y[i] = y[i] + a * x[i]; +} + +#pragma acc routine +void +saxpy_target (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + y[i] = y[i] + a * x[i]; +} + +int +main(int argc, char **argv) +{ + const int N = 8; + int i; + float *x_ref, *y_ref; + float *x, *y; + cublasHandle_t h; + float a = 2.0; + + x_ref = (float*) malloc (N * sizeof(float)); + y_ref = (float*) malloc (N * sizeof(float)); + + x = (float*) 
malloc (N * sizeof(float)); + y = (float*) malloc (N * sizeof(float)); + +#pragma acc data copyin (x[0:N]) copy (y[0:N]) + { + float *xp, *yp; +#pragma acc host_data use_device (x, y) + { +#pragma acc parallel pcopy (xp, yp) present (x, y) + { + xp = x; + yp = y; + } + } + + if (xp != acc_deviceptr (x) || yp != acc_deviceptr (y)) + abort (); + } + + for (i = 0; i < N; i++) + { + x[i] = x_ref[i] = 4.0 + i; + y[i] = y_ref[i] = 3.0; + } + + saxpy_host (N, a, x_ref, y_ref); + + cublasCreate (&h); + +#pragma acc data copyin (x[0:N]) copy (y[0:N]) + { +#pragma acc host_data use_device (x, y) + { + cublasSaxpy (h, N, &a, x, 1, y, 1); + } + } + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + +#pragma acc data create (x[0:N]) copyout (y[0:N]) + { +#pragma acc kernels + for (i = 0; i < N; i++) + y[i] = 3.0; + +#pragma acc host_data use_device (x, y) + { + cublasSaxpy (h, N, &a, x, 1, y, 1); + } + } + + cublasDestroy (h); + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + + for (i = 0; i < N; i++) + y[i] = 3.0; + +#pragma acc data copyin (x[0:N]) copyin (a, N) copy (y[0:N]) + { +#pragma acc host_data use_device (x, y) + { +#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a, N) + saxpy_target (N, a, x, y); + } + } + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c new file mode 100644 index 0000000..511ec64 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c @@ -0,0 +1,50 @@ +/* { dg-do run } */ + +#include <stdlib.h> + +struct by_lightning { + int a; + int b; + int c; +}; + +int main (int argc, char* argv[]) +{ + int x; + void *q = NULL, *r = NULL, *p = NULL, *s = NULL, *t = NULL; + long u; + struct by_lightning on_the_head = {1, 2, 3}; + int arr[10], *f = NULL; + _Complex float cf; + #pragma acc enter data copyin (x, arr, 
on_the_head, cf) + #pragma acc host_data use_device (x, arr, on_the_head, cf) + { + q = &x; + { + f = &arr[5]; + r = f; + s = &__real__ cf; + t = &on_the_head.c; + u = (long) &__imag__ cf; + #pragma acc parallel copyout(p) present (x, arr, on_the_head, cf) + { + /* This will not (and must not) call GOACC_deviceptr, but '&x' will be + the address on the device (if appropriate) regardless. */ + p = &x; + } + } + } + #pragma acc exit data delete (x) + +#if ACC_MEM_SHARED + if (q != &x || f != &arr[5] || r != f || s != &(__real__ cf) + || t != &on_the_head.c || u != (long) &(__imag__ cf) || p != &x) + abort (); +#else + if (q == &x || f == &arr[5] || r != f || s == &(__real__ cf) + || t == &on_the_head.c || u == (long) &(__imag__ cf) || p == &x) + abort (); +#endif + + return 0; +} ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [OpenACC 0/7] host_data construct 2015-10-22 19:14 [OpenACC 0/7] host_data construct James Norris ` (6 preceding siblings ...) 2015-10-22 19:22 ` [OpenACC 7/7] host_data construct (runtime tests) James Norris @ 2015-10-22 20:42 ` Joseph Myers 2015-10-22 20:53 ` James Norris 2015-10-23 16:01 ` [Bulk] " James Norris 8 siblings, 1 reply; 33+ messages in thread From: Joseph Myers @ 2015-10-22 20:42 UTC (permalink / raw) To: James Norris; +Cc: GCC Patches, Nathan Sidwell, Jakub Jelinek I think this patch is small enough, and the pieces insufficiently self-contained, that splitting it up rather than posting as one patch just makes it harder to understand. My strong preference is that the same patch that introduces a feature should also add the testcases for that feature, for example - they should not be split out (that's not even a split by reviewer, testcases are critical to reviewing functionality patches). -- Joseph S. Myers joseph@codesourcery.com ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [OpenACC 0/7] host_data construct 2015-10-22 20:42 ` [OpenACC 0/7] host_data construct Joseph Myers @ 2015-10-22 20:53 ` James Norris 0 siblings, 0 replies; 33+ messages in thread From: James Norris @ 2015-10-22 20:53 UTC (permalink / raw) To: Joseph Myers; +Cc: GCC Patches, Nathan Sidwell, Jakub Jelinek To all, On 10/22/2015 03:36 PM, Joseph Myers wrote: > I think this patch is small enough, and the pieces insufficiently > self-contained, that splitting it up rather than posting as one patch just > makes it harder to understand. My strong preference is that the same > patch that introduces a feature should also add the testcases for that > feature, for example - they should not be split out (that's not even a > split by reviewer, testcases are critical to reviewing functionality > patches). > Okay, I'll rewrite the posting and submit it with a single patch as a response to the initial posting. I'll also re-write the comments as they have been pointed out by Nathan as being too terse. My apologies for wasting people's time. Jim ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Bulk] [OpenACC 0/7] host_data construct 2015-10-22 19:14 [OpenACC 0/7] host_data construct James Norris ` (7 preceding siblings ...) 2015-10-22 20:42 ` [OpenACC 0/7] host_data construct Joseph Myers @ 2015-10-23 16:01 ` James Norris 2015-10-26 18:36 ` Jakub Jelinek 8 siblings, 1 reply; 33+ messages in thread From: James Norris @ 2015-10-23 16:01 UTC (permalink / raw) To: GCC Patches; +Cc: Joseph S. Myers, Nathan Sidwell, Jakub Jelinek [-- Attachment #1: Type: text/plain, Size: 3367 bytes --] Hi, This is a re-posting of the original note incorporating the suggestions from Joseph and Nathan (thank you). This patch adds the processing of the OpenACC host_data construct in C and C++. (Note: Support in Fortran is already in trunk.) The patch also adds the required support in the middle-end and libgomp. Background The host data construct is used to make an address of device data available on the host. The following illustrates use of the host data construct in conjunction with arrays which are already device-resident and an accelerator-only function. int main(int argc, char **argv) { float *x, *y; const int n = 1024; int i; x = (float*) malloc (n * sizeof(float)); y = (float*) malloc (n * sizeof(float)); /* Copy the arrays out to the device. */ #pragma acc data create(x[0:n]) copyout(y[0:n]) { #pragma acc parallel { for (i = 0; i < n; i++) { x[i] = 1.0f; y[i] = 0.0f; } } /* * The arrays are already on the device, so * pass the device addresses to saxpy. NOTE: * saxpy has been previously defined as an * accelerator function. */ #pragma acc host_data use_device(x, y) { saxpy(n, 2.0, x, 1, y, 1); } } fprintf(stdout, "y[0] = %f\n", y[0]); return 0; } C and C++ front-ends Definitions for use by C and C++ were added to identify the host_data construct pragma and its only valid clause: use_device. New functionality was added to do the parsing of the host_data pragma and validate the sole valid clause: use_device. 
As the host_data construct has associated with it a structured block, new functionality was added to build the compound statement to represent the block. Middle-end A gimple definition: GOVD_USE_DEVICE, has been added to indicate the use of the use_device clause. This flag is asserted as part of installing mappings into an omp context. The flag is subsequently reacted to during the gimplification of the host_data region's body. When this flag is encountered, a GOACC_deviceptr builtin call is inserted at the appropriate place. libgomp A new function has been added to handle pointer lookup for host data regions. As the comment in the code describes, this function will return the appropriate address based on whether it is called for the host or the target. This function is used in response to usage of the use_device clause. Tests New compile and runtime tests have been added. All of the code is in the gomp-4_0-branch. Regtested on x86_64-linux. Thanks! Jim [-- Attachment #2: ChangeLog --] [-- Type: text/plain, Size: 2075 bytes --] 2015-10-23 Julian Brown <julian@codesourcery.com> James Norris <jnorris@codesourcery.com> gcc/c-family/ * c-pragma.c (oacc_pragmas): Add host_data pragma definition. * c-pragma.h (enum pragma_kind): Add PRAGMA_OACC_HOST_DATA. (enum pragma_omp_clause): Add PRAGMA_OACC_CLAUSE_USE_DEVICE. gcc/c/ * c-parser.c (c_parser_omp_clause_name): Add handling of use_device clause. (c_parser_oacc_clause_use_device): New function. (c_parser_oacc_all_clauses): Handle PRAGMA_OACC_CLAUSE_USE_DEVICE. (OACC_HOST_DATA_CLAUSE_MASK): New definition. (c_parser_oacc_host_data): New function. (c_parser_omp_construct): Handle PRAGMA_OACC_HOST_DATA. * c-tree.h: Add definition for c_finish_oacc_host_data. * c-typeck.c (c_finish_oacc_host_data): New function. gcc/cp/ * cp-tree.h (finish_oacc_host_data): New function. * parser.c (cp_parser_omp_clause_name): Add handling of use_device clause. (cp_parser_oacc_all_clauses): Handle PRAGMA_OACC_CLAUSE_USE_DEVICE. 
(OACC_HOST_DATA_CLAUSE_MASK): New definition. (cp_parser_oacc_host_data): New function. (cp_parser_omp_construct): Handle PRAGMA_OACC_HOST_DATA. (cp_parser_pragma): Handle PRAGMA_OACC_HOST_DATA. * semantics.c (finish_omp_clauses): Handle OMP_CLAUSE_USE_DEVICE. (finish_oacc_host_data): New function. gcc/ * gimplify.c (enum gimplify_omp_var_data): Add GOVD_USE_DEVICE. (enum omp_region_type): Add ORT_HOST_DATA. (gimplify_scan_omp_clauses): Adjust handling of OMP_CLAUSE_USE_DEVICE. (gimplify_oacc_host_data, gimplify_oacc_host_data_1): New functions. (gimplify_expr): Handle OACC_HOST_DATA. * omp-builtins.def (BUILT_IN_GOACC_DEVICEPTR): New builtin. gcc/testsuite/ * c-c++-common/goacc/host_data-1.c: New test. * c-c++-common/goacc/host_data-2.c: Likewise. * c-c++-common/goacc/host_data-3.c: Likewise. * c-c++-common/goacc/host_data-4.c: Likewise. libgomp/ * libgomp.map (GOACC_2.0): Add GOACC_deviceptr. * oacc-mem.c (GOACC_deviceptr): New function. * testsuite/libgomp.oacc-c-c++-common/host_data-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/host_data-2.c: Likewise. 
[-- Attachment #3: host_data.patch --] [-- Type: text/x-patch, Size: 22426 bytes --] diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c index 834a916..b748e2f 100644 --- a/gcc/c-family/c-pragma.c +++ b/gcc/c-family/c-pragma.c @@ -1214,6 +1214,7 @@ static const struct omp_pragma_def oacc_pragmas[] = { { "data", PRAGMA_OACC_DATA }, { "enter", PRAGMA_OACC_ENTER_DATA }, { "exit", PRAGMA_OACC_EXIT_DATA }, + { "host_data", PRAGMA_OACC_HOST_DATA }, { "kernels", PRAGMA_OACC_KERNELS }, { "loop", PRAGMA_OACC_LOOP }, { "parallel", PRAGMA_OACC_PARALLEL }, diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h index cec920f..23a72a3 100644 --- a/gcc/c-family/c-pragma.h +++ b/gcc/c-family/c-pragma.h @@ -31,6 +31,7 @@ enum pragma_kind { PRAGMA_OACC_DATA, PRAGMA_OACC_ENTER_DATA, PRAGMA_OACC_EXIT_DATA, + PRAGMA_OACC_HOST_DATA, PRAGMA_OACC_KERNELS, PRAGMA_OACC_LOOP, PRAGMA_OACC_PARALLEL, @@ -161,6 +162,7 @@ enum pragma_omp_clause { PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE, PRAGMA_OACC_CLAUSE_SELF, PRAGMA_OACC_CLAUSE_SEQ, + PRAGMA_OACC_CLAUSE_USE_DEVICE, PRAGMA_OACC_CLAUSE_VECTOR, PRAGMA_OACC_CLAUSE_VECTOR_LENGTH, PRAGMA_OACC_CLAUSE_WAIT, diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 704ebc6..ead98b9 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -10116,6 +10116,8 @@ c_parser_omp_clause_name (c_parser *parser) result = PRAGMA_OMP_CLAUSE_UNTIED; else if (!strcmp ("use_device_ptr", p)) result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; + else if (!strcmp ("use_device", p)) + result = PRAGMA_OACC_CLAUSE_USE_DEVICE; break; case 'v': if (!strcmp ("vector", p)) @@ -11219,6 +11221,15 @@ c_parser_oacc_clause_async (c_parser *parser, tree list) return list; } +/* OpenACC 2.0: + use_device ( variable-list ) */ + +static tree +c_parser_oacc_clause_use_device (c_parser *parser, tree list) +{ + return c_parser_omp_var_list_parens (parser, OMP_CLAUSE_USE_DEVICE, list); +} + /* OpenACC: wait ( int-expr-list ) */ @@ -12474,6 +12485,10 @@ c_parser_oacc_all_clauses (c_parser 
*parser, omp_clause_mask mask, clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); c_name = "self"; break; + case PRAGMA_OACC_CLAUSE_USE_DEVICE: + clauses = c_parser_oacc_clause_use_device (parser, clauses); + c_name = "use_device"; + break; case PRAGMA_OACC_CLAUSE_VECTOR_LENGTH: clauses = c_parser_omp_clause_vector_length (parser, clauses); c_name = "vector_length"; @@ -13003,6 +13018,29 @@ c_parser_oacc_enter_exit_data (c_parser *parser, bool enter) /* OpenACC 2.0: + # pragma acc host_data oacc-data-clause[optseq] new-line + structured-block +*/ + +#define OACC_HOST_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) ) + +static tree +c_parser_oacc_host_data (location_t loc, c_parser *parser) +{ + tree stmt, clauses, block; + + clauses = c_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK, + "#pragma acc host_data"); + + block = c_begin_omp_parallel (); + add_stmt (c_parser_omp_structured_block (parser)); + stmt = c_finish_oacc_host_data (loc, clauses, block); + return stmt; +} + + +/* OpenACC 2.0: # pragma acc loop oacc-loop-clause[optseq] new-line structured-block @@ -16075,6 +16113,9 @@ c_parser_omp_construct (c_parser *parser) case PRAGMA_OACC_DATA: stmt = c_parser_oacc_data (loc, parser); break; + case PRAGMA_OACC_HOST_DATA: + stmt = c_parser_oacc_host_data (loc, parser); + break; case PRAGMA_OACC_KERNELS: strcpy (p_name, "#pragma acc"); stmt = c_parser_oacc_kernels (loc, parser, p_name); diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h index bee03d3..a9c5975 100644 --- a/gcc/c/c-tree.h +++ b/gcc/c/c-tree.h @@ -643,6 +643,7 @@ extern tree c_expr_to_decl (tree, bool *, bool *); extern tree c_finish_oacc_parallel (location_t, tree, tree); extern tree c_finish_oacc_kernels (location_t, tree, tree); extern tree c_finish_oacc_data (location_t, tree, tree); +extern tree c_finish_oacc_host_data (location_t, tree, tree); extern tree c_begin_omp_parallel (void); extern tree c_finish_omp_parallel (location_t, tree, tree); 
extern tree c_begin_omp_task (void); diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c index bc43602..a5e2a4a 100644 --- a/gcc/c/c-typeck.c +++ b/gcc/c/c-typeck.c @@ -11510,6 +11510,25 @@ c_finish_oacc_data (location_t loc, tree clauses, tree block) return add_stmt (stmt); } +/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound + statement. LOC is the location of the OACC_HOST_DATA. */ + +tree +c_finish_oacc_host_data (location_t loc, tree clauses, tree block) +{ + tree stmt; + + block = c_end_compound_stmt (loc, block, true); + + stmt = make_node (OACC_HOST_DATA); + TREE_TYPE (stmt) = void_type_node; + OACC_HOST_DATA_CLAUSES (stmt) = clauses; + OACC_HOST_DATA_BODY (stmt) = block; + SET_EXPR_LOCATION (stmt, loc); + + return add_stmt (stmt); +} + /* Like c_begin_compound_stmt, except force the retention of the BLOCK. */ tree @@ -12942,6 +12961,7 @@ c_finish_omp_clauses (tree clauses, bool is_omp, bool declare_simd) case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: + case OMP_CLAUSE_USE_DEVICE: pc = &OMP_CLAUSE_CHAIN (c); continue; diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 16db41f..76ece42 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -6318,6 +6318,7 @@ extern void finish_omp_threadprivate (tree); extern tree begin_omp_structured_block (void); extern tree finish_omp_structured_block (tree); extern tree finish_oacc_data (tree, tree); +extern tree finish_oacc_host_data (tree, tree); extern tree finish_oacc_kernels (tree, tree); extern tree finish_oacc_parallel (tree, tree); extern tree begin_omp_parallel (void); diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index f07a5e4..714e69c 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -29235,6 +29235,8 @@ cp_parser_omp_clause_name (cp_parser *parser) result = PRAGMA_OMP_CLAUSE_UNTIED; else if (!strcmp ("use_device_ptr", p)) result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; + else if (!strcmp ("use_device", p)) + result = PRAGMA_OACC_CLAUSE_USE_DEVICE; break; case 'v': if 
(!strcmp ("vector_length", p)) @@ -31381,6 +31383,11 @@ cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask, clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); c_name = "self"; break; + case PRAGMA_OACC_CLAUSE_USE_DEVICE: + clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_USE_DEVICE, + clauses); + c_name = "use_device"; + break; case PRAGMA_OACC_CLAUSE_VECTOR_LENGTH: clauses = cp_parser_oacc_clause_vector_length (parser, clauses); c_name = "vector_length"; @@ -34221,6 +34228,30 @@ cp_parser_oacc_data (cp_parser *parser, cp_token *pragma_tok) return stmt; } +#define OACC_HOST_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) ) + +/* OpenACC 2.0: + # pragma acc host_data <clauses> new-line + structured-block */ + +static tree +cp_parser_oacc_host_data (cp_parser *parser, cp_token *pragma_tok) +{ + tree stmt, clauses, block; + unsigned int save; + + clauses = cp_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK, + "#pragma acc host_data", pragma_tok); + + block = begin_omp_parallel (); + save = cp_parser_begin_omp_structured_block (parser); + cp_parser_statement (parser, NULL_TREE, false, NULL); + cp_parser_end_omp_structured_block (parser, save); + stmt = finish_oacc_host_data (clauses, block); + return stmt; +} + /* OpenACC 2.0: # pragma acc enter data oacc-enter-data-clause[optseq] new-line @@ -35288,6 +35319,9 @@ cp_parser_omp_construct (cp_parser *parser, cp_token *pragma_tok) case PRAGMA_OACC_EXIT_DATA: stmt = cp_parser_oacc_enter_exit_data (parser, pragma_tok, false); break; + case PRAGMA_OACC_HOST_DATA: + stmt = cp_parser_oacc_host_data (parser, pragma_tok); + break; case PRAGMA_OACC_KERNELS: stmt = cp_parser_oacc_kernels (parser, pragma_tok); break; @@ -35856,6 +35890,7 @@ cp_parser_pragma (cp_parser *parser, enum pragma_context context) case PRAGMA_OACC_DATA: case PRAGMA_OACC_ENTER_DATA: case PRAGMA_OACC_EXIT_DATA: + case PRAGMA_OACC_HOST_DATA: case PRAGMA_OACC_KERNELS: case 
PRAGMA_OACC_PARALLEL: case PRAGMA_OACC_LOOP: diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index c0a8b32..25482e7 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -6689,6 +6689,7 @@ finish_omp_clauses (tree clauses, bool allow_fields, bool declare_simd) case OMP_CLAUSE_SIMD: case OMP_CLAUSE_DEFAULTMAP: case OMP_CLAUSE__CILK_FOR_COUNT_: + case OMP_CLAUSE_USE_DEVICE: break; case OMP_CLAUSE_INBRANCH: @@ -7119,6 +7120,24 @@ finish_oacc_data (tree clauses, tree block) return add_stmt (stmt); } +/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound + statement. */ + +tree +finish_oacc_host_data (tree clauses, tree block) +{ + tree stmt; + + block = finish_omp_structured_block (block); + + stmt = make_node (OACC_HOST_DATA); + TREE_TYPE (stmt) = void_type_node; + OACC_HOST_DATA_CLAUSES (stmt) = clauses; + OACC_HOST_DATA_BODY (stmt) = block; + + return add_stmt (stmt); +} + /* Generate OACC_KERNELS, with CLAUSES and BLOCK as its compound statement. LOC is the location of the OACC_KERNELS. */ diff --git a/gcc/gimplify.c b/gcc/gimplify.c index ab9e540..0c32219 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -93,6 +93,8 @@ enum gimplify_omp_var_data GOVD_MAP_0LEN_ARRAY = 32768, + GOVD_USE_DEVICE = 65536, + GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR | GOVD_LOCAL) @@ -116,7 +118,9 @@ enum omp_region_type ORT_COMBINED_TARGET = 33, /* Dummy OpenMP region, used to disable expansion of DECL_VALUE_EXPRs in taskloop pre body. */ - ORT_NONE = 64 + ORT_NONE = 64, + /* An OpenACC host-data region. */ + ORT_HOST_DATA = 128 }; /* Gimplify hashtable helper. 
*/ @@ -6338,6 +6342,10 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, decl = TREE_OPERAND (decl, 0); } goto do_add_decl; + case OMP_CLAUSE_USE_DEVICE: + flags = GOVD_USE_DEVICE | GOVD_EXPLICIT; + check_non_private = "use_device"; + goto do_add; case OMP_CLAUSE_LINEAR: if (gimplify_expr (&OMP_CLAUSE_LINEAR_STEP (c), pre_p, NULL, is_gimple_val, fb_rvalue) == GS_ERROR) @@ -7005,7 +7013,6 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, break; case OMP_CLAUSE_DEVICE_RESIDENT: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_INDEPENDENT: remove = true; break; @@ -7529,6 +7536,127 @@ gimplify_oacc_cache (tree *expr_p, gimple_seq *pre_p) *expr_p = NULL_TREE; } +static tree +gimplify_oacc_host_data_1 (tree *tp, int *walk_subtrees, + void *data ATTRIBUTE_UNUSED) +{ + splay_tree_node n = NULL; + location_t loc = EXPR_LOCATION (*tp); + + switch (TREE_CODE (*tp)) + { + case ADDR_EXPR: + { + tree decl = TREE_OPERAND (*tp, 0); + + switch (TREE_CODE (decl)) + { + case ARRAY_REF: + case ARRAY_RANGE_REF: + case COMPONENT_REF: + case VIEW_CONVERT_EXPR: + case REALPART_EXPR: + case IMAGPART_EXPR: + if (TREE_CODE (TREE_OPERAND (decl, 0)) == VAR_DECL) + n = splay_tree_lookup (gimplify_omp_ctxp->variables, + (splay_tree_key) TREE_OPERAND (decl, 0)); + break; + + case VAR_DECL: + n = splay_tree_lookup (gimplify_omp_ctxp->variables, + (splay_tree_key) decl); + break; + + default: + ; + } + + if (n != NULL && (n->value & GOVD_USE_DEVICE) != 0) + { + tree t = builtin_decl_explicit (BUILT_IN_GOACC_DEVICEPTR); + *tp = build_call_expr_loc (loc, t, 1, *tp); + } + + *walk_subtrees = 0; + } + break; + + case VAR_DECL: + { + tree decl = *tp; + + n = splay_tree_lookup (gimplify_omp_ctxp->variables, + (splay_tree_key) decl); + + if (n != NULL && (n->value & GOVD_USE_DEVICE) != 0) + { + if (!POINTER_TYPE_P (TREE_TYPE (decl))) + return decl; + + tree t = builtin_decl_explicit (BUILT_IN_GOACC_DEVICEPTR); + *tp = build_call_expr_loc (loc, t, 1, *tp); + *walk_subtrees = 0; + } 
+ } + break; + + case OACC_PARALLEL: + case OACC_KERNELS: + case OACC_LOOP: + *walk_subtrees = 0; + break; + + default: + ; + } + + return NULL_TREE; +} + +static enum gimplify_status +gimplify_oacc_host_data (tree *expr_p, gimple_seq *pre_p) +{ + tree expr = *expr_p, orig_body; + gimple_seq body = NULL; + + gimplify_scan_omp_clauses (&OACC_HOST_DATA_CLAUSES (expr), pre_p, + ORT_HOST_DATA, OACC_HOST_DATA); + + orig_body = OACC_HOST_DATA_BODY (expr); + + /* Perform a pre-pass over the host_data region's body, inserting calls to + GOACC_deviceptr where appropriate. */ + + tree ret = walk_tree_without_duplicates (&orig_body, + &gimplify_oacc_host_data_1, 0); + + if (ret) + { + error_at (EXPR_LOCATION (expr), + "undefined use of variable %qE in host_data region", + DECL_NAME (ret)); + gimplify_adjust_omp_clauses (pre_p, &OACC_HOST_DATA_CLAUSES (expr), + OACC_HOST_DATA); + return GS_ERROR; + } + + push_gimplify_context (); + + gimple *g = gimplify_and_return_first (orig_body, &body); + + if (gimple_code (g) == GIMPLE_BIND) + pop_gimplify_context (g); + else + pop_gimplify_context (NULL); + + gimplify_adjust_omp_clauses (pre_p, &OACC_HOST_DATA_CLAUSES (expr), + OACC_HOST_DATA); + + gimplify_seq_add_stmt (pre_p, g); + + return GS_ALL_DONE; +} + /* Gimplify the contents of an OMP_PARALLEL statement. This involves gimplification of the body, as well as scanning the body for used variables. 
We need to do this scan now, because variable-sized @@ -9595,6 +9723,9 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, break; case OACC_HOST_DATA: + ret = gimplify_oacc_host_data (expr_p, pre_p); + break; + case OACC_DECLARE: sorry ("directive not yet implemented"); ret = GS_ALL_DONE; diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index ea9cf0d..9ed075f 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -47,6 +47,8 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update", DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait", BT_FN_VOID_INT_INT_VAR, ATTR_NOTHROW_LIST) +DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DEVICEPTR, "GOACC_deviceptr", + BT_FN_PTR_PTR, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_THREAD_NUM, "GOACC_get_thread_num", BT_FN_INT, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_NUM_THREADS, "GOACC_get_num_threads", diff --git a/gcc/testsuite/c-c++-common/goacc/host_data-1.c b/gcc/testsuite/c-c++-common/goacc/host_data-1.c new file mode 100644 index 0000000..521c854 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/host_data-1.c @@ -0,0 +1,13 @@ +/* Test valid use of host_data directive. */ +/* { dg-do compile } */ + +int v0; +int v1[3][3]; + +void +f (void) +{ + int v2 = 3; +#pragma acc host_data use_device(v2, v0, v1) + ; +} diff --git a/gcc/testsuite/c-c++-common/goacc/host_data-2.c b/gcc/testsuite/c-c++-common/goacc/host_data-2.c new file mode 100644 index 0000000..e5213a0 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/host_data-2.c @@ -0,0 +1,13 @@ +/* Test invalid use of host_data directive. 
*/ +/* { dg-do compile } */ + +int v0; +#pragma acc host_data use_device(v0) /* { dg-error "expected" } */ + +void +f (void) +{ + int v2 = 3; +#pragma acc host_data copy(v2) /* { dg-error "not valid for" } */ + ; +} diff --git a/gcc/testsuite/c-c++-common/goacc/host_data-3.c b/gcc/testsuite/c-c++-common/goacc/host_data-3.c new file mode 100644 index 0000000..f9621c9 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/host_data-3.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ + +int main (int argc, char* argv[]) +{ + int x = 5, y; + + #pragma acc enter data copyin (x) + /* It's not clear what attempts to use non-pointer variables "directly" + (rather than merely taking their address) should do in host_data regions. + We choose to make it an error. */ + #pragma acc host_data use_device (x) /* TODO { dg-error "" } */ + { + y = x; + } + #pragma acc exit data delete (x) + + return y - 5; +} diff --git a/gcc/testsuite/c-c++-common/goacc/host_data-4.c b/gcc/testsuite/c-c++-common/goacc/host_data-4.c new file mode 100644 index 0000000..3dac5f3 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/host_data-4.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ + +int main (int argc, char* argv[]) +{ + int x[100]; + + #pragma acc enter data copyin (x) + /* Specifying an array index is not valid for host_data/use_device. 
*/ + #pragma acc host_data use_device (x[4]) /* { dg-error "expected '\\\)' before '\\\[' token" } */ + ; + #pragma acc exit data delete (x) + + return 0; +} diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 2153661..2a43a8c 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -378,6 +378,7 @@ GOACC_2.0 { GOACC_wait; GOACC_get_thread_num; GOACC_get_num_threads; + GOACC_deviceptr; }; GOACC_2.0.1 { diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index af067d6..497ab92 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -204,6 +204,38 @@ acc_deviceptr (void *h) return d; } +/* This function is used as a helper in generated code to implement pointer + lookup in host_data regions. Unlike acc_deviceptr, it returns its argument + unchanged on a shared-memory system (e.g. the host). */ + +void * +GOACC_deviceptr (void *h) +{ + splay_tree_key n; + void *d; + void *offset; + + goacc_lazy_initialize (); + + struct goacc_thread *thr = goacc_thread (); + + if ((thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) == 0) + { + n = lookup_host (thr->dev, h, 1); + + if (!n) + return NULL; + + offset = h - n->host_start; + + d = n->tgt->tgt_start + n->tgt_offset + offset; + + return d; + } + else + return h; +} + /* Return the host pointer that corresponds to device data D. Or NULL if no mapping. 
*/ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c new file mode 100644 index 0000000..15ccb27 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c @@ -0,0 +1,125 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include <stdio.h> +#include <stdlib.h> +#include <openacc.h> +#include <cuda.h> +#include <cuda_runtime_api.h> +#include <cublas_v2.h> + +void +saxpy_host (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + y[i] = y[i] + a * x[i]; +} + +#pragma acc routine +void +saxpy_target (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + y[i] = y[i] + a * x[i]; +} + +int +main(int argc, char **argv) +{ + const int N = 8; + int i; + float *x_ref, *y_ref; + float *x, *y; + cublasHandle_t h; + float a = 2.0; + + x_ref = (float*) malloc (N * sizeof(float)); + y_ref = (float*) malloc (N * sizeof(float)); + + x = (float*) malloc (N * sizeof(float)); + y = (float*) malloc (N * sizeof(float)); + +#pragma acc data copyin (x[0:N]) copy (y[0:N]) + { + float *xp, *yp; +#pragma acc host_data use_device (x, y) + { +#pragma acc parallel pcopy (xp, yp) present (x, y) + { + xp = x; + yp = y; + } + } + + if (xp != acc_deviceptr (x) || yp != acc_deviceptr (y)) + abort (); + } + + for (i = 0; i < N; i++) + { + x[i] = x_ref[i] = 4.0 + i; + y[i] = y_ref[i] = 3.0; + } + + saxpy_host (N, a, x_ref, y_ref); + + cublasCreate (&h); + +#pragma acc data copyin (x[0:N]) copy (y[0:N]) + { +#pragma acc host_data use_device (x, y) + { + cublasSaxpy (h, N, &a, x, 1, y, 1); + } + } + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + +#pragma acc data create (x[0:N]) copyout (y[0:N]) + { +#pragma acc kernels + for (i = 0; i < N; i++) + y[i] = 3.0; + +#pragma acc host_data use_device (x, y) + { + cublasSaxpy (h, N, &a, x, 1, y, 1); + } + } + + 
cublasDestroy (h); + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + + for (i = 0; i < N; i++) + y[i] = 3.0; + +#pragma acc data copyin (x[0:N]) copyin (a, N) copy (y[0:N]) + { +#pragma acc host_data use_device (x, y) + { +#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a, N) + saxpy_target (N, a, x, y); + } + } + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c new file mode 100644 index 0000000..511ec64 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c @@ -0,0 +1,50 @@ +/* { dg-do run } */ + +#include <stdlib.h> + +struct by_lightning { + int a; + int b; + int c; +}; + +int main (int argc, char* argv[]) +{ + int x; + void *q = NULL, *r = NULL, *p = NULL, *s = NULL, *t = NULL; + long u; + struct by_lightning on_the_head = {1, 2, 3}; + int arr[10], *f = NULL; + _Complex float cf; + #pragma acc enter data copyin (x, arr, on_the_head, cf) + #pragma acc host_data use_device (x, arr, on_the_head, cf) + { + q = &x; + { + f = &arr[5]; + r = f; + s = &__real__ cf; + t = &on_the_head.c; + u = (long) &__imag__ cf; + #pragma acc parallel copyout(p) present (x, arr, on_the_head, cf) + { + /* This will not (and must not) call GOACC_deviceptr, but '&x' will be + the address on the device (if appropriate) regardless. */ + p = &x; + } + } + } + #pragma acc exit data delete (x) + +#if ACC_MEM_SHARED + if (q != &x || f != &arr[5] || r != f || s != &(__real__ cf) + || t != &on_the_head.c || u != (long) &(__imag__ cf) || p != &x) + abort (); +#else + if (q == &x || f == &arr[5] || r != f || s == &(__real__ cf) + || t == &on_the_head.c || u == (long) &(__imag__ cf) || p == &x) + abort (); +#endif + + return 0; +} ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Bulk] [OpenACC 0/7] host_data construct 2015-10-23 16:01 ` [Bulk] " James Norris @ 2015-10-26 18:36 ` Jakub Jelinek 2015-10-27 15:57 ` Cesar Philippidis ` (2 more replies) 0 siblings, 3 replies; 33+ messages in thread From: Jakub Jelinek @ 2015-10-26 18:36 UTC (permalink / raw) To: James Norris; +Cc: GCC Patches, Joseph S. Myers, Nathan Sidwell On Fri, Oct 23, 2015 at 10:51:42AM -0500, James Norris wrote: > @@ -12942,6 +12961,7 @@ c_finish_omp_clauses (tree clauses, bool is_omp, bool declare_simd) > case OMP_CLAUSE_GANG: > case OMP_CLAUSE_WORKER: > case OMP_CLAUSE_VECTOR: > + case OMP_CLAUSE_USE_DEVICE: > pc = &OMP_CLAUSE_CHAIN (c); > continue; > Are there any restrictions on whether you can specify the same var multiple times in use_device clause? #pragma acc host_data use_device (x) use_device (x) use_device (y, y, y) ? If not, have you verified that the gimplifier doesn't ICE on it? Generally it doesn't like the same var being mentioned multiple times. If yes, you can use e.g. the generic_head bitmap for that and in any case, cover that with sufficient testsuite coverage. > diff --git a/gcc/gimplify.c b/gcc/gimplify.c > index ab9e540..0c32219 100644 > --- a/gcc/gimplify.c > +++ b/gcc/gimplify.c > @@ -93,6 +93,8 @@ enum gimplify_omp_var_data > > GOVD_MAP_0LEN_ARRAY = 32768, > > + GOVD_USE_DEVICE = 65536, > + > GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE > | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR > | GOVD_LOCAL) > @@ -116,7 +118,9 @@ enum omp_region_type > ORT_COMBINED_TARGET = 33, > /* Dummy OpenMP region, used to disable expansion of > DECL_VALUE_EXPRs in taskloop pre body. */ > - ORT_NONE = 64 > + ORT_NONE = 64, > + /* An OpenACC host-data region. */ > + ORT_HOST_DATA = 128 I'd prefer ORT_NONE to be the last one, can you just renumber it and put ORT_HOST_DATA before it? 
> +static tree > +gimplify_oacc_host_data_1 (tree *tp, int *walk_subtrees, > + void *data ATTRIBUTE_UNUSED) > +{ Your use_device sounds very similar to use_device_ptr clause in OpenMP, which is allowed on #pragma omp target data construct and is implemented quite a bit differently from this; it is unclear if the OpenACC standard requires this kind of implementation, or you just chose to implement it this way. In particular, the GOMP_target_data call puts the variables mentioned in the use_device_ptr clauses into the mapping structures (similarly how map clause appears) and the corresponding vars are privatized within the target data region (which is a host region, basically a fancy { } braces), where the private variables contain the offloading device's pointers. > + splay_tree_node n = NULL; > + location_t loc = EXPR_LOCATION (*tp); > + > + switch (TREE_CODE (*tp)) > + { > + case ADDR_EXPR: > + { > + tree decl = TREE_OPERAND (*tp, 0); > + > + switch (TREE_CODE (decl)) > + { > + case ARRAY_REF: > + case ARRAY_RANGE_REF: > + case COMPONENT_REF: > + case VIEW_CONVERT_EXPR: > + case REALPART_EXPR: > + case IMAGPART_EXPR: > + if (TREE_CODE (TREE_OPERAND (decl, 0)) == VAR_DECL) > + n = splay_tree_lookup (gimplify_omp_ctxp->variables, > + (splay_tree_key) TREE_OPERAND (decl, 0)); > + break; I must say this looks really strange, you throw away all the offsets embedded in the component codes (fixed or variable). Where comes the above list? What about other components (say bit field refs, etc.)? > + case VAR_DECL: What is so special about VAR_DECLs? Shouldn't PARM_DECLs / RESULT_DECLs be treated the same way? > --- a/libgomp/libgomp.map > +++ b/libgomp/libgomp.map > @@ -378,6 +378,7 @@ GOACC_2.0 { > GOACC_wait; > GOACC_get_thread_num; > GOACC_get_num_threads; > + GOACC_deviceptr; > }; > > GOACC_2.0.1 { You shouldn't be adding new symbols into a symbol version that appeared in a compiler that shipped already (GCC 5 already had GOACC_2.0 symbols). 
So it should go into GOACC_2.0.1. > diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c > index af067d6..497ab92 100644 > --- a/libgomp/oacc-mem.c > +++ b/libgomp/oacc-mem.c > @@ -204,6 +204,38 @@ acc_deviceptr (void *h) > return d; > } > > +/* This function is used as a helper in generated code to implement pointer > + lookup in host_data regions. Unlike acc_deviceptr, it returns its argument > + unchanged on a shared-memory system (e.g. the host). */ > + > +void * > +GOACC_deviceptr (void *h) > +{ > + splay_tree_key n; > + void *d; > + void *offset; > + > + goacc_lazy_initialize (); > + > + struct goacc_thread *thr = goacc_thread (); > + > + if ((thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) == 0) > + { > + n = lookup_host (thr->dev, h, 1); What is supposed to be the behavior when the h pointer points at object boundary, rather than into the middle of existing mapped object? Say you have: char a[16], b[0], c[16]; // b is GCC extension Now, char *p = &a[5]; is unambiguous, either a is mapped, or not. But, if p = &a[16];, then it could be either the one-past-last byte in a, or it could be the start of b (== one-past-last byte in b) or it could be the pointer to start of c. In OpenMP 4.5, I had endless discussions about this and the end result is that one-past-last byte addresses are unspecified behavior Jakub ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: Re: [Bulk] [OpenACC 0/7] host_data construct 2015-10-26 18:36 ` Jakub Jelinek @ 2015-10-27 15:57 ` Cesar Philippidis 2015-11-02 18:33 ` Julian Brown 2015-12-23 11:02 ` Thomas Schwinge 2 siblings, 0 replies; 33+ messages in thread From: Cesar Philippidis @ 2015-10-27 15:57 UTC (permalink / raw) To: Jakub Jelinek, James Norris; +Cc: GCC Patches, Joseph S. Myers, Nathan Sidwell On 10/26/2015 11:34 AM, Jakub Jelinek wrote: > On Fri, Oct 23, 2015 at 10:51:42AM -0500, James Norris wrote: >> @@ -12942,6 +12961,7 @@ c_finish_omp_clauses (tree clauses, bool is_omp, bool declare_simd) >> case OMP_CLAUSE_GANG: >> case OMP_CLAUSE_WORKER: >> case OMP_CLAUSE_VECTOR: >> + case OMP_CLAUSE_USE_DEVICE: >> pc = &OMP_CLAUSE_CHAIN (c); >> continue; >> > > Are there any restrictions on whether you can specify the same var multiple > times in use_device clause? > #pragma acc host_data use_device (x) use_device (x) use_device (y, y, y) > ? > If not, have you verified that the gimplifier doesn't ICE on it? Generally > it doesn't like the same var being mentioned multiple times. > If yes, you can use e.g. the generic_head bitmap for that and in any case, > cover that with sufficient testsuite coverage. Generally variables cannot appear in multiple clauses. I'll add more testing for this. >> diff --git a/gcc/gimplify.c b/gcc/gimplify.c >> index ab9e540..0c32219 100644 >> --- a/gcc/gimplify.c >> +++ b/gcc/gimplify.c >> @@ -93,6 +93,8 @@ enum gimplify_omp_var_data >> >> GOVD_MAP_0LEN_ARRAY = 32768, >> >> + GOVD_USE_DEVICE = 65536, >> + >> GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE >> | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR >> | GOVD_LOCAL) >> @@ -116,7 +118,9 @@ enum omp_region_type >> ORT_COMBINED_TARGET = 33, >> /* Dummy OpenMP region, used to disable expansion of >> DECL_VALUE_EXPRs in taskloop pre body. */ >> - ORT_NONE = 64 >> + ORT_NONE = 64, >> + /* An OpenACC host-data region. 
*/ >> + ORT_HOST_DATA = 128 > > I'd prefer ORT_NONE to be the last one, can you just renumber it and put > ORT_HOST_DATA before it? OK. >> +static tree >> +gimplify_oacc_host_data_1 (tree *tp, int *walk_subtrees, >> + void *data ATTRIBUTE_UNUSED) >> +{ > > Your use_device sounds very similar to use_device_ptr clause in OpenMP, > which is allowed on #pragma omp target data construct and is implemented > quite a bit differently from this; it is unclear if the OpenACC standard > requires this kind of implementation, or you just chose to implement it this > way. In particular, the GOMP_target_data call puts the variables mentioned > in the use_device_ptr clauses into the mapping structures (similarly how > map clause appears) and the corresponding vars are privatized within the > target data region (which is a host region, basically a fancy { } braces), > where the private variables contain the offloading device's pointers. Is this a new OpenMP 4.5 feature? I'll take a closer look and see if they are similar enough. I also noticed that OpenMP 4.5 has something similar to OpenACC's enter/exit data construct now. >> + splay_tree_node n = NULL; >> + location_t loc = EXPR_LOCATION (*tp); >> + >> + switch (TREE_CODE (*tp)) >> + { >> + case ADDR_EXPR: >> + { >> + tree decl = TREE_OPERAND (*tp, 0); >> + >> + switch (TREE_CODE (decl)) >> + { >> + case ARRAY_REF: >> + case ARRAY_RANGE_REF: >> + case COMPONENT_REF: >> + case VIEW_CONVERT_EXPR: >> + case REALPART_EXPR: >> + case IMAGPART_EXPR: >> + if (TREE_CODE (TREE_OPERAND (decl, 0)) == VAR_DECL) >> + n = splay_tree_lookup (gimplify_omp_ctxp->variables, >> + (splay_tree_key) TREE_OPERAND (decl, 0)); >> + break; > > I must say this looks really strange, you throw away all the offsets > embedded in the component codes (fixed or variable). > Where comes the above list? What about other components (say bit field refs, > etc.)? I'm not sure. 
This is one of those things where multiple developers worked on it, and the history got lost. I'll investigate it. >> + case VAR_DECL: > > What is so special about VAR_DECLs? Shouldn't PARM_DECLs / RESULT_DECLs > be treated the same way? >> --- a/libgomp/libgomp.map >> +++ b/libgomp/libgomp.map >> @@ -378,6 +378,7 @@ GOACC_2.0 { >> GOACC_wait; >> GOACC_get_thread_num; >> GOACC_get_num_threads; >> + GOACC_deviceptr; >> }; >> >> GOACC_2.0.1 { > > You shouldn't be adding new symbols into a symbol version that appeared in a > compiler that shipped already (GCC 5 already had GOACC_2.0 symbols). > So it should go into GOACC_2.0.1. OK. >> diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c >> index af067d6..497ab92 100644 >> --- a/libgomp/oacc-mem.c >> +++ b/libgomp/oacc-mem.c >> @@ -204,6 +204,38 @@ acc_deviceptr (void *h) >> return d; >> } >> >> +/* This function is used as a helper in generated code to implement pointer >> + lookup in host_data regions. Unlike acc_deviceptr, it returns its argument >> + unchanged on a shared-memory system (e.g. the host). */ >> + >> +void * >> +GOACC_deviceptr (void *h) >> +{ >> + splay_tree_key n; >> + void *d; >> + void *offset; >> + >> + goacc_lazy_initialize (); >> + >> + struct goacc_thread *thr = goacc_thread (); >> + >> + if ((thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) == 0) >> + { >> + n = lookup_host (thr->dev, h, 1); > > What is supposed to be the behavior when the h pointer points at object > boundary, rather than into the middle of existing mapped object? Probably undefined with the way that OpenACC is defined. > Say you have: > char a[16], b[0], c[16]; // b is GCC extension > Now, char *p = &a[5]; is unambiguous, either a is mapped, or not. > But, if p = &a[16];, then it could be either the one-past-last byte in a, > or it could be the start of b (== one-past-last byte in b) or it could be > the pointer to start of c. 
> > In OpenMP 4.5, I had endless discussions about this and the end result is > that one-past-last byte addresses are unspecified behavior OK. Thanks for your feedback. Cesar ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Bulk] [OpenACC 0/7] host_data construct 2015-10-26 18:36 ` Jakub Jelinek 2015-10-27 15:57 ` Cesar Philippidis @ 2015-11-02 18:33 ` Julian Brown 2015-11-02 19:29 ` Jakub Jelinek ` (2 more replies) 2015-12-23 11:02 ` Thomas Schwinge 2 siblings, 3 replies; 33+ messages in thread From: Julian Brown @ 2015-11-02 18:33 UTC (permalink / raw) To: Jakub Jelinek; +Cc: James Norris, GCC Patches, Joseph S. Myers, Nathan Sidwell On Mon, 26 Oct 2015 19:34:22 +0100 Jakub Jelinek <jakub@redhat.com> wrote: > Your use_device sounds very similar to use_device_ptr clause in > OpenMP, which is allowed on #pragma omp target data construct and is > implemented quite a bit differently from this; it is unclear if the > OpenACC standard requires this kind of implementation, or you just > chose to implement it this way. In particular, the GOMP_target_data > call puts the variables mentioned in the use_device_ptr clauses into > the mapping structures (similarly how map clause appears) and the > corresponding vars are privatized within the target data region > (which is a host region, basically a fancy { } braces), where the > private variables contain the offloading device's pointers. As the author of the original patch, I have to say using the mapping structures seems like a far better approach, but I've hit some trouble with the details of adapting OpenACC to use that method. Firstly, on trunk at least, use_device_ptr variables are restricted to pointer or array types: that restriction doesn't exist in OpenACC, nor actually could I find it in the OpenMP 4.1 document (my guess is the standards are supposed to match in this regard). 
I think that a program such as this should work: void target_fn (int *targ_data); int main (int argc, char *argv[]) { char out; int myvar; #pragma omp target enter data map(to: myvar) #pragma omp target data use_device_ptr(myvar) map(from:out) { target_fn (&myvar); out = 5; } return 0; } "myvar" would have its address taken in the use_device_ptr region, and places where the corresponding mapped variable has its address taken would be replaced by a direct use of the mapped pointer. (Or is that not a well-formed thing to do, in general?). This fails with "error: 'use_device_ptr' variable is neither a pointer nor an array". Secondly, attempts to use use_device_ptr on (e.g. dynamically-allocated) arrays accessed through a pointer cause an ICE with the existing trunk OpenMP code: #include <stdlib.h> void target_fn (char *targ_data); int main (int argc, char *argv[]) { char *myarr, out; myarr = malloc (1024); #pragma omp target data map(to: myarr[0:1024]) { #pragma omp target data use_device_ptr(myarr) map(from:out) { target_fn (myarr); out = 5; } } return 0; } udp3.c: In function 'main': udp3.c:6:1: internal compiler error: in make_decl_rtl, at varasm.c:1298 main (int argc, char *argv[]) ^ 0x111256b make_decl_rtl(tree_node*) /scratch/jbrown/openacc-trunk/src/gcc-mainline/gcc/varasm.c:1294 0x9ea005 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) /scratch/jbrown/openacc-trunk/src/gcc-mainline/gcc/expr.c:9559 0x9e31c2 expand_expr_real(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool) /scratch/jbrown/openacc-trunk/src/gcc-mainline/gcc/expr.c:7892 0x9cb4ae expand_expr /scratch/jbrown/openacc-trunk/src/gcc-mainline/gcc/expr.h:255 0x9d907d expand_assignment(tree_node*, tree_node*, bool) /scratch/jbrown/openacc-trunk/src/gcc-mainline/gcc/expr.c:5089 0x89e219 expand_gimple_stmt_1 /scratch/jbrown/openacc-trunk/src/gcc-mainline/gcc/cfgexpand.c:3576 0x89e60d expand_gimple_stmt 
/scratch/jbrown/openacc-trunk/src/gcc-mainline/gcc/cfgexpand.c:3672 0x8a5773 expand_gimple_basic_block /scratch/jbrown/openacc-trunk/src/gcc-mainline/gcc/cfgexpand.c:5676 0x8a72d4 execute /scratch/jbrown/openacc-trunk/src/gcc-mainline/gcc/cfgexpand.c:6288 Furthermore, this looks strange to me (006t.omplower): .omp_data_arr.5.out = &out; myarr.8 = myarr; .omp_data_arr.5.myarr = myarr.8; #pragma omp target data map(from:out [len: 1]) use_device_ptr(myarr) { D.2436 = .omp_data_arr.5.myarr; myarr = D.2436; That's clobbering the original myarr variable, right? Any clues on these two? The omp-low.c code is rather opaque to me... Thanks, Julian ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Bulk] [OpenACC 0/7] host_data construct 2015-11-02 18:33 ` Julian Brown @ 2015-11-02 19:29 ` Jakub Jelinek 2015-11-12 11:16 ` Julian Brown 2015-11-13 15:31 ` [Bulk] " Jakub Jelinek 2 siblings, 0 replies; 33+ messages in thread From: Jakub Jelinek @ 2015-11-02 19:29 UTC (permalink / raw) To: Julian Brown; +Cc: James Norris, GCC Patches, Joseph S. Myers, Nathan Sidwell On Mon, Nov 02, 2015 at 06:33:39PM +0000, Julian Brown wrote: > As the author of the original patch, I have to say using the mapping > structures seems like a far better approach, but I've hit some trouble > with the details of adapting OpenACC to use that method. > > Firstly, on trunk at least, use_device_ptr variables are restricted to > pointer or array types: that restriction doesn't exist in OpenACC, nor > actually could I find it in the OpenMP 4.1 document (my guess is the ^^ 4.5 ;) > standards are supposed to match in this regard). I think that a program > such as this should work: > > void target_fn (int *targ_data); > > int > main (int argc, char *argv[]) > { > char out; > int myvar; > #pragma omp target enter data map(to: myvar) > > #pragma omp target data use_device_ptr(myvar) map(from:out) > { > target_fn (&myvar); > out = 5; > } > > return 0; > } You are right, I've been misreading the standard (the only testcases that have been provided with the change were using arrays and so it was non-obvious that it relies on array to pointer decay). I'll work on changing the implementation accordingly tomorrow. Jakub ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [OpenACC 0/7] host_data construct 2015-11-02 18:33 ` Julian Brown 2015-11-02 19:29 ` Jakub Jelinek @ 2015-11-12 11:16 ` Julian Brown 2015-11-18 12:48 ` Julian Brown 2015-11-13 15:31 ` [Bulk] " Jakub Jelinek 2 siblings, 1 reply; 33+ messages in thread From: Julian Brown @ 2015-11-12 11:16 UTC (permalink / raw) To: Jakub Jelinek; +Cc: James Norris, GCC Patches, Joseph S. Myers, Nathan Sidwell [-- Attachment #1: Type: text/plain, Size: 5124 bytes --] On Mon, 2 Nov 2015 18:33:39 +0000 Julian Brown <julian@codesourcery.com> wrote: > On Mon, 26 Oct 2015 19:34:22 +0100 > Jakub Jelinek <jakub@redhat.com> wrote: > > > Your use_device sounds very similar to use_device_ptr clause in > > OpenMP, which is allowed on #pragma omp target data construct and is > > implemented quite a bit differently from this; it is unclear if the > > OpenACC standard requires this kind of implementation, or you just > > chose to implement it this way. In particular, the GOMP_target_data > > call puts the variables mentioned in the use_device_ptr clauses into > > the mapping structures (similarly how map clause appears) and the > > corresponding vars are privatized within the target data region > > (which is a host region, basically a fancy { } braces), where the > > private variables contain the offloading device's pointers. > > As the author of the original patch, I have to say using the mapping > structures seems like a far better approach, but I've hit some trouble > with the details of adapting OpenACC to use that method. Here's a version of the patch which (hopefully) brings OpenACC on par with OpenMP with respect to use_device/use_device_ptr variables. The implementation is essentially the same now for OpenACC as for OpenMP (i.e. using mapping structures): so for now, only array or pointer variables can be used as use_device variables. The included tests have been adjusted accordingly. 
One awkward part of the implementation concerns nesting offloaded regions within host_data regions: #define N 1024 int main (int argc, char* argv[]) { int x[N]; #pragma acc data copyin (x[0:N]) { int *xp; #pragma acc host_data use_device (x) { [...] #pragma acc parallel present (x) copyout (xp) { xp = x; } } assert (xp == acc_deviceptr (x)); } return 0; } I think the meaning of 'x' as seen within the clauses of the parallel directive should be the *host* version of x, not the mapped target address (I've asked on the OpenACC technical mailing list to clarify this point, but no reply as yet). The changes to {maybe_,}lookup_decl_in_outer_ctx "skip over" host_data contexts when called from lower_omp_target. There's probably an analogous case for OpenMP, but I've not tried to handle that. No regressions for libgomp tests, and the new tests pass. OK for trunk? Thanks, Julian ChangeLog Julian Brown <julian@codesourcery.com> Cesar Philippidis <cesar@codesourcery.com> James Norris <James_Norris@mentor.com> gcc/ * c-family/c-pragma.c (oacc_pragmas): Add PRAGMA_OACC_HOST_DATA. * c-family/c-pragma.h (pragma_kind): Add PRAGMA_OACC_HOST_DATA. (pragma_omp_clause): Add PRAGMA_OACC_CLAUSE_USE_DEVICE. * c/c-parser.c (c_parser_omp_clause_name): Add use_device support. (c_parser_oacc_clause_use_device): New function. (c_parser_oacc_all_clauses): Add use_device support. (OACC_HOST_DATA_CLAUSE_MASK): New macro. (c_parser_oacc_host_data): New function. (c_parser_omp_construct): Add host_data support. * c/c-tree.h (c_finish_oacc_host_data): Add prototype. * c/c-typeck.c (c_finish_oacc_host_data): New function. (c_finish_omp_clauses): Add use_device support. * cp/cp-tree.h (finish_oacc_host_data): Add prototype. * cp/parser.c (cp_parser_omp_clause_name): Add use_device support. (cp_parser_oacc_all_clauses): Add use_device support. (OACC_HOST_DATA_CLAUSE_MASK): New macro. (cp_parser_oacc_host_data): New function. (cp_parser_omp_construct): Add host_data support. 
(cp_parser_pragma): Add host_data support. * cp/semantics.c (finish_omp_clauses): Add use_device support. (finish_oacc_host_data): New function. * gimple-pretty-print.c (dump_gimple_omp_target): Add host_data support. * gimple.h (gf_mask): Add GF_OMP_TARGET_KIND_OACC_HOST_DATA. (is_gimple_omp_oacc): Add support for above. * gimplify.c (gimplify_scan_omp_clauses): Add host_data, use_device support. (gimplify_omp_workshare): Add host_data support. (gimplify_expr): Likewise. * omp-builtins.def (BUILT_IN_GOACC_HOST_DATA): New. * omp-low.c (lookup_decl_in_outer_ctx) (maybe_lookup_decl_in_outer_ctx): Add optional argument to skip host_data regions. (scan_sharing_clauses): Support use_device. (check_omp_nesting_restrictions): Support host_data. (expand_omp_target): Support host_data. (lower_omp_target): Skip over outer host_data regions when looking up decls. Support use_device. (make_gimple_omp_edges): Support host_data. * tree-nested.c (convert_nonlocal_omp_clauses): Add use_device clause. libgomp/ * oacc-parallel.c (GOACC_host_data): New function. * libgomp.map (GOACC_host_data): Add to GOACC_2.0.1. * testsuite/libgomp.oacc-c-c++-common/host_data-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/host_data-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/host_data-3.c: New test. * testsuite/libgomp.oacc-c-c++-common/host_data-4.c: New test. * testsuite/libgomp.oacc-c-c++-common/host_data-5.c: New test. [-- Attachment #2: fsf-host-data-3.diff --] [-- Type: text/x-patch, Size: 29811 bytes --] commit ac4269627c5b3f5d5c20fab7517c066ae6dfce74 Author: Julian Brown <julian@codesourcery.com> Date: Mon Nov 2 06:31:47 2015 -0800 OpenACC host_data support using mapping regions. 
diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c index f86ed38..3b30191 100644 --- a/gcc/c-family/c-pragma.c +++ b/gcc/c-family/c-pragma.c @@ -1250,6 +1250,7 @@ static const struct omp_pragma_def oacc_pragmas[] = { { "data", PRAGMA_OACC_DATA }, { "enter", PRAGMA_OACC_ENTER_DATA }, { "exit", PRAGMA_OACC_EXIT_DATA }, + { "host_data", PRAGMA_OACC_HOST_DATA }, { "kernels", PRAGMA_OACC_KERNELS }, { "loop", PRAGMA_OACC_LOOP }, { "parallel", PRAGMA_OACC_PARALLEL }, diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h index afeceff..2ad7356 100644 --- a/gcc/c-family/c-pragma.h +++ b/gcc/c-family/c-pragma.h @@ -32,6 +32,7 @@ enum pragma_kind { PRAGMA_OACC_DATA, PRAGMA_OACC_ENTER_DATA, PRAGMA_OACC_EXIT_DATA, + PRAGMA_OACC_HOST_DATA, PRAGMA_OACC_KERNELS, PRAGMA_OACC_LOOP, PRAGMA_OACC_PARALLEL, @@ -165,6 +166,7 @@ enum pragma_omp_clause { PRAGMA_OACC_CLAUSE_SELF, PRAGMA_OACC_CLAUSE_SEQ, PRAGMA_OACC_CLAUSE_TILE, + PRAGMA_OACC_CLAUSE_USE_DEVICE, PRAGMA_OACC_CLAUSE_VECTOR, PRAGMA_OACC_CLAUSE_VECTOR_LENGTH, PRAGMA_OACC_CLAUSE_WAIT, diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 2484b92..8b048a3 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -10139,6 +10139,8 @@ c_parser_omp_clause_name (c_parser *parser) result = PRAGMA_OMP_CLAUSE_UNTIED; else if (!strcmp ("use_device_ptr", p)) result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; + else if (!strcmp ("use_device", p)) + result = PRAGMA_OACC_CLAUSE_USE_DEVICE; break; case 'v': if (!strcmp ("vector", p)) @@ -11485,6 +11487,15 @@ c_parser_oacc_clause_tile (c_parser *parser, tree list) return c; } +/* OpenACC 2.0: + use_device ( variable-list ) */ + +static tree +c_parser_oacc_clause_use_device (c_parser *parser, tree list) +{ + return c_parser_omp_var_list_parens (parser, OMP_CLAUSE_USE_DEVICE, list); +} + /* OpenACC: wait ( int-expr-list ) */ @@ -12786,6 +12797,10 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); c_name = 
"self"; break; + case PRAGMA_OACC_CLAUSE_USE_DEVICE: + clauses = c_parser_oacc_clause_use_device (parser, clauses); + c_name = "use_device"; + break; case PRAGMA_OACC_CLAUSE_SEQ: clauses = c_parser_oacc_simple_clause (parser, OMP_CLAUSE_SEQ, clauses); @@ -13280,6 +13295,29 @@ c_parser_oacc_enter_exit_data (c_parser *parser, bool enter) /* OpenACC 2.0: + # pragma acc host_data oacc-data-clause[optseq] new-line + structured-block +*/ + +#define OACC_HOST_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) ) + +static tree +c_parser_oacc_host_data (location_t loc, c_parser *parser) +{ + tree stmt, clauses, block; + + clauses = c_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK, + "#pragma acc host_data"); + + block = c_begin_omp_parallel (); + add_stmt (c_parser_omp_structured_block (parser)); + stmt = c_finish_oacc_host_data (loc, clauses, block); + return stmt; +} + + +/* OpenACC 2.0: # pragma acc loop oacc-loop-clause[optseq] new-line structured-block @@ -16573,6 +16611,9 @@ c_parser_omp_construct (c_parser *parser) case PRAGMA_OACC_DATA: stmt = c_parser_oacc_data (loc, parser); break; + case PRAGMA_OACC_HOST_DATA: + stmt = c_parser_oacc_host_data (loc, parser); + break; case PRAGMA_OACC_KERNELS: case PRAGMA_OACC_PARALLEL: strcpy (p_name, "#pragma acc"); diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h index 04991f7..f332661 100644 --- a/gcc/c/c-tree.h +++ b/gcc/c/c-tree.h @@ -642,6 +642,7 @@ extern tree c_finish_goto_ptr (location_t, tree); extern tree c_expr_to_decl (tree, bool *, bool *); extern tree c_finish_omp_construct (location_t, enum tree_code, tree, tree); extern tree c_finish_oacc_data (location_t, tree, tree); +extern tree c_finish_oacc_host_data (location_t, tree, tree); extern tree c_begin_omp_parallel (void); extern tree c_finish_omp_parallel (location_t, tree, tree); extern tree c_begin_omp_task (void); diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c index 4335a87..12edfba 100644 --- a/gcc/c/c-typeck.c +++ 
b/gcc/c/c-typeck.c @@ -11541,6 +11541,25 @@ c_finish_oacc_data (location_t loc, tree clauses, tree block) return add_stmt (stmt); } +/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound + statement. LOC is the location of the OACC_HOST_DATA. */ + +tree +c_finish_oacc_host_data (location_t loc, tree clauses, tree block) +{ + tree stmt; + + block = c_end_compound_stmt (loc, block, true); + + stmt = make_node (OACC_HOST_DATA); + TREE_TYPE (stmt) = void_type_node; + OACC_HOST_DATA_CLAUSES (stmt) = clauses; + OACC_HOST_DATA_BODY (stmt) = block; + SET_EXPR_LOCATION (stmt, loc); + + return add_stmt (stmt); +} + /* Like c_begin_compound_stmt, except force the retention of the BLOCK. */ tree @@ -12981,6 +13000,7 @@ c_finish_omp_clauses (tree clauses, bool is_omp, bool declare_simd) bitmap_set_bit (&map_head, DECL_UID (t)); goto check_dup_generic; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_IS_DEVICE_PTR: case OMP_CLAUSE_USE_DEVICE_PTR: t = OMP_CLAUSE_DECL (c); diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 828f268..11bd663 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -6317,6 +6317,7 @@ extern void finish_omp_threadprivate (tree); extern tree begin_omp_structured_block (void); extern tree finish_omp_structured_block (tree); extern tree finish_oacc_data (tree, tree); +extern tree finish_oacc_host_data (tree, tree); extern tree finish_omp_construct (enum tree_code, tree, tree); extern tree begin_omp_parallel (void); extern tree finish_omp_parallel (tree, tree); diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index a87675e..20c19b1 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -29262,6 +29262,8 @@ cp_parser_omp_clause_name (cp_parser *parser) result = PRAGMA_OMP_CLAUSE_UNTIED; else if (!strcmp ("use_device_ptr", p)) result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; + else if (!strcmp ("use_device", p)) + result = PRAGMA_OACC_CLAUSE_USE_DEVICE; break; case 'v': if (!strcmp ("vector", p)) @@ -31614,6 +31616,11 @@ cp_parser_oacc_all_clauses 
(cp_parser *parser, omp_clause_mask mask, clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); c_name = "self"; break; + case PRAGMA_OACC_CLAUSE_USE_DEVICE: + clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_USE_DEVICE, + clauses); + c_name = "use_device"; + break; case PRAGMA_OACC_CLAUSE_SEQ: clauses = cp_parser_oacc_simple_clause (parser, OMP_CLAUSE_SEQ, clauses, here); @@ -34525,6 +34532,30 @@ cp_parser_oacc_data (cp_parser *parser, cp_token *pragma_tok) return stmt; } +#define OACC_HOST_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) ) + +/* OpenACC 2.0: + # pragma acc host_data <clauses> new-line + structured-block */ + +static tree +cp_parser_oacc_host_data (cp_parser *parser, cp_token *pragma_tok) +{ + tree stmt, clauses, block; + unsigned int save; + + clauses = cp_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK, + "#pragma acc host_data", pragma_tok); + + block = begin_omp_parallel (); + save = cp_parser_begin_omp_structured_block (parser); + cp_parser_statement (parser, NULL_TREE, false, NULL); + cp_parser_end_omp_structured_block (parser, save); + stmt = finish_oacc_host_data (clauses, block); + return stmt; +} + /* OpenACC 2.0: # pragma acc enter data oacc-enter-data-clause[optseq] new-line @@ -35789,6 +35820,9 @@ cp_parser_omp_construct (cp_parser *parser, cp_token *pragma_tok) case PRAGMA_OACC_EXIT_DATA: stmt = cp_parser_oacc_enter_exit_data (parser, pragma_tok, false); break; + case PRAGMA_OACC_HOST_DATA: + stmt = cp_parser_oacc_host_data (parser, pragma_tok); + break; case PRAGMA_OACC_KERNELS: case PRAGMA_OACC_PARALLEL: strcpy (p_name, "#pragma acc"); @@ -36363,6 +36397,7 @@ cp_parser_pragma (cp_parser *parser, enum pragma_context context) case PRAGMA_OACC_DATA: case PRAGMA_OACC_ENTER_DATA: case PRAGMA_OACC_EXIT_DATA: + case PRAGMA_OACC_HOST_DATA: case PRAGMA_OACC_KERNELS: case PRAGMA_OACC_PARALLEL: case PRAGMA_OACC_LOOP: diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index 
db37e85..36a1b25 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -6810,6 +6810,7 @@ finish_omp_clauses (tree clauses, bool allow_fields, bool declare_simd) } break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_IS_DEVICE_PTR: case OMP_CLAUSE_USE_DEVICE_PTR: field_ok = allow_fields; @@ -7365,6 +7366,24 @@ finish_oacc_data (tree clauses, tree block) return add_stmt (stmt); } +/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound + statement. */ + +tree +finish_oacc_host_data (tree clauses, tree block) +{ + tree stmt; + + block = finish_omp_structured_block (block); + + stmt = make_node (OACC_HOST_DATA); + TREE_TYPE (stmt) = void_type_node; + OACC_HOST_DATA_CLAUSES (stmt) = clauses; + OACC_HOST_DATA_BODY (stmt) = block; + + return add_stmt (stmt); +} + /* Generate OMP construct CODE, with BODY and CLAUSES as its compound statement. */ diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c index 7b50cdf..c148c3c 100644 --- a/gcc/gimple-pretty-print.c +++ b/gcc/gimple-pretty-print.c @@ -1353,6 +1353,9 @@ dump_gimple_omp_target (pretty_printer *buffer, gomp_target *gs, case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: kind = " oacc_enter_exit_data"; break; + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + kind = " oacc_host_data"; + break; default: gcc_unreachable (); } diff --git a/gcc/gimple.h b/gcc/gimple.h index 781801b..c88da95 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -170,6 +170,7 @@ enum gf_mask { GF_OMP_TARGET_KIND_OACC_DATA = 7, GF_OMP_TARGET_KIND_OACC_UPDATE = 8, GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA = 9, + GF_OMP_TARGET_KIND_OACC_HOST_DATA = 10, /* True on an GIMPLE_OMP_RETURN statement if the return does not require a thread synchronization via some sort of barrier. 
The exact barrier @@ -6004,6 +6005,7 @@ is_gimple_omp_oacc (const gimple *stmt) case GF_OMP_TARGET_KIND_OACC_DATA: case GF_OMP_TARGET_KIND_OACC_UPDATE: case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: return true; default: return false; diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 66e5168..1259061 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -6294,6 +6294,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, case OMP_TARGET_DATA: case OMP_TARGET_ENTER_DATA: case OMP_TARGET_EXIT_DATA: + case OACC_HOST_DATA: ctx->target_firstprivatize_array_bases = true; default: break; @@ -6559,6 +6560,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, case OMP_TARGET_DATA: case OMP_TARGET_ENTER_DATA: case OMP_TARGET_EXIT_DATA: + case OACC_HOST_DATA: if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER || (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE)) @@ -6968,6 +6970,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, } goto do_notice; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: flags = GOVD_FIRSTPRIVATE | GOVD_EXPLICIT; goto do_add; @@ -7203,7 +7206,6 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, break; case OMP_CLAUSE_DEVICE_RESIDENT: - case OMP_CLAUSE_USE_DEVICE: remove = true; break; @@ -8961,6 +8963,9 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) case OMP_TEAMS: ort = OMP_TEAMS_COMBINED (expr) ? 
ORT_COMBINED_TEAMS : ORT_TEAMS; break; + case OACC_HOST_DATA: + ort = ORT_TARGET_DATA; + break; default: gcc_unreachable (); } @@ -8982,6 +8987,7 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) switch (TREE_CODE (expr)) { case OACC_DATA: + case OACC_HOST_DATA: end_ix = BUILT_IN_GOACC_DATA_END; break; case OMP_TARGET_DATA: @@ -9013,6 +9019,10 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_KERNELS, OMP_CLAUSES (expr)); break; + case OACC_HOST_DATA: + stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_HOST_DATA, + OMP_CLAUSES (expr)); + break; case OACC_PARALLEL: stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_PARALLEL, OMP_CLAUSES (expr)); @@ -10122,12 +10132,12 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, ret = GS_ALL_DONE; break; - case OACC_HOST_DATA: case OACC_DECLARE: sorry ("directive not yet implemented"); ret = GS_ALL_DONE; break; + case OACC_HOST_DATA: case OACC_DATA: case OACC_KERNELS: case OACC_PARALLEL: diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index 0b6bd58..109d374 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -47,6 +47,8 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update", DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait", BT_FN_VOID_INT_INT_VAR, ATTR_NOTHROW_LIST) +DEF_GOACC_BUILTIN (BUILT_IN_GOACC_HOST_DATA, "GOACC_host_data", + BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST) DEF_GOACC_BUILTIN_COMPILER (BUILT_IN_ACC_ON_DEVICE, "acc_on_device", BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST) diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 51b471c..0bb993f 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -390,8 +390,8 @@ scan_omp_op (tree *tp, omp_context *ctx) } static void lower_omp (gimple_seq *, omp_context *); -static tree lookup_decl_in_outer_ctx (tree, omp_context *); -static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *); +static tree 
lookup_decl_in_outer_ctx (tree, omp_context *, bool = false); +static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *, bool = false); /* Find an OMP clause of type KIND within CLAUSES. */ @@ -1935,6 +1935,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) install_var_local (decl, ctx); break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: decl = OMP_CLAUSE_DECL (c); if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE) @@ -2134,7 +2135,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) break; case OMP_CLAUSE_DEVICE_RESIDENT: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE__CACHE_: case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_AUTO: @@ -2288,6 +2288,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_SIMD: case OMP_CLAUSE_NOGROUP: case OMP_CLAUSE_DEFAULTMAP: + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE__CILK_FOR_COUNT_: case OMP_CLAUSE_ASYNC: @@ -2302,7 +2303,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) break; case OMP_CLAUSE_DEVICE_RESIDENT: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE__CACHE_: case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_AUTO: @@ -3608,6 +3608,8 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) case GF_OMP_TARGET_KIND_OACC_UPDATE: stmt_name = "update"; break; case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: stmt_name = "enter/exit data"; break; + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: stmt_name = "host_data"; + break; default: gcc_unreachable (); } switch (gimple_omp_target_kind (ctx->stmt)) @@ -3619,6 +3621,8 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) case GF_OMP_TARGET_KIND_OACC_KERNELS: ctx_stmt_name = "kernels"; break; case GF_OMP_TARGET_KIND_OACC_DATA: ctx_stmt_name = "data"; break; + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + ctx_stmt_name = "host_data"; break; default: gcc_unreachable (); } @@ -3941,13 +3945,22 @@ maybe_lookup_ctx (gimple *stmt) parallelism happens only rarely. 
*/ static tree -lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) +lookup_decl_in_outer_ctx (tree decl, omp_context *ctx, + bool skip_hostdata) { tree t; omp_context *up; for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer) - t = maybe_lookup_decl (decl, up); + { + if (skip_hostdata + && gimple_code (up->stmt) == GIMPLE_OMP_TARGET + && gimple_omp_target_kind (up->stmt) + == GF_OMP_TARGET_KIND_OACC_HOST_DATA) + continue; + + t = maybe_lookup_decl (decl, up); + } gcc_assert (!ctx->is_nested || t || is_global_var (decl)); @@ -3959,13 +3972,22 @@ lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) in outer contexts. */ static tree -maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) +maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx, + bool skip_hostdata) { tree t = NULL; omp_context *up; for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer) - t = maybe_lookup_decl (decl, up); + { + if (skip_hostdata + && gimple_code (up->stmt) == GIMPLE_OMP_TARGET + && gimple_omp_target_kind (up->stmt) + == GF_OMP_TARGET_KIND_OACC_HOST_DATA) + continue; + + t = maybe_lookup_decl (decl, up); + } return t ? 
t : decl; } @@ -12458,6 +12480,7 @@ expand_omp_target (struct omp_region *region) break; case GF_OMP_TARGET_KIND_DATA: case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: data_region = true; break; default: @@ -12697,6 +12720,9 @@ expand_omp_target (struct omp_region *region) case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA; break; + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + start_ix = BUILT_IN_GOACC_HOST_DATA; + break; default: gcc_unreachable (); } @@ -12820,6 +12846,7 @@ expand_omp_target (struct omp_region *region) { case BUILT_IN_GOACC_DATA_START: case BUILT_IN_GOMP_TARGET_DATA: + case BUILT_IN_GOACC_HOST_DATA: break; case BUILT_IN_GOMP_TARGET: case BUILT_IN_GOMP_TARGET_UPDATE: @@ -13127,6 +13154,7 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent, case GF_OMP_TARGET_KIND_OACC_PARALLEL: case GF_OMP_TARGET_KIND_OACC_KERNELS: case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: break; case GF_OMP_TARGET_KIND_UPDATE: case GF_OMP_TARGET_KIND_ENTER_DATA: @@ -14920,6 +14948,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) break; case GF_OMP_TARGET_KIND_DATA: case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: data_region = true; break; default: @@ -15025,7 +15054,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) { if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE) { - if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx)) + if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx, + true)) && varpool_node::get_create (var)->offloadable) continue; @@ -15124,6 +15154,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: var = OMP_CLAUSE_DECL (c); @@ -15262,7 +15293,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) talign = DECL_ALIGN_UNIT (ovar); if (nc) { - 
var = lookup_decl_in_outer_ctx (ovar, ctx); + var = lookup_decl_in_outer_ctx (ovar, ctx, true); x = build_sender_ref (ovar, ctx); if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP @@ -15509,12 +15540,14 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) build_int_cstu (tkind_type, tkind)); break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: ovar = OMP_CLAUSE_DECL (c); var = lookup_decl_in_outer_ctx (ovar, ctx); x = build_sender_ref (ovar, ctx); - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE) tkind = GOMP_MAP_USE_DEVICE_PTR; else tkind = GOMP_MAP_FIRSTPRIVATE_INT; @@ -15717,10 +15750,12 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_build_assign (new_var, x)); } break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: var = OMP_CLAUSE_DECL (c); - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE) x = build_sender_ref (var, ctx); else x = build_receiver_ref (var, false, ctx); @@ -16707,6 +16742,7 @@ make_gimple_omp_edges (basic_block bb, struct omp_region **region, case GF_OMP_TARGET_KIND_OACC_PARALLEL: case GF_OMP_TARGET_KIND_OACC_KERNELS: case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: break; case GF_OMP_TARGET_KIND_UPDATE: case GF_OMP_TARGET_KIND_ENTER_DATA: diff --git a/gcc/tree-nested.c b/gcc/tree-nested.c index 1f6311c..7579cb6 100644 --- a/gcc/tree-nested.c +++ b/gcc/tree-nested.c @@ -1072,6 +1072,7 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SHARED: case OMP_CLAUSE_TO_DECLARE: case OMP_CLAUSE_LINK: + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: do_decl_clause: @@ -1719,6 +1720,7 @@ 
convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SHARED: case OMP_CLAUSE_TO_DECLARE: case OMP_CLAUSE_LINK: + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: do_decl_clause: diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 39faba9..2e6561e 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -393,6 +393,7 @@ GOACC_2.0 { GOACC_2.0.1 { global: GOACC_parallel_keyed; + GOACC_host_data; } GOACC_2.0; GOMP_PLUGIN_1.0 { diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 525846b..f261dce 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -490,6 +490,46 @@ GOACC_wait (int async, int num_waits, ...) goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval); } +void +GOACC_host_data (int device, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds) +{ + bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; + struct target_mem_desc *tgt; + +#ifdef HAVE_INTTYPES_H + gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", + __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); +#else + gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", + __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); +#endif + + goacc_lazy_initialize (); + + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + + /* Host fallback or 'do nothing'. 
*/ + if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + || host_fallback) + { + tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, + GOMP_MAP_VARS_OPENACC); + tgt->prev = thr->mapped_data; + thr->mapped_data = tgt; + + return; + } + + gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); + tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, + GOMP_MAP_VARS_OPENACC); + gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); + tgt->prev = thr->mapped_data; + thr->mapped_data = tgt; +} + int GOACC_get_num_threads (void) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c new file mode 100644 index 0000000..8dc7c2d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c @@ -0,0 +1,118 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include <stdlib.h> +#include <openacc.h> +#include <cuda.h> +#include <cuda_runtime_api.h> +#include <cublas_v2.h> + +void +saxpy_host (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + y[i] = y[i] + a * x[i]; +} + +#pragma acc routine +void +saxpy_target (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + y[i] = y[i] + a * x[i]; +} + +int +main(int argc, char **argv) +{ +#define N 8 + int i; + float x_ref[N], y_ref[N]; + float x[N], y[N]; + cublasHandle_t h; + float a = 2.0; + +#pragma acc data copyin (x[0:N]) copy (y[0:N]) + { + float *xp, *yp; +#pragma acc host_data use_device (x, y) + { +#pragma acc parallel pcopy (xp, yp) + { + xp = x; + yp = y; + } + } + + if (xp != acc_deviceptr (x) || yp != acc_deviceptr (y)) + abort (); + } + + for (i = 0; i < N; i++) + { + x[i] = x_ref[i] = 4.0 + i; + y[i] = y_ref[i] = 3.0; + } + + saxpy_host (N, a, x_ref, y_ref); + + cublasCreate (&h); + +#pragma acc data copyin (x[0:N]) copy (y[0:N]) + { +#pragma acc host_data 
use_device (x, y) + { + cublasSaxpy (h, N, &a, x, 1, y, 1); + } + } + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + +#pragma acc data create (x[0:N]) copyout (y[0:N]) + { +#pragma acc kernels + for (i = 0; i < N; i++) + y[i] = 3.0; + +#pragma acc host_data use_device (x, y) + { + cublasSaxpy (h, N, &a, x, 1, y, 1); + } + } + + cublasDestroy (h); + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + + for (i = 0; i < N; i++) + y[i] = 3.0; + +#pragma acc data copyin (x[0:N]) copyin (a) copy (y[0:N]) + { +#pragma acc host_data use_device (x, y) + { +#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a) + saxpy_target (N, a, x, y); + } + } + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c new file mode 100644 index 0000000..614f143 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c @@ -0,0 +1,31 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ + +#include <stdlib.h> +#include <openacc.h> + +char *global_in_host; + +void foo (char *in) +{ + if (!acc_is_present (global_in_host, sizeof (*global_in_host)) + || in != acc_deviceptr (global_in_host)) + abort (); +} + +int +main (int argc, char **argv) +{ + char mydata[1024]; + + global_in_host = mydata; + +#pragma acc data copyin(mydata) + { +#pragma acc host_data use_device (mydata) + { + foo (mydata); + } + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c new file mode 100644 index 0000000..942a01d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c @@ -0,0 +1,28 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ + +#include <openacc.h> +#include <stdlib.h> + +#define N 1024 + +int main (int argc, char* argv[]) +{ + int 
x[N]; + +#pragma acc data copyin (x[0:N]) + { + int *xp; +#pragma acc host_data use_device (x) + { +#pragma acc parallel present (x) copyout (xp) + { + xp = x; + } + } + + if (xp != acc_deviceptr (x)) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c new file mode 100644 index 0000000..f53fc90 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c @@ -0,0 +1,29 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ + +#include <openacc.h> +#include <stdlib.h> + +#define N 1024 + +int main (int argc, char* argv[]) +{ + int x[N], *xp2; + +#pragma acc data copyin (x[0:N]) + { + int *xp; +#pragma acc host_data use_device (x) + { +#pragma acc data present (x) + { + xp = x; + } + xp2 = x; + } + + if (xp != acc_deviceptr (x) || xp2 != xp) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c new file mode 100644 index 0000000..82c84a6 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c @@ -0,0 +1,38 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ + +#include <openacc.h> +#include <stdlib.h> + +#define N 1024 + +int main (int argc, char* argv[]) +{ + int x[N], y[N], *yp; + + yp = y + 1; + +#pragma acc data copyin (x[0:N]) + { + int *xp, *yp2; +#pragma acc host_data use_device (x) + { +#pragma acc data present (x) copyin (y) + { +#pragma acc host_data use_device (yp) + { + xp = x; + yp2 = yp; + } + + if (yp2 != acc_deviceptr (yp)) + abort (); + } + } + + if (xp != acc_deviceptr (x)) + abort (); + + } + + return 0; +} ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [OpenACC 0/7] host_data construct 2015-11-12 11:16 ` Julian Brown @ 2015-11-18 12:48 ` Julian Brown 2015-11-19 13:13 ` Jakub Jelinek 0 siblings, 1 reply; 33+ messages in thread From: Julian Brown @ 2015-11-18 12:48 UTC (permalink / raw) To: Jakub Jelinek; +Cc: James Norris, GCC Patches, Joseph S. Myers, Nathan Sidwell [-- Attachment #1: Type: text/plain, Size: 627 bytes --] On Thu, 12 Nov 2015 11:16:21 +0000 Julian Brown <julian@codesourcery.com> wrote: > Here's a version of the patch which (hopefully) brings OpenACC on par > with OpenMP with respect to use_device/use_device_ptr variables. The > implementation is essentially the same now for OpenACC as for OpenMP > (i.e. using mapping structures): so for now, only array or pointer > variables can be used as use_device variables. The included tests have > been adjusted accordingly. Here's a rebased version of the patch, since the previous version no longer applies cleanly. Re-tested OK (libgomp tests). ChangeLog as before. (Ping.) Julian [-- Attachment #2: fsf-host-data-4.diff --] [-- Type: text/x-patch, Size: 29890 bytes --] commit 0201a5927c380da65d6400afad4a0e277fb85786 Author: Julian Brown <julian@codesourcery.com> Date: Mon Nov 2 06:31:47 2015 -0800 OpenACC host_data support using mapping regions. 
diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c index 12c3e75..56cf697 100644 --- a/gcc/c-family/c-pragma.c +++ b/gcc/c-family/c-pragma.c @@ -1251,6 +1251,7 @@ static const struct omp_pragma_def oacc_pragmas[] = { { "declare", PRAGMA_OACC_DECLARE }, { "enter", PRAGMA_OACC_ENTER_DATA }, { "exit", PRAGMA_OACC_EXIT_DATA }, + { "host_data", PRAGMA_OACC_HOST_DATA }, { "kernels", PRAGMA_OACC_KERNELS }, { "loop", PRAGMA_OACC_LOOP }, { "parallel", PRAGMA_OACC_PARALLEL }, diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h index 999ac67..dd246b9 100644 --- a/gcc/c-family/c-pragma.h +++ b/gcc/c-family/c-pragma.h @@ -33,6 +33,7 @@ enum pragma_kind { PRAGMA_OACC_DECLARE, PRAGMA_OACC_ENTER_DATA, PRAGMA_OACC_EXIT_DATA, + PRAGMA_OACC_HOST_DATA, PRAGMA_OACC_KERNELS, PRAGMA_OACC_LOOP, PRAGMA_OACC_PARALLEL, @@ -167,6 +168,7 @@ enum pragma_omp_clause { PRAGMA_OACC_CLAUSE_SELF, PRAGMA_OACC_CLAUSE_SEQ, PRAGMA_OACC_CLAUSE_TILE, + PRAGMA_OACC_CLAUSE_USE_DEVICE, PRAGMA_OACC_CLAUSE_VECTOR, PRAGMA_OACC_CLAUSE_VECTOR_LENGTH, PRAGMA_OACC_CLAUSE_WAIT, diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 7b10764..0a5c8bb 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -10267,6 +10267,8 @@ c_parser_omp_clause_name (c_parser *parser) result = PRAGMA_OMP_CLAUSE_UNTIED; else if (!strcmp ("use_device_ptr", p)) result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; + else if (!strcmp ("use_device", p)) + result = PRAGMA_OACC_CLAUSE_USE_DEVICE; break; case 'v': if (!strcmp ("vector", p)) @@ -11619,6 +11621,15 @@ c_parser_oacc_clause_tile (c_parser *parser, tree list) return c; } +/* OpenACC 2.0: + use_device ( variable-list ) */ + +static tree +c_parser_oacc_clause_use_device (c_parser *parser, tree list) +{ + return c_parser_omp_var_list_parens (parser, OMP_CLAUSE_USE_DEVICE, list); +} + /* OpenACC: wait ( int-expr-list ) */ @@ -12928,6 +12939,10 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); 
c_name = "self"; break; + case PRAGMA_OACC_CLAUSE_USE_DEVICE: + clauses = c_parser_oacc_clause_use_device (parser, clauses); + c_name = "use_device"; + break; case PRAGMA_OACC_CLAUSE_SEQ: clauses = c_parser_oacc_simple_clause (parser, OMP_CLAUSE_SEQ, clauses); @@ -13577,6 +13592,29 @@ c_parser_oacc_enter_exit_data (c_parser *parser, bool enter) /* OpenACC 2.0: + # pragma acc host_data oacc-data-clause[optseq] new-line + structured-block +*/ + +#define OACC_HOST_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) ) + +static tree +c_parser_oacc_host_data (location_t loc, c_parser *parser) +{ + tree stmt, clauses, block; + + clauses = c_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK, + "#pragma acc host_data"); + + block = c_begin_omp_parallel (); + add_stmt (c_parser_omp_structured_block (parser)); + stmt = c_finish_oacc_host_data (loc, clauses, block); + return stmt; +} + + +/* OpenACC 2.0: # pragma acc loop oacc-loop-clause[optseq] new-line structured-block @@ -16884,6 +16922,9 @@ c_parser_omp_construct (c_parser *parser) case PRAGMA_OACC_DATA: stmt = c_parser_oacc_data (loc, parser); break; + case PRAGMA_OACC_HOST_DATA: + stmt = c_parser_oacc_host_data (loc, parser); + break; case PRAGMA_OACC_KERNELS: case PRAGMA_OACC_PARALLEL: strcpy (p_name, "#pragma acc"); diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h index 6bc216a..848131e 100644 --- a/gcc/c/c-tree.h +++ b/gcc/c/c-tree.h @@ -653,6 +653,7 @@ extern tree c_finish_goto_ptr (location_t, tree); extern tree c_expr_to_decl (tree, bool *, bool *); extern tree c_finish_omp_construct (location_t, enum tree_code, tree, tree); extern tree c_finish_oacc_data (location_t, tree, tree); +extern tree c_finish_oacc_host_data (location_t, tree, tree); extern tree c_begin_omp_parallel (void); extern tree c_finish_omp_parallel (location_t, tree, tree); extern tree c_begin_omp_task (void); diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c index c18c307..837775b 100644 --- a/gcc/c/c-typeck.c +++ 
b/gcc/c/c-typeck.c @@ -11597,6 +11597,25 @@ c_finish_oacc_data (location_t loc, tree clauses, tree block) return add_stmt (stmt); } +/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound + statement. LOC is the location of the OACC_HOST_DATA. */ + +tree +c_finish_oacc_host_data (location_t loc, tree clauses, tree block) +{ + tree stmt; + + block = c_end_compound_stmt (loc, block, true); + + stmt = make_node (OACC_HOST_DATA); + TREE_TYPE (stmt) = void_type_node; + OACC_HOST_DATA_CLAUSES (stmt) = clauses; + OACC_HOST_DATA_BODY (stmt) = block; + SET_EXPR_LOCATION (stmt, loc); + + return add_stmt (stmt); +} + /* Like c_begin_compound_stmt, except force the retention of the BLOCK. */ tree @@ -13040,6 +13059,7 @@ c_finish_omp_clauses (tree clauses, bool is_omp, bool declare_simd) bitmap_set_bit (&map_head, DECL_UID (t)); goto check_dup_generic; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_IS_DEVICE_PTR: case OMP_CLAUSE_USE_DEVICE_PTR: t = OMP_CLAUSE_DECL (c); diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 160bf1e..2300220 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -6349,6 +6349,7 @@ extern void finish_omp_threadprivate (tree); extern tree begin_omp_structured_block (void); extern tree finish_omp_structured_block (tree); extern tree finish_oacc_data (tree, tree); +extern tree finish_oacc_host_data (tree, tree); extern tree finish_omp_construct (enum tree_code, tree, tree); extern tree begin_omp_parallel (void); extern tree finish_omp_parallel (tree, tree); diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 0e1116b..462aef7 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -29230,6 +29230,8 @@ cp_parser_omp_clause_name (cp_parser *parser) result = PRAGMA_OMP_CLAUSE_UNTIED; else if (!strcmp ("use_device_ptr", p)) result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; + else if (!strcmp ("use_device", p)) + result = PRAGMA_OACC_CLAUSE_USE_DEVICE; break; case 'v': if (!strcmp ("vector", p)) @@ -31596,6 +31598,11 @@ cp_parser_oacc_all_clauses 
(cp_parser *parser, omp_clause_mask mask, clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); c_name = "self"; break; + case PRAGMA_OACC_CLAUSE_USE_DEVICE: + clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_USE_DEVICE, + clauses); + c_name = "use_device"; + break; case PRAGMA_OACC_CLAUSE_SEQ: clauses = cp_parser_oacc_simple_clause (parser, OMP_CLAUSE_SEQ, clauses, here); @@ -34507,6 +34514,30 @@ cp_parser_oacc_data (cp_parser *parser, cp_token *pragma_tok) return stmt; } +#define OACC_HOST_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) ) + +/* OpenACC 2.0: + # pragma acc host_data <clauses> new-line + structured-block */ + +static tree +cp_parser_oacc_host_data (cp_parser *parser, cp_token *pragma_tok) +{ + tree stmt, clauses, block; + unsigned int save; + + clauses = cp_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK, + "#pragma acc host_data", pragma_tok); + + block = begin_omp_parallel (); + save = cp_parser_begin_omp_structured_block (parser); + cp_parser_statement (parser, NULL_TREE, false, NULL); + cp_parser_end_omp_structured_block (parser, save); + stmt = finish_oacc_host_data (clauses, block); + return stmt; +} + /* OpenACC 2.0: # pragma acc declare oacc-data-clause[optseq] new-line */ @@ -35926,6 +35957,9 @@ cp_parser_omp_construct (cp_parser *parser, cp_token *pragma_tok) case PRAGMA_OACC_EXIT_DATA: stmt = cp_parser_oacc_enter_exit_data (parser, pragma_tok, false); break; + case PRAGMA_OACC_HOST_DATA: + stmt = cp_parser_oacc_host_data (parser, pragma_tok); + break; case PRAGMA_OACC_KERNELS: case PRAGMA_OACC_PARALLEL: strcpy (p_name, "#pragma acc"); @@ -36504,6 +36538,7 @@ cp_parser_pragma (cp_parser *parser, enum pragma_context context) case PRAGMA_OACC_DATA: case PRAGMA_OACC_ENTER_DATA: case PRAGMA_OACC_EXIT_DATA: + case PRAGMA_OACC_HOST_DATA: case PRAGMA_OACC_KERNELS: case PRAGMA_OACC_PARALLEL: case PRAGMA_OACC_LOOP: diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index e7e5d8e..3bb6184 
100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -6835,6 +6835,7 @@ finish_omp_clauses (tree clauses, bool allow_fields, bool declare_simd) } break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_IS_DEVICE_PTR: case OMP_CLAUSE_USE_DEVICE_PTR: field_ok = allow_fields; @@ -7390,6 +7391,24 @@ finish_oacc_data (tree clauses, tree block) return add_stmt (stmt); } +/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound + statement. */ + +tree +finish_oacc_host_data (tree clauses, tree block) +{ + tree stmt; + + block = finish_omp_structured_block (block); + + stmt = make_node (OACC_HOST_DATA); + TREE_TYPE (stmt) = void_type_node; + OACC_HOST_DATA_CLAUSES (stmt) = clauses; + OACC_HOST_DATA_BODY (stmt) = block; + + return add_stmt (stmt); +} + /* Generate OMP construct CODE, with BODY and CLAUSES as its compound statement. */ diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c index 7764201..f1abf5c 100644 --- a/gcc/gimple-pretty-print.c +++ b/gcc/gimple-pretty-print.c @@ -1356,6 +1356,9 @@ dump_gimple_omp_target (pretty_printer *buffer, gomp_target *gs, case GF_OMP_TARGET_KIND_OACC_DECLARE: kind = " oacc_declare"; break; + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + kind = " oacc_host_data"; + break; default: gcc_unreachable (); } diff --git a/gcc/gimple.h b/gcc/gimple.h index 6eb22de..3e9fb2e 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -171,6 +171,7 @@ enum gf_mask { GF_OMP_TARGET_KIND_OACC_UPDATE = 8, GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA = 9, GF_OMP_TARGET_KIND_OACC_DECLARE = 10, + GF_OMP_TARGET_KIND_OACC_HOST_DATA = 11, /* True on an GIMPLE_OMP_RETURN statement if the return does not require a thread synchronization via some sort of barrier. 
The exact barrier @@ -6003,6 +6004,7 @@ is_gimple_omp_oacc (const gimple *stmt) case GF_OMP_TARGET_KIND_OACC_UPDATE: case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: case GF_OMP_TARGET_KIND_OACC_DECLARE: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: return true; default: return false; diff --git a/gcc/gimplify.c b/gcc/gimplify.c index a3ed378..cedc485 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -6414,6 +6414,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, case OMP_TARGET_DATA: case OMP_TARGET_ENTER_DATA: case OMP_TARGET_EXIT_DATA: + case OACC_HOST_DATA: ctx->target_firstprivatize_array_bases = true; default: break; @@ -6679,6 +6680,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, case OMP_TARGET_DATA: case OMP_TARGET_ENTER_DATA: case OMP_TARGET_EXIT_DATA: + case OACC_HOST_DATA: if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER || (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE)) @@ -7088,6 +7090,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, } goto do_notice; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: flags = GOVD_FIRSTPRIVATE | GOVD_EXPLICIT; goto do_add; @@ -7323,7 +7326,6 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, break; case OMP_CLAUSE_DEVICE_RESIDENT: - case OMP_CLAUSE_USE_DEVICE: remove = true; break; @@ -9196,6 +9198,9 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) case OMP_TEAMS: ort = OMP_TEAMS_COMBINED (expr) ? 
ORT_COMBINED_TEAMS : ORT_TEAMS; break; + case OACC_HOST_DATA: + ort = ORT_TARGET_DATA; + break; default: gcc_unreachable (); } @@ -9217,6 +9222,7 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) switch (TREE_CODE (expr)) { case OACC_DATA: + case OACC_HOST_DATA: end_ix = BUILT_IN_GOACC_DATA_END; break; case OMP_TARGET_DATA: @@ -9248,6 +9254,10 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_KERNELS, OMP_CLAUSES (expr)); break; + case OACC_HOST_DATA: + stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_HOST_DATA, + OMP_CLAUSES (expr)); + break; case OACC_PARALLEL: stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_PARALLEL, OMP_CLAUSES (expr)); @@ -10357,16 +10367,12 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, ret = GS_ALL_DONE; break; - case OACC_HOST_DATA: - sorry ("directive not yet implemented"); - ret = GS_ALL_DONE; - break; - case OACC_DECLARE: gimplify_oacc_declare (expr_p, pre_p); ret = GS_ALL_DONE; break; + case OACC_HOST_DATA: case OACC_DATA: case OACC_KERNELS: case OACC_PARALLEL: diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index d540dab..35f5014 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -47,6 +47,8 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update", DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait", BT_FN_VOID_INT_INT_VAR, ATTR_NOTHROW_LIST) +DEF_GOACC_BUILTIN (BUILT_IN_GOACC_HOST_DATA, "GOACC_host_data", + BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST) DEF_GOACC_BUILTIN_COMPILER (BUILT_IN_ACC_ON_DEVICE, "acc_on_device", BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST) diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 830db75..756ea5a 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -390,8 +390,8 @@ scan_omp_op (tree *tp, omp_context *ctx) } static void lower_omp (gimple_seq *, omp_context *); -static tree lookup_decl_in_outer_ctx (tree, omp_context *); -static tree 
maybe_lookup_decl_in_outer_ctx (tree, omp_context *); +static tree lookup_decl_in_outer_ctx (tree, omp_context *, bool = false); +static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *, bool = false); /* Find an OMP clause of type KIND within CLAUSES. */ @@ -1935,6 +1935,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) install_var_local (decl, ctx); break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: decl = OMP_CLAUSE_DECL (c); if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE) @@ -2137,7 +2138,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) break; case OMP_CLAUSE_DEVICE_RESIDENT: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE__CACHE_: sorry ("Clause not supported yet"); break; @@ -2288,6 +2288,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_SIMD: case OMP_CLAUSE_NOGROUP: case OMP_CLAUSE_DEFAULTMAP: + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE__CILK_FOR_COUNT_: case OMP_CLAUSE_ASYNC: @@ -2305,7 +2306,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) break; case OMP_CLAUSE_DEVICE_RESIDENT: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE__CACHE_: sorry ("Clause not supported yet"); break; @@ -3608,6 +3608,8 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) case GF_OMP_TARGET_KIND_OACC_UPDATE: stmt_name = "update"; break; case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: stmt_name = "enter/exit data"; break; + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: stmt_name = "host_data"; + break; default: gcc_unreachable (); } switch (gimple_omp_target_kind (ctx->stmt)) @@ -3619,6 +3621,8 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) case GF_OMP_TARGET_KIND_OACC_KERNELS: ctx_stmt_name = "kernels"; break; case GF_OMP_TARGET_KIND_OACC_DATA: ctx_stmt_name = "data"; break; + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + ctx_stmt_name = "host_data"; break; default: gcc_unreachable (); } @@ -3941,13 +3945,22 @@ maybe_lookup_ctx (gimple *stmt) 
parallelism happens only rarely. */ static tree -lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) +lookup_decl_in_outer_ctx (tree decl, omp_context *ctx, + bool skip_hostdata) { tree t; omp_context *up; for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer) - t = maybe_lookup_decl (decl, up); + { + if (skip_hostdata + && gimple_code (up->stmt) == GIMPLE_OMP_TARGET + && gimple_omp_target_kind (up->stmt) + == GF_OMP_TARGET_KIND_OACC_HOST_DATA) + continue; + + t = maybe_lookup_decl (decl, up); + } gcc_assert (!ctx->is_nested || t || is_global_var (decl)); @@ -3959,13 +3972,22 @@ lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) in outer contexts. */ static tree -maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) +maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx, + bool skip_hostdata) { tree t = NULL; omp_context *up; for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer) - t = maybe_lookup_decl (decl, up); + { + if (skip_hostdata + && gimple_code (up->stmt) == GIMPLE_OMP_TARGET + && gimple_omp_target_kind (up->stmt) + == GF_OMP_TARGET_KIND_OACC_HOST_DATA) + continue; + + t = maybe_lookup_decl (decl, up); + } return t ? 
t : decl; } @@ -12499,6 +12521,7 @@ expand_omp_target (struct omp_region *region) break; case GF_OMP_TARGET_KIND_DATA: case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: data_region = true; break; default: @@ -12742,6 +12765,9 @@ expand_omp_target (struct omp_region *region) case GF_OMP_TARGET_KIND_OACC_DECLARE: start_ix = BUILT_IN_GOACC_DECLARE; break; + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + start_ix = BUILT_IN_GOACC_HOST_DATA; + break; default: gcc_unreachable (); } @@ -12866,6 +12892,7 @@ expand_omp_target (struct omp_region *region) case BUILT_IN_GOACC_DATA_START: case BUILT_IN_GOACC_DECLARE: case BUILT_IN_GOMP_TARGET_DATA: + case BUILT_IN_GOACC_HOST_DATA: break; case BUILT_IN_GOMP_TARGET: case BUILT_IN_GOMP_TARGET_UPDATE: @@ -13173,6 +13200,7 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent, case GF_OMP_TARGET_KIND_OACC_PARALLEL: case GF_OMP_TARGET_KIND_OACC_KERNELS: case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: break; case GF_OMP_TARGET_KIND_UPDATE: case GF_OMP_TARGET_KIND_ENTER_DATA: @@ -14972,6 +15000,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) break; case GF_OMP_TARGET_KIND_DATA: case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: data_region = true; break; default: @@ -15079,7 +15108,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) { if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE) { - if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx)) + if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx, + true)) && varpool_node::get_create (var)->offloadable) continue; @@ -15178,6 +15208,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: var = OMP_CLAUSE_DECL (c); @@ -15316,7 +15347,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) talign = DECL_ALIGN_UNIT (ovar); if 
(nc) { - var = lookup_decl_in_outer_ctx (ovar, ctx); + var = lookup_decl_in_outer_ctx (ovar, ctx, true); x = build_sender_ref (ovar, ctx); if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP @@ -15563,12 +15594,14 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) build_int_cstu (tkind_type, tkind)); break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: ovar = OMP_CLAUSE_DECL (c); var = lookup_decl_in_outer_ctx (ovar, ctx); x = build_sender_ref (ovar, ctx); - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE) tkind = GOMP_MAP_USE_DEVICE_PTR; else tkind = GOMP_MAP_FIRSTPRIVATE_INT; @@ -15771,10 +15804,12 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_build_assign (new_var, x)); } break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: var = OMP_CLAUSE_DECL (c); - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE) x = build_sender_ref (var, ctx); else x = build_receiver_ref (var, false, ctx); @@ -16761,6 +16796,7 @@ make_gimple_omp_edges (basic_block bb, struct omp_region **region, case GF_OMP_TARGET_KIND_OACC_PARALLEL: case GF_OMP_TARGET_KIND_OACC_KERNELS: case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: break; case GF_OMP_TARGET_KIND_UPDATE: case GF_OMP_TARGET_KIND_ENTER_DATA: diff --git a/gcc/tree-nested.c b/gcc/tree-nested.c index 1f6311c..7579cb6 100644 --- a/gcc/tree-nested.c +++ b/gcc/tree-nested.c @@ -1072,6 +1072,7 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SHARED: case OMP_CLAUSE_TO_DECLARE: case OMP_CLAUSE_LINK: + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: do_decl_clause: @@ -1719,6 +1720,7 @@ 
convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SHARED: case OMP_CLAUSE_TO_DECLARE: case OMP_CLAUSE_LINK: + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: do_decl_clause: diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 4d42c42..ea9344d 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -394,6 +394,7 @@ GOACC_2.0.1 { global: GOACC_declare; GOACC_parallel_keyed; + GOACC_host_data; } GOACC_2.0; GOMP_PLUGIN_1.0 { diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index a80ede4..db7cab3 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -490,6 +490,46 @@ GOACC_wait (int async, int num_waits, ...) goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval); } +void +GOACC_host_data (int device, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds) +{ + bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; + struct target_mem_desc *tgt; + +#ifdef HAVE_INTTYPES_H + gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", + __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); +#else + gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", + __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); +#endif + + goacc_lazy_initialize (); + + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + + /* Host fallback or 'do nothing'. 
*/ + if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + || host_fallback) + { + tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, + GOMP_MAP_VARS_OPENACC); + tgt->prev = thr->mapped_data; + thr->mapped_data = tgt; + + return; + } + + gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); + tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, + GOMP_MAP_VARS_OPENACC); + gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); + tgt->prev = thr->mapped_data; + thr->mapped_data = tgt; +} + int GOACC_get_num_threads (void) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c new file mode 100644 index 0000000..8dc7c2d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c @@ -0,0 +1,118 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include <stdlib.h> +#include <openacc.h> +#include <cuda.h> +#include <cuda_runtime_api.h> +#include <cublas_v2.h> + +void +saxpy_host (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + y[i] = y[i] + a * x[i]; +} + +#pragma acc routine +void +saxpy_target (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + y[i] = y[i] + a * x[i]; +} + +int +main(int argc, char **argv) +{ +#define N 8 + int i; + float x_ref[N], y_ref[N]; + float x[N], y[N]; + cublasHandle_t h; + float a = 2.0; + +#pragma acc data copyin (x[0:N]) copy (y[0:N]) + { + float *xp, *yp; +#pragma acc host_data use_device (x, y) + { +#pragma acc parallel pcopy (xp, yp) + { + xp = x; + yp = y; + } + } + + if (xp != acc_deviceptr (x) || yp != acc_deviceptr (y)) + abort (); + } + + for (i = 0; i < N; i++) + { + x[i] = x_ref[i] = 4.0 + i; + y[i] = y_ref[i] = 3.0; + } + + saxpy_host (N, a, x_ref, y_ref); + + cublasCreate (&h); + +#pragma acc data copyin (x[0:N]) copy (y[0:N]) + { +#pragma acc host_data 
use_device (x, y) + { + cublasSaxpy (h, N, &a, x, 1, y, 1); + } + } + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + +#pragma acc data create (x[0:N]) copyout (y[0:N]) + { +#pragma acc kernels + for (i = 0; i < N; i++) + y[i] = 3.0; + +#pragma acc host_data use_device (x, y) + { + cublasSaxpy (h, N, &a, x, 1, y, 1); + } + } + + cublasDestroy (h); + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + + for (i = 0; i < N; i++) + y[i] = 3.0; + +#pragma acc data copyin (x[0:N]) copyin (a) copy (y[0:N]) + { +#pragma acc host_data use_device (x, y) + { +#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a) + saxpy_target (N, a, x, y); + } + } + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c new file mode 100644 index 0000000..614f143 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c @@ -0,0 +1,31 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ + +#include <stdlib.h> +#include <openacc.h> + +char *global_in_host; + +void foo (char *in) +{ + if (!acc_is_present (global_in_host, sizeof (*global_in_host)) + || in != acc_deviceptr (global_in_host)) + abort (); +} + +int +main (int argc, char **argv) +{ + char mydata[1024]; + + global_in_host = mydata; + +#pragma acc data copyin(mydata) + { +#pragma acc host_data use_device (mydata) + { + foo (mydata); + } + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c new file mode 100644 index 0000000..942a01d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c @@ -0,0 +1,28 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ + +#include <openacc.h> +#include <stdlib.h> + +#define N 1024 + +int main (int argc, char* argv[]) +{ + int 
x[N]; + +#pragma acc data copyin (x[0:N]) + { + int *xp; +#pragma acc host_data use_device (x) + { +#pragma acc parallel present (x) copyout (xp) + { + xp = x; + } + } + + if (xp != acc_deviceptr (x)) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c new file mode 100644 index 0000000..f53fc90 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c @@ -0,0 +1,29 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ + +#include <openacc.h> +#include <stdlib.h> + +#define N 1024 + +int main (int argc, char* argv[]) +{ + int x[N], *xp2; + +#pragma acc data copyin (x[0:N]) + { + int *xp; +#pragma acc host_data use_device (x) + { +#pragma acc data present (x) + { + xp = x; + } + xp2 = x; + } + + if (xp != acc_deviceptr (x) || xp2 != xp) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c new file mode 100644 index 0000000..82c84a6 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c @@ -0,0 +1,38 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ + +#include <openacc.h> +#include <stdlib.h> + +#define N 1024 + +int main (int argc, char* argv[]) +{ + int x[N], y[N], *yp; + + yp = y + 1; + +#pragma acc data copyin (x[0:N]) + { + int *xp, *yp2; +#pragma acc host_data use_device (x) + { +#pragma acc data present (x) copyin (y) + { +#pragma acc host_data use_device (yp) + { + xp = x; + yp2 = yp; + } + + if (yp2 != acc_deviceptr (yp)) + abort (); + } + } + + if (xp != acc_deviceptr (x)) + abort (); + + } + + return 0; +} ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [OpenACC 0/7] host_data construct 2015-11-18 12:48 ` Julian Brown @ 2015-11-19 13:13 ` Jakub Jelinek 2015-11-19 14:29 ` Julian Brown 0 siblings, 1 reply; 33+ messages in thread From: Jakub Jelinek @ 2015-11-19 13:13 UTC (permalink / raw) To: Julian Brown; +Cc: James Norris, GCC Patches, Joseph S. Myers, Nathan Sidwell On Wed, Nov 18, 2015 at 12:47:47PM +0000, Julian Brown wrote: The FE/gimplifier part is okay, but I really don't like the omp-low.c changes, mostly the *lookup_decl_in_outer_ctx* changes. If I count well, we have right now 27 maybe_lookup_decl_in_outer_ctx callers and 7 lookup_decl_in_outer_ctx callers, you want to change behavior of 1 maybe_lookup_decl_in_outer_ctx and 1 lookup_decl_in_outer_ctx. Why exactly those 2 and not the others? What are the exact rules (what does the standard say about it)? I'd expect that all phases (scan_sharing_clauses, lower_omp* and expand_omp*) should agree on the same behavior, otherwise I can't see how it can work properly. And, if you want to change just a couple of spots, I'd strongly prefer to add new functions with this weirdo behavior, rather than tweaking the original function. > --- a/gcc/omp-low.c > +++ b/gcc/omp-low.c > @@ -390,8 +390,8 @@ scan_omp_op (tree *tp, omp_context *ctx) > } > > static void lower_omp (gimple_seq *, omp_context *); > -static tree lookup_decl_in_outer_ctx (tree, omp_context *); > -static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *); > +static tree lookup_decl_in_outer_ctx (tree, omp_context *, bool = false); > +static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *, bool = false); > > /* Find an OMP clause of type KIND within CLAUSES. 
*/ > > @@ -1935,6 +1935,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) > install_var_local (decl, ctx); > break; > > + case OMP_CLAUSE_USE_DEVICE: > case OMP_CLAUSE_USE_DEVICE_PTR: > decl = OMP_CLAUSE_DECL (c); > if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE) > @@ -2137,7 +2138,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) > break; > > case OMP_CLAUSE_DEVICE_RESIDENT: > - case OMP_CLAUSE_USE_DEVICE: > case OMP_CLAUSE__CACHE_: > sorry ("Clause not supported yet"); > break; > @@ -2288,6 +2288,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) > case OMP_CLAUSE_SIMD: > case OMP_CLAUSE_NOGROUP: > case OMP_CLAUSE_DEFAULTMAP: > + case OMP_CLAUSE_USE_DEVICE: > case OMP_CLAUSE_USE_DEVICE_PTR: > case OMP_CLAUSE__CILK_FOR_COUNT_: > case OMP_CLAUSE_ASYNC: > @@ -2305,7 +2306,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) > break; > > case OMP_CLAUSE_DEVICE_RESIDENT: > - case OMP_CLAUSE_USE_DEVICE: > case OMP_CLAUSE__CACHE_: > sorry ("Clause not supported yet"); > break; > @@ -3608,6 +3608,8 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) > case GF_OMP_TARGET_KIND_OACC_UPDATE: stmt_name = "update"; break; > case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: > stmt_name = "enter/exit data"; break; > + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: stmt_name = "host_data"; > + break; > default: gcc_unreachable (); > } > switch (gimple_omp_target_kind (ctx->stmt)) > @@ -3619,6 +3621,8 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) > case GF_OMP_TARGET_KIND_OACC_KERNELS: > ctx_stmt_name = "kernels"; break; > case GF_OMP_TARGET_KIND_OACC_DATA: ctx_stmt_name = "data"; break; > + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: > + ctx_stmt_name = "host_data"; break; > default: gcc_unreachable (); > } > > @@ -3941,13 +3945,22 @@ maybe_lookup_ctx (gimple *stmt) > parallelism happens only rarely. 
*/ > > static tree > -lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) > +lookup_decl_in_outer_ctx (tree decl, omp_context *ctx, > + bool skip_hostdata) > { > tree t; > omp_context *up; > > for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer) > - t = maybe_lookup_decl (decl, up); > + { > + if (skip_hostdata > + && gimple_code (up->stmt) == GIMPLE_OMP_TARGET > + && gimple_omp_target_kind (up->stmt) > + == GF_OMP_TARGET_KIND_OACC_HOST_DATA) > + continue; > + > + t = maybe_lookup_decl (decl, up); > + } > > gcc_assert (!ctx->is_nested || t || is_global_var (decl)); > > @@ -3959,13 +3972,22 @@ lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) > in outer contexts. */ > > static tree > -maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) > +maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx, > + bool skip_hostdata) > { > tree t = NULL; > omp_context *up; > > for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer) > - t = maybe_lookup_decl (decl, up); > + { > + if (skip_hostdata > + && gimple_code (up->stmt) == GIMPLE_OMP_TARGET > + && gimple_omp_target_kind (up->stmt) > + == GF_OMP_TARGET_KIND_OACC_HOST_DATA) > + continue; > + > + t = maybe_lookup_decl (decl, up); > + } > > return t ? 
t : decl; > } > @@ -12499,6 +12521,7 @@ expand_omp_target (struct omp_region *region) > break; > case GF_OMP_TARGET_KIND_DATA: > case GF_OMP_TARGET_KIND_OACC_DATA: > + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: > data_region = true; > break; > default: > @@ -12742,6 +12765,9 @@ expand_omp_target (struct omp_region *region) > case GF_OMP_TARGET_KIND_OACC_DECLARE: > start_ix = BUILT_IN_GOACC_DECLARE; > break; > + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: > + start_ix = BUILT_IN_GOACC_HOST_DATA; > + break; > default: > gcc_unreachable (); > } > @@ -12866,6 +12892,7 @@ expand_omp_target (struct omp_region *region) > case BUILT_IN_GOACC_DATA_START: > case BUILT_IN_GOACC_DECLARE: > case BUILT_IN_GOMP_TARGET_DATA: > + case BUILT_IN_GOACC_HOST_DATA: > break; > case BUILT_IN_GOMP_TARGET: > case BUILT_IN_GOMP_TARGET_UPDATE: > @@ -13173,6 +13200,7 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent, > case GF_OMP_TARGET_KIND_OACC_PARALLEL: > case GF_OMP_TARGET_KIND_OACC_KERNELS: > case GF_OMP_TARGET_KIND_OACC_DATA: > + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: > break; > case GF_OMP_TARGET_KIND_UPDATE: > case GF_OMP_TARGET_KIND_ENTER_DATA: > @@ -14972,6 +15000,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) > break; > case GF_OMP_TARGET_KIND_DATA: > case GF_OMP_TARGET_KIND_OACC_DATA: > + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: > data_region = true; > break; > default: > @@ -15079,7 +15108,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) > { > if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE) > { > - if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx)) > + if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx, > + true)) > && varpool_node::get_create (var)->offloadable) > continue; > > @@ -15178,6 +15208,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) > } > break; > > + case OMP_CLAUSE_USE_DEVICE: > case OMP_CLAUSE_USE_DEVICE_PTR: > case OMP_CLAUSE_IS_DEVICE_PTR: > var = OMP_CLAUSE_DECL (c); > 
@@ -15316,7 +15347,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) > talign = DECL_ALIGN_UNIT (ovar); > if (nc) > { > - var = lookup_decl_in_outer_ctx (ovar, ctx); > + var = lookup_decl_in_outer_ctx (ovar, ctx, true); > x = build_sender_ref (ovar, ctx); > > if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP > @@ -15563,12 +15594,14 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) > build_int_cstu (tkind_type, tkind)); > break; > > + case OMP_CLAUSE_USE_DEVICE: > case OMP_CLAUSE_USE_DEVICE_PTR: > case OMP_CLAUSE_IS_DEVICE_PTR: > ovar = OMP_CLAUSE_DECL (c); > var = lookup_decl_in_outer_ctx (ovar, ctx); > x = build_sender_ref (ovar, ctx); > - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR) > + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR > + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE) > tkind = GOMP_MAP_USE_DEVICE_PTR; > else > tkind = GOMP_MAP_FIRSTPRIVATE_INT; > @@ -15771,10 +15804,12 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) > gimple_build_assign (new_var, x)); > } > break; > + case OMP_CLAUSE_USE_DEVICE: > case OMP_CLAUSE_USE_DEVICE_PTR: > case OMP_CLAUSE_IS_DEVICE_PTR: > var = OMP_CLAUSE_DECL (c); > - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR) > + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR > + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE) > x = build_sender_ref (var, ctx); > else > x = build_receiver_ref (var, false, ctx); > @@ -16761,6 +16796,7 @@ make_gimple_omp_edges (basic_block bb, struct omp_region **region, > case GF_OMP_TARGET_KIND_OACC_PARALLEL: > case GF_OMP_TARGET_KIND_OACC_KERNELS: > case GF_OMP_TARGET_KIND_OACC_DATA: > + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: > break; > case GF_OMP_TARGET_KIND_UPDATE: > case GF_OMP_TARGET_KIND_ENTER_DATA: Jakub ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [OpenACC 0/7] host_data construct 2015-11-19 13:13 ` Jakub Jelinek @ 2015-11-19 14:29 ` Julian Brown 2015-11-19 15:57 ` Jakub Jelinek 0 siblings, 1 reply; 33+ messages in thread From: Julian Brown @ 2015-11-19 14:29 UTC (permalink / raw) To: Jakub Jelinek; +Cc: James Norris, GCC Patches, Joseph S. Myers, Nathan Sidwell On Thu, 19 Nov 2015 14:13:45 +0100 Jakub Jelinek <jakub@redhat.com> wrote: > On Wed, Nov 18, 2015 at 12:47:47PM +0000, Julian Brown wrote: > > The FE/gimplifier part is okay, but I really don't like the > omp-low.c changes, mostly the *lookup_decl_in_outer_ctx* changes. > If I count well, we have right now 27 maybe_lookup_decl_in_outer_ctx > callers and 7 lookup_decl_in_outer_ctx callers, you want to change > behavior of 1 maybe_lookup_decl_in_outer_ctx and 1 > lookup_decl_in_outer_ctx. Why exactly those 2 and not the others? The not-very-good reason is that those are merely the places that allowed the supplied examples to work, and I'm wary of changing other code that I don't understand very well. > What are the exact rules (what does the standard say about it)? > I'd expect that all phases (scan_sharing_clauses, lower_omp* and > expand_omp*) should agree on the same behavior, otherwise I can't see > how it can work properly. OK, thanks -- as to what the standard says, it's so ill-specified in this area that nothing can be learned about the behaviour of offloaded regions within host_data constructs, and my question about that on the technical mailing list is still unanswered (actually Nathan suggested in private mail that the conservative thing to do would be to disallow offloaded regions entirely within host_data constructs, so maybe that's the way to go). OpenMP 4.5 seems to *not* specify the skipping-over behaviour for use_device_ptr variables (p105, lines 20-23): "The is_device_ptr clause is used to indicate that a list item is a device pointer already in the device data environment and that it should be used directly. 
Support for device pointers created outside of OpenMP, specifically outside of the omp_target_alloc routine and the use_device_ptr clause, is implementation defined." That suggests that use_device_ptr is a valid way to create device pointers for use in enclosed target regions: the behaviour I assumed was wrong for OpenACC. So I think my guess at the "most-obvious" behaviour was probably misguided anyway. It's maybe even more complicated. Consider the example: char x[1024]; #pragma acc enter data copyin(x) #pragma acc host_data use_device(x) { target_primitive(x); #pragma acc parallel present(x) [1] { x[5] = 0; [2] } } Here, the "present" clause marked [1] will fail (because 'x' is a target pointer now). If it's omitted, the array access [2] will cause an implicit present_or_copy to be used for the 'x' pointer (which again will fail, because now 'x' points to target data). Maybe what we actually need is, #pragma acc host_data use_device(x) { target_primitive(x); #pragma acc parallel deviceptr(x) { ... } } with the deviceptr(x) clause magically substituted in the parallel construct, but I'm struggling to see how we could justify doing that when that behaviour's not mentioned in the spec at all. Aha, so: maybe manually using deviceptr(x) is implicitly mandatory in this situation, and missing it out should be an error? That suddenly seems to make most sense. I'll see about fixing the patch to do that. Julian ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [OpenACC 0/7] host_data construct 2015-11-19 14:29 ` Julian Brown @ 2015-11-19 15:57 ` Jakub Jelinek 2015-11-30 19:34 ` Julian Brown 0 siblings, 1 reply; 33+ messages in thread From: Jakub Jelinek @ 2015-11-19 15:57 UTC (permalink / raw) To: Julian Brown; +Cc: James Norris, GCC Patches, Joseph S. Myers, Nathan Sidwell On Thu, Nov 19, 2015 at 02:26:50PM +0000, Julian Brown wrote: > OK, thanks -- as to what the standard says, it's so ill-specified in > this area that nothing can be learned about the behaviour of offloaded > regions within host_data constructs, and my question about that on the > technical mailing list is still unanswered (actually Nathan suggested > in private mail that the conservative thing to do would be to disallow > offloaded regions entirely within host_data constructs, so maybe that's > the way to go). > > OpenMP 4.5 seems to *not* specify the skipping-over behaviour for > use_device_ptr variables (p105, lines 20-23): > > "The is_device_ptr clause is used to indicate that a list item is a > device pointer already in the device data environment and that it > should be used directly. Support for device pointers created outside > of OpenMP, specifically outside of the omp_target_alloc routine and the > use_device_ptr clause, is implementation defined." > > That suggests that use_device_ptr is a valid way to create device > pointers for use in enclosed target regions: the behaviour I assumed > was wrong for OpenACC. So I think my guess at the "most-obvious" > behaviour was probably misguided anyway. use_device_ptr kind of privatizes the variable, the private variable being the device pointer corresponding to the host pointer outside of the target data with use_device_ptr clause. And, if you want to use that device pointer in a target region, it should be on the is_device_ptr clause on the target construct. See e.g. libgomp.c/target-18.c testcase. int a[4]; ... 
#pragma omp target data map(to:a) #pragma omp target data use_device_ptr(a) map(from:err) #pragma omp target is_device_ptr(a) private(i) map(from:err) { err = 0; for (i = 0; i < 4; i++) if (a[i] != 23 + i) err = 1; } The implementation thus has a choice of how to implement device pointers (what use_device_ptr gives you, or say omp_target_alloc returns) - either (GCC's choice at least for the XeonPhi and hopefully PTX, HSA does not care, as it shares address space) implement them as host pointer encoding the bits the target device wants to use, or some kind of descriptor. In the former case, is_device_ptr is essentially a firstprivate, you bitwise copy the device pointer from the host to target device, where you can dereference it etc. In the descriptor case you'd do some transformation of the host side representation of the device pointer to the device side. > > It's maybe even more complicated. Consider the example: > > char x[1024]; > > #pragma acc enter data copyin(x) > > #pragma acc host_data use_device(x) > { > target_primitive(x); > #pragma acc parallel present(x) [1] > { > x[5] = 0; [2] > } > } If it is unclear, I think disallowing acc {parallel,kernels} inside of acc host_data might be too big a hammer, but perhaps just erroring out or warning during gimplification that if you (explicitly or implicitly) try to map a var that is in use_device clause in some outer context, it is either wrong, unsupported or will not do what users think? I will double check on omp-lang, but supposedly we could for OpenMP warn in similar cases (use_device_ptr clause instead of use_device), except when it is passed to is_device_ptr clause, because I think the behavior is just unspecified otherwise. > > Here, the "present" clause marked [1] will fail (because 'x' is a > target pointer now). If it's omitted, the array access [2] will cause an > implicit present_or_copy to be used for the 'x' pointer (which again > will fail, because now 'x' points to target data). 
Maybe what we > actually need is, > > #pragma acc host_data use_device(x) > { > target_primitive(x); > #pragma acc parallel deviceptr(x) > { > ... > } > } > > with the deviceptr(x) clause magically substituted in the parallel > construct, but I'm struggling to see how we could justify doing that > when that behaviour's not mentioned in the spec at all. Is deviceptr as above meant to work? That is the OpenACC counterpart of is_device_ptr, right? If yes, then I'd suggest just warning if you try to implicitly or explicitly map something use_device in outer contexts, and just make sure you don't ICE on the cases where you warn. If the standard does not say what it means, then it is unspecified behavior... Jakub ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [OpenACC 0/7] host_data construct 2015-11-19 15:57 ` Jakub Jelinek @ 2015-11-30 19:34 ` Julian Brown 2015-12-01 8:30 ` Jakub Jelinek ` (2 more replies) 0 siblings, 3 replies; 33+ messages in thread From: Julian Brown @ 2015-11-30 19:34 UTC (permalink / raw) To: Jakub Jelinek; +Cc: James Norris, GCC Patches, Joseph S. Myers, Nathan Sidwell [-- Attachment #1: Type: text/plain, Size: 5145 bytes --] On Thu, 19 Nov 2015 16:57:23 +0100 Jakub Jelinek <jakub@redhat.com> wrote: > If it is unclear, I think disallowing acc {parallel,kernels} inside of > acc host_data might be too big hammer, but perhaps just erroring out > or warning during gimplification that if you (explicitly or > implicitly) try to map a var that is in use_device clause in some > outer context, it is either wrong, unsupported or will not do what > users think? I think we can only assume that trying to map a variable declared in a surrounding use_device clause is undefined behaviour. I haven't had any response to my questions about host_data & deviceptr on the OpenACC list. > > #pragma acc host_data use_device(x) > > { > > target_primitive(x); > > #pragma acc parallel deviceptr(x) > > { > > ... > > } > > } > > Is deviceptr as above meant to work? That is the OpenACC counterpart > of is_device_ptr, right? If yes, then I'd suggest just warning if you > try to implicitly or explicitly map something use_device in outer > contexts, and just make sure you don't ICE on the cases where you > warn. If the standard does not say what it means, then it is > unspecified behavior... A problem with deviceptr, unlike is_device_ptr, is that it turns out to be defined only to work with pointers, not arrays (OpenACC 2.0a 2.6.5.2), and there are no rules describing the latter decaying to the former. So at least if 'x' is an array, it appears the answer is "no". So, the attached patch disallows (via raising an error): * Variables being declared in explicit mapping clauses that are declared in enclosing host_data regions. 
* Variables being implicitly used (mapped) in offloaded regions that are declared in enclosing host_data regions. It's otherwise equivalent to the previously-posted version, but without the hacks to {maybe_,}lookup_decl_in_outer_ctx. I added checks for the above conditions during gimplification, which seemed to be about the same phase that other similar kinds of errors are diagnosed. Tests look OK (libgomp/gcc/g++/libstdc++), and the new ones pass. OK for mainline? Thanks, Julian ChangeLog Julian Brown <julian@codesourcery.com> Cesar Philippidis <cesar@codesourcery.com> James Norris <James_Norris@mentor.com> gcc/ * c-family/c-pragma.c (oacc_pragmas): Add PRAGMA_OACC_HOST_DATA. * c-family/c-pragma.h (pragma_kind): Add PRAGMA_OACC_HOST_DATA. (pragma_omp_clause): Add PRAGMA_OACC_CLAUSE_USE_DEVICE. * c/c-parser.c (c_parser_omp_clause_name): Add use_device support. (c_parser_oacc_clause_use_device): New function. (c_parser_oacc_all_clauses): Add use_device support. (OACC_HOST_DATA_CLAUSE_MASK): New macro. (c_parser_oacc_host_data): New function. (c_parser_omp_construct): Add host_data support. * c/c-tree.h (c_finish_oacc_host_data): Add prototype. * c/c-typeck.c (c_finish_oacc_host_data): New function. (c_finish_omp_clauses): Add use_device support. * cp/cp-tree.h (finish_oacc_host_data): Add prototype. * cp/parser.c (cp_parser_omp_clause_name): Add use_device support. (cp_parser_oacc_all_clauses): Add use_device support. (OACC_HOST_DATA_CLAUSE_MASK): New macro. (cp_parser_oacc_host_data): New function. (cp_parser_omp_construct): Add host_data support. (cp_parser_pragma): Add host_data support. * cp/semantics.c (finish_omp_clauses): Add use_device support. (finish_oacc_host_data): New function. * gimple-pretty-print.c (dump_gimple_omp_target): Add host_data support. * gimple.h (gf_mask): Add GF_OMP_TARGET_KIND_OACC_HOST_DATA. (is_gimple_omp_oacc): Add support for above. * gimplify.c (omp_region_type): Add ORT_ACC_HOST_DATA. 
(omp_notice_variable): Diagnose undefined implicit uses of use_device variables in offloaded regions. (gimplify_scan_omp_clauses): Add host_data, use_device support. Diagnose undefined mapping of use_device variables in OpenACC clauses. (gimplify_omp_workshare): Add host_data support. (gimplify_expr): Likewise. * omp-builtins.def (BUILT_IN_GOACC_HOST_DATA): New. * omp-low.c (lookup_decl_in_outer_ctx) (maybe_lookup_decl_in_outer_ctx): Add optional argument to skip host_data regions. (scan_sharing_clauses): Support use_device. (check_omp_nesting_restrictions): Support host_data. (expand_omp_target): Support host_data. (lower_omp_target): Skip over outer host_data regions when looking up decls. Support use_device. (make_gimple_omp_edges): Support host_data. * tree-nested.c (convert_nonlocal_omp_clauses): Add use_device clause. libgomp/ * oacc-parallel.c (GOACC_host_data): New function. * libgomp.map (GOACC_host_data): Add to GOACC_2.0.1. * testsuite/libgomp.oacc-c-c++-common/host_data-1.c: New test. * testsuite/libgomp.oacc-c-c++-common/host_data-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/host_data-3.c: New test. * testsuite/libgomp.oacc-c-c++-common/host_data-4.c: New test. * testsuite/libgomp.oacc-c-c++-common/host_data-5.c: New test. * testsuite/libgomp.oacc-c-c++-common/host_data-6.c: New test. [-- Attachment #2: fsf-host-data-5.diff --] [-- Type: text/x-patch, Size: 29566 bytes --] commit ac77cadbe27ca43d036f943c48fb4bf59bc84b36 Author: Julian Brown <julian@codesourcery.com> Date: Mon Nov 2 06:31:47 2015 -0800 OpenACC host_data support using mapping regions. 
diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c index 12c3e75..56cf697 100644 --- a/gcc/c-family/c-pragma.c +++ b/gcc/c-family/c-pragma.c @@ -1251,6 +1251,7 @@ static const struct omp_pragma_def oacc_pragmas[] = { { "declare", PRAGMA_OACC_DECLARE }, { "enter", PRAGMA_OACC_ENTER_DATA }, { "exit", PRAGMA_OACC_EXIT_DATA }, + { "host_data", PRAGMA_OACC_HOST_DATA }, { "kernels", PRAGMA_OACC_KERNELS }, { "loop", PRAGMA_OACC_LOOP }, { "parallel", PRAGMA_OACC_PARALLEL }, diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h index 999ac67..dd246b9 100644 --- a/gcc/c-family/c-pragma.h +++ b/gcc/c-family/c-pragma.h @@ -33,6 +33,7 @@ enum pragma_kind { PRAGMA_OACC_DECLARE, PRAGMA_OACC_ENTER_DATA, PRAGMA_OACC_EXIT_DATA, + PRAGMA_OACC_HOST_DATA, PRAGMA_OACC_KERNELS, PRAGMA_OACC_LOOP, PRAGMA_OACC_PARALLEL, @@ -167,6 +168,7 @@ enum pragma_omp_clause { PRAGMA_OACC_CLAUSE_SELF, PRAGMA_OACC_CLAUSE_SEQ, PRAGMA_OACC_CLAUSE_TILE, + PRAGMA_OACC_CLAUSE_USE_DEVICE, PRAGMA_OACC_CLAUSE_VECTOR, PRAGMA_OACC_CLAUSE_VECTOR_LENGTH, PRAGMA_OACC_CLAUSE_WAIT, diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 0259f66..d4c512f 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -10279,6 +10279,8 @@ c_parser_omp_clause_name (c_parser *parser) result = PRAGMA_OMP_CLAUSE_UNTIED; else if (!strcmp ("use_device_ptr", p)) result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; + else if (!strcmp ("use_device", p)) + result = PRAGMA_OACC_CLAUSE_USE_DEVICE; break; case 'v': if (!strcmp ("vector", p)) @@ -11631,6 +11633,15 @@ c_parser_oacc_clause_tile (c_parser *parser, tree list) return c; } +/* OpenACC 2.0: + use_device ( variable-list ) */ + +static tree +c_parser_oacc_clause_use_device (c_parser *parser, tree list) +{ + return c_parser_omp_var_list_parens (parser, OMP_CLAUSE_USE_DEVICE, list); +} + /* OpenACC: wait ( int-expr-list ) */ @@ -12940,6 +12951,10 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); 
c_name = "self"; break; + case PRAGMA_OACC_CLAUSE_USE_DEVICE: + clauses = c_parser_oacc_clause_use_device (parser, clauses); + c_name = "use_device"; + break; case PRAGMA_OACC_CLAUSE_SEQ: clauses = c_parser_oacc_simple_clause (parser, OMP_CLAUSE_SEQ, clauses); @@ -13590,6 +13605,29 @@ c_parser_oacc_enter_exit_data (c_parser *parser, bool enter) /* OpenACC 2.0: + # pragma acc host_data oacc-data-clause[optseq] new-line + structured-block +*/ + +#define OACC_HOST_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) ) + +static tree +c_parser_oacc_host_data (location_t loc, c_parser *parser) +{ + tree stmt, clauses, block; + + clauses = c_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK, + "#pragma acc host_data"); + + block = c_begin_omp_parallel (); + add_stmt (c_parser_omp_structured_block (parser)); + stmt = c_finish_oacc_host_data (loc, clauses, block); + return stmt; +} + + +/* OpenACC 2.0: # pragma acc loop oacc-loop-clause[optseq] new-line structured-block @@ -16897,6 +16935,9 @@ c_parser_omp_construct (c_parser *parser) case PRAGMA_OACC_DATA: stmt = c_parser_oacc_data (loc, parser); break; + case PRAGMA_OACC_HOST_DATA: + stmt = c_parser_oacc_host_data (loc, parser); + break; case PRAGMA_OACC_KERNELS: case PRAGMA_OACC_PARALLEL: strcpy (p_name, "#pragma acc"); diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h index 6bc216a..848131e 100644 --- a/gcc/c/c-tree.h +++ b/gcc/c/c-tree.h @@ -653,6 +653,7 @@ extern tree c_finish_goto_ptr (location_t, tree); extern tree c_expr_to_decl (tree, bool *, bool *); extern tree c_finish_omp_construct (location_t, enum tree_code, tree, tree); extern tree c_finish_oacc_data (location_t, tree, tree); +extern tree c_finish_oacc_host_data (location_t, tree, tree); extern tree c_begin_omp_parallel (void); extern tree c_finish_omp_parallel (location_t, tree, tree); extern tree c_begin_omp_task (void); diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c index 741c75c..cc2e38e 100644 --- a/gcc/c/c-typeck.c +++ 
b/gcc/c/c-typeck.c @@ -11631,6 +11631,25 @@ c_finish_oacc_data (location_t loc, tree clauses, tree block) return add_stmt (stmt); } +/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound + statement. LOC is the location of the OACC_HOST_DATA. */ + +tree +c_finish_oacc_host_data (location_t loc, tree clauses, tree block) +{ + tree stmt; + + block = c_end_compound_stmt (loc, block, true); + + stmt = make_node (OACC_HOST_DATA); + TREE_TYPE (stmt) = void_type_node; + OACC_HOST_DATA_CLAUSES (stmt) = clauses; + OACC_HOST_DATA_BODY (stmt) = block; + SET_EXPR_LOCATION (stmt, loc); + + return add_stmt (stmt); +} + /* Like c_begin_compound_stmt, except force the retention of the BLOCK. */ tree @@ -13074,6 +13093,7 @@ c_finish_omp_clauses (tree clauses, bool is_omp, bool declare_simd) bitmap_set_bit (&map_head, DECL_UID (t)); goto check_dup_generic; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_IS_DEVICE_PTR: case OMP_CLAUSE_USE_DEVICE_PTR: t = OMP_CLAUSE_DECL (c); diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index caa601d..38ae70f 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -6360,6 +6360,7 @@ extern void finish_omp_threadprivate (tree); extern tree begin_omp_structured_block (void); extern tree finish_omp_structured_block (tree); extern tree finish_oacc_data (tree, tree); +extern tree finish_oacc_host_data (tree, tree); extern tree finish_omp_construct (enum tree_code, tree, tree); extern tree begin_omp_parallel (void); extern tree finish_omp_parallel (tree, tree); diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 90a0673..f78df02 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -29232,6 +29232,8 @@ cp_parser_omp_clause_name (cp_parser *parser) result = PRAGMA_OMP_CLAUSE_UNTIED; else if (!strcmp ("use_device_ptr", p)) result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; + else if (!strcmp ("use_device", p)) + result = PRAGMA_OACC_CLAUSE_USE_DEVICE; break; case 'v': if (!strcmp ("vector", p)) @@ -31598,6 +31600,11 @@ cp_parser_oacc_all_clauses 
(cp_parser *parser, omp_clause_mask mask, clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); c_name = "self"; break; + case PRAGMA_OACC_CLAUSE_USE_DEVICE: + clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_USE_DEVICE, + clauses); + c_name = "use_device"; + break; case PRAGMA_OACC_CLAUSE_SEQ: clauses = cp_parser_oacc_simple_clause (parser, OMP_CLAUSE_SEQ, clauses, here); @@ -34509,6 +34516,30 @@ cp_parser_oacc_data (cp_parser *parser, cp_token *pragma_tok) return stmt; } +#define OACC_HOST_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) ) + +/* OpenACC 2.0: + # pragma acc host_data <clauses> new-line + structured-block */ + +static tree +cp_parser_oacc_host_data (cp_parser *parser, cp_token *pragma_tok) +{ + tree stmt, clauses, block; + unsigned int save; + + clauses = cp_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK, + "#pragma acc host_data", pragma_tok); + + block = begin_omp_parallel (); + save = cp_parser_begin_omp_structured_block (parser); + cp_parser_statement (parser, NULL_TREE, false, NULL); + cp_parser_end_omp_structured_block (parser, save); + stmt = finish_oacc_host_data (clauses, block); + return stmt; +} + /* OpenACC 2.0: # pragma acc declare oacc-data-clause[optseq] new-line */ @@ -36068,6 +36099,9 @@ cp_parser_omp_construct (cp_parser *parser, cp_token *pragma_tok) case PRAGMA_OACC_EXIT_DATA: stmt = cp_parser_oacc_enter_exit_data (parser, pragma_tok, false); break; + case PRAGMA_OACC_HOST_DATA: + stmt = cp_parser_oacc_host_data (parser, pragma_tok); + break; case PRAGMA_OACC_KERNELS: case PRAGMA_OACC_PARALLEL: strcpy (p_name, "#pragma acc"); @@ -36645,6 +36679,7 @@ cp_parser_pragma (cp_parser *parser, enum pragma_context context) case PRAGMA_OACC_DATA: case PRAGMA_OACC_ENTER_DATA: case PRAGMA_OACC_EXIT_DATA: + case PRAGMA_OACC_HOST_DATA: case PRAGMA_OACC_KERNELS: case PRAGMA_OACC_PARALLEL: case PRAGMA_OACC_LOOP: diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index e7e5d8e..3bb6184 
100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -6835,6 +6835,7 @@ finish_omp_clauses (tree clauses, bool allow_fields, bool declare_simd) } break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_IS_DEVICE_PTR: case OMP_CLAUSE_USE_DEVICE_PTR: field_ok = allow_fields; @@ -7390,6 +7391,24 @@ finish_oacc_data (tree clauses, tree block) return add_stmt (stmt); } +/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound + statement. */ + +tree +finish_oacc_host_data (tree clauses, tree block) +{ + tree stmt; + + block = finish_omp_structured_block (block); + + stmt = make_node (OACC_HOST_DATA); + TREE_TYPE (stmt) = void_type_node; + OACC_HOST_DATA_CLAUSES (stmt) = clauses; + OACC_HOST_DATA_BODY (stmt) = block; + + return add_stmt (stmt); +} + /* Generate OMP construct CODE, with BODY and CLAUSES as its compound statement. */ diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c index 7764201..f1abf5c 100644 --- a/gcc/gimple-pretty-print.c +++ b/gcc/gimple-pretty-print.c @@ -1356,6 +1356,9 @@ dump_gimple_omp_target (pretty_printer *buffer, gomp_target *gs, case GF_OMP_TARGET_KIND_OACC_DECLARE: kind = " oacc_declare"; break; + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + kind = " oacc_host_data"; + break; default: gcc_unreachable (); } diff --git a/gcc/gimple.h b/gcc/gimple.h index 0b04804..dc61043 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -171,6 +171,7 @@ enum gf_mask { GF_OMP_TARGET_KIND_OACC_UPDATE = 8, GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA = 9, GF_OMP_TARGET_KIND_OACC_DECLARE = 10, + GF_OMP_TARGET_KIND_OACC_HOST_DATA = 11, /* True on an GIMPLE_OMP_RETURN statement if the return does not require a thread synchronization via some sort of barrier. 
The exact barrier @@ -6004,6 +6005,7 @@ is_gimple_omp_oacc (const gimple *stmt) case GF_OMP_TARGET_KIND_OACC_UPDATE: case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: case GF_OMP_TARGET_KIND_OACC_DECLARE: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: return true; default: return false; diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 7fff12f..85f6b1a 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -122,6 +122,7 @@ enum omp_region_type ORT_ACC_DATA = ORT_ACC | ORT_TARGET_DATA, /* Data construct. */ ORT_ACC_PARALLEL = ORT_ACC | ORT_TARGET, /* Parallel construct */ ORT_ACC_KERNELS = ORT_ACC | ORT_TARGET | 0x80, /* Kernels construct. */ + ORT_ACC_HOST_DATA = ORT_ACC | ORT_TARGET_DATA | 0x80, /* Host data. */ /* Dummy OpenMP region, used to disable expansion of DECL_VALUE_EXPRs in taskloop pre body. */ @@ -6120,6 +6121,9 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code) (splay_tree_key) decl); if (n2) { + if (octx->region_type == ORT_ACC_HOST_DATA) + error ("variable %qE declared in enclosing " + "host_data region", DECL_NAME (decl)); nflags |= GOVD_MAP; goto found_outer; } @@ -6418,6 +6422,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, case OMP_TARGET_DATA: case OMP_TARGET_ENTER_DATA: case OMP_TARGET_EXIT_DATA: + case OACC_HOST_DATA: ctx->target_firstprivatize_array_bases = true; default: break; @@ -6683,6 +6688,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, case OMP_TARGET_DATA: case OMP_TARGET_ENTER_DATA: case OMP_TARGET_EXIT_DATA: + case OACC_HOST_DATA: if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER || (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE)) @@ -6695,6 +6701,22 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, } if (remove) break; + if (DECL_P (decl) && outer_ctx && (region_type & ORT_ACC)) + { + struct gimplify_omp_ctx *octx; + for (octx = outer_ctx; octx; octx = octx->outer_context) + { + if (!(octx->region_type & (ORT_TARGET_DATA | ORT_TARGET))) + 
break; + splay_tree_node n2 + = splay_tree_lookup (octx->variables, + (splay_tree_key) decl); + if (n2 && octx->region_type == ORT_ACC_HOST_DATA) + error_at (OMP_CLAUSE_LOCATION (c), "variable %qE " + "declared in enclosing host_data region", + DECL_NAME (decl)); + } + } if (OMP_CLAUSE_SIZE (c) == NULL_TREE) OMP_CLAUSE_SIZE (c) = DECL_P (decl) ? DECL_SIZE_UNIT (decl) : TYPE_SIZE_UNIT (TREE_TYPE (decl)); @@ -7092,6 +7114,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, } goto do_notice; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: flags = GOVD_FIRSTPRIVATE | GOVD_EXPLICIT; goto do_add; @@ -7327,7 +7350,6 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, break; case OMP_CLAUSE_DEVICE_RESIDENT: - case OMP_CLAUSE_USE_DEVICE: remove = true; break; @@ -9365,6 +9387,9 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) case OMP_TEAMS: ort = OMP_TEAMS_COMBINED (expr) ? ORT_COMBINED_TEAMS : ORT_TEAMS; break; + case OACC_HOST_DATA: + ort = ORT_ACC_HOST_DATA; + break; default: gcc_unreachable (); } @@ -9386,6 +9411,7 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) switch (TREE_CODE (expr)) { case OACC_DATA: + case OACC_HOST_DATA: end_ix = BUILT_IN_GOACC_DATA_END; break; case OMP_TARGET_DATA: @@ -9418,6 +9444,10 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_KERNELS, OMP_CLAUSES (expr)); break; + case OACC_HOST_DATA: + stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_HOST_DATA, + OMP_CLAUSES (expr)); + break; case OACC_PARALLEL: stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_PARALLEL, OMP_CLAUSES (expr)); @@ -10527,16 +10557,12 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, ret = GS_ALL_DONE; break; - case OACC_HOST_DATA: - sorry ("directive not yet implemented"); - ret = GS_ALL_DONE; - break; - case OACC_DECLARE: gimplify_oacc_declare (expr_p, pre_p); ret = GS_ALL_DONE; break; + case 
OACC_HOST_DATA: case OACC_DATA: case OACC_KERNELS: case OACC_PARALLEL: diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index d540dab..35f5014 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -47,6 +47,8 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update", DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait", BT_FN_VOID_INT_INT_VAR, ATTR_NOTHROW_LIST) +DEF_GOACC_BUILTIN (BUILT_IN_GOACC_HOST_DATA, "GOACC_host_data", + BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST) DEF_GOACC_BUILTIN_COMPILER (BUILT_IN_ACC_ON_DEVICE, "acc_on_device", BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST) diff --git a/gcc/omp-low.c b/gcc/omp-low.c index f17a828..15cc839 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -1942,6 +1942,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) install_var_local (decl, ctx); break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: decl = OMP_CLAUSE_DECL (c); if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE) @@ -2144,7 +2145,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) break; case OMP_CLAUSE_DEVICE_RESIDENT: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE__CACHE_: sorry ("Clause not supported yet"); break; @@ -2295,6 +2295,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_SIMD: case OMP_CLAUSE_NOGROUP: case OMP_CLAUSE_DEFAULTMAP: + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE__CILK_FOR_COUNT_: case OMP_CLAUSE_ASYNC: @@ -2312,7 +2313,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) break; case OMP_CLAUSE_DEVICE_RESIDENT: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE__CACHE_: sorry ("Clause not supported yet"); break; @@ -3615,6 +3615,8 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) case GF_OMP_TARGET_KIND_OACC_UPDATE: stmt_name = "update"; break; case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: stmt_name = "enter/exit data"; break; + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: stmt_name = "host_data"; + break; 
default: gcc_unreachable (); } switch (gimple_omp_target_kind (ctx->stmt)) @@ -3626,6 +3628,8 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) case GF_OMP_TARGET_KIND_OACC_KERNELS: ctx_stmt_name = "kernels"; break; case GF_OMP_TARGET_KIND_OACC_DATA: ctx_stmt_name = "data"; break; + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + ctx_stmt_name = "host_data"; break; default: gcc_unreachable (); } @@ -12508,6 +12512,7 @@ expand_omp_target (struct omp_region *region) break; case GF_OMP_TARGET_KIND_DATA: case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: data_region = true; break; default: @@ -12751,6 +12756,9 @@ expand_omp_target (struct omp_region *region) case GF_OMP_TARGET_KIND_OACC_DECLARE: start_ix = BUILT_IN_GOACC_DECLARE; break; + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + start_ix = BUILT_IN_GOACC_HOST_DATA; + break; default: gcc_unreachable (); } @@ -12875,6 +12883,7 @@ expand_omp_target (struct omp_region *region) case BUILT_IN_GOACC_DATA_START: case BUILT_IN_GOACC_DECLARE: case BUILT_IN_GOMP_TARGET_DATA: + case BUILT_IN_GOACC_HOST_DATA: break; case BUILT_IN_GOMP_TARGET: case BUILT_IN_GOMP_TARGET_UPDATE: @@ -13182,6 +13191,7 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent, case GF_OMP_TARGET_KIND_OACC_PARALLEL: case GF_OMP_TARGET_KIND_OACC_KERNELS: case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: break; case GF_OMP_TARGET_KIND_UPDATE: case GF_OMP_TARGET_KIND_ENTER_DATA: @@ -14982,6 +14992,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) break; case GF_OMP_TARGET_KIND_DATA: case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: data_region = true; break; default: @@ -15188,6 +15199,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: var = OMP_CLAUSE_DECL (c); @@ -15573,12 +15585,14 @@ lower_omp_target 
(gimple_stmt_iterator *gsi_p, omp_context *ctx) build_int_cstu (tkind_type, tkind)); break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: ovar = OMP_CLAUSE_DECL (c); var = lookup_decl_in_outer_ctx (ovar, ctx); x = build_sender_ref (ovar, ctx); - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE) tkind = GOMP_MAP_USE_DEVICE_PTR; else tkind = GOMP_MAP_FIRSTPRIVATE_INT; @@ -15781,10 +15795,12 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_build_assign (new_var, x)); } break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: var = OMP_CLAUSE_DECL (c); - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE) x = build_sender_ref (var, ctx); else x = build_receiver_ref (var, false, ctx); @@ -16771,6 +16787,7 @@ make_gimple_omp_edges (basic_block bb, struct omp_region **region, case GF_OMP_TARGET_KIND_OACC_PARALLEL: case GF_OMP_TARGET_KIND_OACC_KERNELS: case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: break; case GF_OMP_TARGET_KIND_UPDATE: case GF_OMP_TARGET_KIND_ENTER_DATA: diff --git a/gcc/tree-nested.c b/gcc/tree-nested.c index 280d29b..70904ce 100644 --- a/gcc/tree-nested.c +++ b/gcc/tree-nested.c @@ -1072,6 +1072,7 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SHARED: case OMP_CLAUSE_TO_DECLARE: case OMP_CLAUSE_LINK: + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: do_decl_clause: @@ -1721,6 +1722,7 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SHARED: case OMP_CLAUSE_TO_DECLARE: case OMP_CLAUSE_LINK: + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case 
OMP_CLAUSE_IS_DEVICE_PTR: do_decl_clause: diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 4d42c42..ea9344d 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -394,6 +394,7 @@ GOACC_2.0.1 { global: GOACC_declare; GOACC_parallel_keyed; + GOACC_host_data; } GOACC_2.0; GOMP_PLUGIN_1.0 { diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index a80ede4..db7cab3 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -490,6 +490,46 @@ GOACC_wait (int async, int num_waits, ...) goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval); } +void +GOACC_host_data (int device, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds) +{ + bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; + struct target_mem_desc *tgt; + +#ifdef HAVE_INTTYPES_H + gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", + __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); +#else + gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", + __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); +#endif + + goacc_lazy_initialize (); + + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + + /* Host fallback or 'do nothing'. 
*/ + if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + || host_fallback) + { + tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, + GOMP_MAP_VARS_OPENACC); + tgt->prev = thr->mapped_data; + thr->mapped_data = tgt; + + return; + } + + gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); + tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, + GOMP_MAP_VARS_OPENACC); + gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); + tgt->prev = thr->mapped_data; + thr->mapped_data = tgt; +} + int GOACC_get_num_threads (void) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c new file mode 100644 index 0000000..51745ba --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c @@ -0,0 +1,100 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include <stdlib.h> +#include <openacc.h> +#include <cuda.h> +#include <cuda_runtime_api.h> +#include <cublas_v2.h> + +void +saxpy_host (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + y[i] = y[i] + a * x[i]; +} + +#pragma acc routine +void +saxpy_target (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + y[i] = y[i] + a * x[i]; +} + +int +main(int argc, char **argv) +{ +#define N 8 + int i; + float x_ref[N], y_ref[N]; + float x[N], y[N]; + cublasHandle_t h; + float a = 2.0; + + for (i = 0; i < N; i++) + { + x[i] = x_ref[i] = 4.0 + i; + y[i] = y_ref[i] = 3.0; + } + + saxpy_host (N, a, x_ref, y_ref); + + cublasCreate (&h); + +#pragma acc data copyin (x[0:N]) copy (y[0:N]) + { +#pragma acc host_data use_device (x, y) + { + cublasSaxpy (h, N, &a, x, 1, y, 1); + } + } + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + +#pragma acc data create (x[0:N]) copyout (y[0:N]) + { +#pragma acc kernels + for (i = 0; i < N; i++) + y[i] = 3.0; + +#pragma 
acc host_data use_device (x, y) + { + cublasSaxpy (h, N, &a, x, 1, y, 1); + } + } + + cublasDestroy (h); + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + + for (i = 0; i < N; i++) + y[i] = 3.0; + + /* There's no need to use host_data here. */ +#pragma acc data copyin (x[0:N]) copyin (a) copy (y[0:N]) + { +#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a) + saxpy_target (N, a, x, y); + } + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c new file mode 100644 index 0000000..9820286 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <openacc.h> + +char *global_in_host; + +void foo (char *in) +{ + if (!acc_is_present (global_in_host, sizeof (*global_in_host)) + || in != acc_deviceptr (global_in_host)) + abort (); +} + +int +main (int argc, char **argv) +{ + char mydata[1024]; + + global_in_host = mydata; + +#pragma acc data copyin(mydata) + { +#pragma acc host_data use_device (mydata) + { + foo (mydata); + } + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c new file mode 100644 index 0000000..b6ee9b1 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ + +#include <openacc.h> +#include <stdlib.h> + +#define N 1024 + +int main (int argc, char* argv[]) +{ + int x[N]; + +#pragma acc data copyin (x[0:N]) + { + int *xp; +#pragma acc host_data use_device (x) + { + /* This use of the present clause is undefined behaviour for OpenACC. 
*/ +#pragma acc parallel present (x) copyout (xp) /* { dg-error "variable 'x' declared in enclosing host_data region" } */ + { + xp = x; + } + } + + if (xp != acc_deviceptr (x)) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c new file mode 100644 index 0000000..3504f27 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ + +#include <openacc.h> +#include <stdlib.h> + +#define N 1024 + +int main (int argc, char* argv[]) +{ + int x[N], *xp2; + +#pragma acc data copyin (x[0:N]) + { + int *xp; +#pragma acc host_data use_device (x) + { +#pragma acc data + { + xp = x; + } + xp2 = x; + } + + if (xp != acc_deviceptr (x) || xp2 != xp) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c new file mode 100644 index 0000000..268e919 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ + +#include <openacc.h> +#include <stdlib.h> + +#define N 1024 + +int main (int argc, char* argv[]) +{ + int x[N], y[N], *yp; + + yp = y + 1; + +#pragma acc data copyin (x[0:N]) + { + int *xp, *yp2; +#pragma acc host_data use_device (x) + { +#pragma acc data copyin (y) + { +#pragma acc host_data use_device (yp) + { + xp = x; + yp2 = yp; + } + + if (yp2 != acc_deviceptr (yp)) + abort (); + } + } + + if (xp != acc_deviceptr (x)) + abort (); + + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-6.c new file mode 100644 index 0000000..d0b1968 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-6.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ + +#include <openacc.h> +#include <stdlib.h> + +#define N 1024 + +int main (int argc, char* argv[]) 
+{ + int x[N]; + +#pragma acc data copyin (x[0:N]) + { + int *xp; +#pragma acc host_data use_device (x) + { + /* Here 'x' being implicitly firstprivate for the parallel region + conflicts with it being declared as use_device in the enclosing + host_data region. */ +#pragma acc parallel copyout (xp) + { + xp = x; /* { dg-error "variable 'x' declared in enclosing host_data region" } */ + } + } + + if (xp != acc_deviceptr (x)) + abort (); + } + + return 0; +} ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [OpenACC 0/7] host_data construct 2015-11-30 19:34 ` Julian Brown @ 2015-12-01 8:30 ` Jakub Jelinek 2015-12-02 15:27 ` Tom de Vries 2015-12-02 15:59 ` Thomas Schwinge 2 siblings, 0 replies; 33+ messages in thread From: Jakub Jelinek @ 2015-12-01 8:30 UTC (permalink / raw) To: Julian Brown; +Cc: James Norris, GCC Patches, Joseph S. Myers, Nathan Sidwell On Mon, Nov 30, 2015 at 07:30:34PM +0000, Julian Brown wrote: > Julian Brown <julian@codesourcery.com> > Cesar Philippidis <cesar@codesourcery.com> > James Norris <James_Norris@mentor.com> > > gcc/ > * c-family/c-pragma.c (oacc_pragmas): Add PRAGMA_OACC_HOST_DATA. > * c-family/c-pragma.h (pragma_kind): Add PRAGMA_OACC_HOST_DATA. c-family/, c/ and cp/ subdirectories have their own ChangeLog, so you need to split the entry into multiple ChangeLog files and remove the directory prefixes. > @@ -6120,6 +6121,9 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code) > (splay_tree_key) decl); > if (n2) > { > + if (octx->region_type == ORT_ACC_HOST_DATA) > + error ("variable %qE declared in enclosing " > + "host_data region", DECL_NAME (decl)); %<host_data%> instead? 
> nflags |= GOVD_MAP; > goto found_outer; > } > @@ -6418,6 +6422,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, > case OMP_TARGET_DATA: > case OMP_TARGET_ENTER_DATA: > case OMP_TARGET_EXIT_DATA: > + case OACC_HOST_DATA: > ctx->target_firstprivatize_array_bases = true; > default: > break; > @@ -6683,6 +6688,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, > case OMP_TARGET_DATA: > case OMP_TARGET_ENTER_DATA: > case OMP_TARGET_EXIT_DATA: > + case OACC_HOST_DATA: > if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER > || (OMP_CLAUSE_MAP_KIND (c) > == GOMP_MAP_FIRSTPRIVATE_REFERENCE)) > @@ -6695,6 +6701,22 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, > } > if (remove) > break; > + if (DECL_P (decl) && outer_ctx && (region_type & ORT_ACC)) > + { > + struct gimplify_omp_ctx *octx; > + for (octx = outer_ctx; octx; octx = octx->outer_context) > + { > + if (!(octx->region_type & (ORT_TARGET_DATA | ORT_TARGET))) > + break; Wouldn't it be better to do if (octx->region_type != ORT_ACC_HOST_DATA) continue; here, thus only lookup if you really want to use it? > + splay_tree_node n2 > + = splay_tree_lookup (octx->variables, > + (splay_tree_key) decl); > + if (n2 && octx->region_type == ORT_ACC_HOST_DATA) and remove the && ... part from the condition? > + error_at (OMP_CLAUSE_LOCATION (c), "variable %qE " > + "declared in enclosing host_data region", > + DECL_NAME (decl)); > + } > + } > if (OMP_CLAUSE_SIZE (c) == NULL_TREE) > OMP_CLAUSE_SIZE (c) = DECL_P (decl) ? DECL_SIZE_UNIT (decl) > : TYPE_SIZE_UNIT (TREE_TYPE (decl)); Ok with those changes. Jakub ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [OpenACC 0/7] host_data construct 2015-11-30 19:34 ` Julian Brown 2015-12-01 8:30 ` Jakub Jelinek @ 2015-12-02 15:27 ` Tom de Vries 2015-12-02 15:59 ` Thomas Schwinge 2 siblings, 0 replies; 33+ messages in thread From: Tom de Vries @ 2015-12-02 15:27 UTC (permalink / raw) To: Julian Brown, Jakub Jelinek Cc: James Norris, GCC Patches, Joseph S. Myers, Nathan Sidwell On 30/11/15 20:30, Julian Brown wrote: > libgomp/ > * oacc-parallel.c (GOACC_host_data): New function. > * libgomp.map (GOACC_host_data): Add to GOACC_2.0.1. > * testsuite/libgomp.oacc-c-c++-common/host_data-1.c: New test. > * testsuite/libgomp.oacc-c-c++-common/host_data-2.c: New test. > * testsuite/libgomp.oacc-c-c++-common/host_data-3.c: New test. > * testsuite/libgomp.oacc-c-c++-common/host_data-4.c: New test. > * testsuite/libgomp.oacc-c-c++-common/host_data-5.c: New test. > * testsuite/libgomp.oacc-c-c++-common/host_data-6.c: New test. > Hi, At r231169, I'm seeing these failures for a no-accelerator setup: ... FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/host_data-2.c -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 execution test FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/host_data-4.c -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 execution test FAIL: libgomp.oacc-c/../libgomp.oacc-c-c++-common/host_data-5.c -DACC_DEVICE_TYPE_host=1 -DACC_MEM_SHARED=1 execution test ... Thanks, - Tom ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [OpenACC 0/7] host_data construct 2015-11-30 19:34 ` Julian Brown 2015-12-01 8:30 ` Jakub Jelinek 2015-12-02 15:27 ` Tom de Vries @ 2015-12-02 15:59 ` Thomas Schwinge 2015-12-02 19:16 ` Cesar Philippidis ` (2 more replies) 2 siblings, 3 replies; 33+ messages in thread From: Thomas Schwinge @ 2015-12-02 15:59 UTC (permalink / raw) To: Julian Brown, GCC Patches, Jakub Jelinek Cc: James Norris, Joseph S. Myers, Nathan Sidwell, Cesar Philippidis [-- Attachment #1: Type: text/plain, Size: 21493 bytes --] Hi! Cesar and Jim copied, for help with Fortran and generally testsuite things. On Mon, 30 Nov 2015 19:30:34 +0000, Julian Brown <julian@codesourcery.com> wrote: > [patch] First, thanks! > Tests look OK (libgomp/gcc/g++/libstdc++), and the new ones pass. I see a regression (ICE) in gfortran.dg/goacc/coarray.f95 (done: XFAILed, and obsolete dg-excess-errors directives removed; compare to gfortran.dg/goacc/coarray_2.f90), and I see new FAILs for non-offloading execution of libgomp.oacc-c-c++-common/host_data-2.c, libgomp.oacc-c-c++-common/host_data-4.c, and libgomp.oacc-c-c++-common/host_data-5.c (done: see below); confirmed by a number of reports on the <gcc-regression@gcc.gnu.org> and <gcc-testresults@gcc.gnu.org> mailing lists. I can understand that you didn't see the Fortran problem if not running Fortran testing (but why?), but it's strange that you didn't see the libgomp C/C++ FAILs. A few patch review items, some of which I've already addressed (see below). > --- a/gcc/c/c-parser.c > +++ b/gcc/c/c-parser.c > @@ -10279,6 +10279,8 @@ c_parser_omp_clause_name (c_parser *parser) > result = PRAGMA_OMP_CLAUSE_UNTIED; > else if (!strcmp ("use_device_ptr", p)) > result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; > + else if (!strcmp ("use_device", p)) > + result = PRAGMA_OACC_CLAUSE_USE_DEVICE; "use_device" sorts before "use_device_ptr". (Done.) 
> @@ -12940,6 +12951,10 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, > clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); > c_name = "self"; > break; > + case PRAGMA_OACC_CLAUSE_USE_DEVICE: > + clauses = c_parser_oacc_clause_use_device (parser, clauses); > + c_name = "use_device"; > + break; > case PRAGMA_OACC_CLAUSE_SEQ: > clauses = c_parser_oacc_simple_clause (parser, OMP_CLAUSE_SEQ, > clauses); Sorting? (Done.) > --- a/gcc/cp/parser.c > +++ b/gcc/cp/parser.c > @@ -29232,6 +29232,8 @@ cp_parser_omp_clause_name (cp_parser *parser) > result = PRAGMA_OMP_CLAUSE_UNTIED; > else if (!strcmp ("use_device_ptr", p)) > result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; > + else if (!strcmp ("use_device", p)) > + result = PRAGMA_OACC_CLAUSE_USE_DEVICE; > break; Likewise. (Done.) > @@ -31598,6 +31600,11 @@ cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask, > clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); > c_name = "self"; > break; > + case PRAGMA_OACC_CLAUSE_USE_DEVICE: > + clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_USE_DEVICE, > + clauses); > + c_name = "use_device"; > + break; > case PRAGMA_OACC_CLAUSE_SEQ: > clauses = cp_parser_oacc_simple_clause (parser, OMP_CLAUSE_SEQ, > clauses, here); Likewise. (Done.) > +#define OACC_HOST_DATA_CLAUSE_MASK \ > + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) ) > + > +/* OpenACC 2.0: > + # pragma acc host_data <clauses> new-line > + structured-block */ Define OACC_HOST_DATA_CLAUSE_MASK after the "accepted syntax" comment. (Done.) There is no handling of OMP_CLAUSE_USE_DEVICE in gcc/cp/pt.c:tsubst_omp_clauses. (Done.) 
> --- a/gcc/gimplify.c > +++ b/gcc/gimplify.c > @@ -6418,6 +6422,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, | if (!lang_GNU_Fortran ()) | switch (code) | { | case OMP_TARGET: > case OMP_TARGET_DATA: > case OMP_TARGET_ENTER_DATA: > case OMP_TARGET_EXIT_DATA: > + case OACC_HOST_DATA: > ctx->target_firstprivatize_array_bases = true; > default: > break; I understand it's not yet relevant/supported for OpenMP in Fortran, but why is C/C++ vs. Fortran being handled differently here for OpenACC host_data? > --- a/libgomp/oacc-parallel.c > +++ b/libgomp/oacc-parallel.c > +void > +GOACC_host_data (int device, size_t mapnum, > + void **hostaddrs, size_t *sizes, unsigned short *kinds) > +{ > + bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; > + struct target_mem_desc *tgt; > + > +#ifdef HAVE_INTTYPES_H > + gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", > + __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); > +#else > + gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", > + __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); > +#endif > + > + goacc_lazy_initialize (); > + > + struct goacc_thread *thr = goacc_thread (); > + struct gomp_device_descr *acc_dev = thr->dev; > + > + /* Host fallback or 'do nothing'. */ > + if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) > + || host_fallback) > + { > + tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, > + GOMP_MAP_VARS_OPENACC); > + tgt->prev = thr->mapped_data; > + thr->mapped_data = tgt; > + > + return; > + } > + > + gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); > + tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, > + GOMP_MAP_VARS_OPENACC); > + gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); > + tgt->prev = thr->mapped_data; > + thr->mapped_data = tgt; > +} Isn't that identical to GOACC_data_start? Can we thus get rid of it? 
> --- /dev/null > +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c > @@ -0,0 +1,100 @@ > +/* { dg-do run { target openacc_nvidia_accel_selected } } */ > +[...] > --- /dev/null > +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c > @@ -0,0 +1,31 @@ > +/* { dg-do run } */ FAILs for non-offloading execution; restrict testing as done in libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c. (Done.) (Hopefully, that's the intention?) > --- /dev/null > +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c > @@ -0,0 +1,29 @@ > +/* { dg-do run } */ Likewise. (Done.) > --- /dev/null > +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c > @@ -0,0 +1,38 @@ > +/* { dg-do run } */ Likewise. (Done.) > --- /dev/null > +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c > @@ -0,0 +1,29 @@ > +/* { dg-do compile } */ Compile tests (checking compiler diagnostics) belong into gcc/testsuite/. (Done.) > --- /dev/null > +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-6.c > @@ -0,0 +1,31 @@ > +/* { dg-do compile } */ Likewise. (Done.) What about the test cases present on gomp-4_0-branch, gcc/testsuite/c-c++-common/goacc/host_data-1.c, gcc/testsuite/c-c++-common/goacc/host_data-2.c, gcc/testsuite/c-c++-common/goacc/host_data-3.c, and gcc/testsuite/c-c++-common/goacc/host_data-4.c, that have not been part of your submission/commit? Also, given the missing handling of OMP_CLAUSE_USE_DEVICE in gcc/cp/pt.c:tsubst_omp_clauses, I assert we don't have any testsuite coverage for C++ templates. Your submission/commit didn't have any execution tests for OpenACC host_data in Fortran. On gomp-4_0-branch, there is libgomp/testsuite/libgomp.oacc-fortran/host_data-1.f90 at least. 
For the "(Done.)" items, as obvious, committed to trunk in r231184: commit 2a7545d57731de7d4918a8786c972259488dbc56 Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Wed Dec 2 15:53:34 2015 +0000 Some OpenACC host_data cleanup gcc/c/ * c-parser.c (c_parser_omp_clause_name) (c_parser_oacc_all_clauses): Alphabetical sorting. gcc/cp/ * parser.c (cp_parser_omp_clause_name) (cp_parser_oacc_all_clauses): Alphabetical sorting. * pt.c (tsubst_omp_clauses): Handle OMP_CLAUSE_USE_DEVICE. gcc/testsuite/ * c-c++-common/goacc/host_data-5.c: New file. * c-c++-common/goacc/host_data-6.c: Likewise. * gfortran.dg/goacc/coarray.f95: XFAIL. * gfortran.dg/goacc/coarray_2.f90: Adjust dg-excess-errors directive. * gfortran.dg/goacc/host_data-tree.f95: Remove dg-prune-output directive. libgomp/ * testsuite/libgomp.oacc-c-c++-common/host_data-2.c: Restrict to target openacc_nvidia_accel_selected. * testsuite/libgomp.oacc-c-c++-common/host_data-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/host_data-5.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/host_data-3.c: Remove file. * testsuite/libgomp.oacc-c-c++-common/host_data-6.c: Remove file. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@231184 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/c/ChangeLog | 5 +++++ gcc/c/c-parser.c | 12 ++++++------ gcc/cp/ChangeLog | 6 ++++++ gcc/cp/parser.c | 20 ++++++++++---------- gcc/cp/pt.c | 2 ++ gcc/testsuite/ChangeLog | 15 +++++++++++++++ .../testsuite/c-c++-common/goacc/host_data-5.c | 6 ------ .../testsuite/c-c++-common/goacc}/host_data-6.c | 6 ------ gcc/testsuite/gfortran.dg/goacc/coarray.f95 | 8 ++++---- gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 | 2 +- gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95 | 1 - libgomp/ChangeLog | 9 +++++++++ .../libgomp.oacc-c-c++-common/host_data-2.c | 2 +- .../libgomp.oacc-c-c++-common/host_data-4.c | 2 +- .../libgomp.oacc-c-c++-common/host_data-5.c | 2 +- 15 files changed, 61 insertions(+), 37 deletions(-) diff --git gcc/c/ChangeLog gcc/c/ChangeLog index acb8ee4..e517467 100644 --- gcc/c/ChangeLog +++ gcc/c/ChangeLog @@ -1,3 +1,8 @@ +2015-12-02 Thomas Schwinge <thomas@codesourcery.com> + + * c-parser.c (c_parser_omp_clause_name) + (c_parser_oacc_all_clauses): Alphabetical sorting. 
+ 2015-12-02 Jakub Jelinek <jakub@redhat.com> PR c/68533 diff --git gcc/c/c-parser.c gcc/c/c-parser.c index d4c512f..ee0a305 100644 --- gcc/c/c-parser.c +++ gcc/c/c-parser.c @@ -10277,10 +10277,10 @@ c_parser_omp_clause_name (c_parser *parser) result = PRAGMA_OMP_CLAUSE_UNIFORM; else if (!strcmp ("untied", p)) result = PRAGMA_OMP_CLAUSE_UNTIED; - else if (!strcmp ("use_device_ptr", p)) - result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; else if (!strcmp ("use_device", p)) result = PRAGMA_OACC_CLAUSE_USE_DEVICE; + else if (!strcmp ("use_device_ptr", p)) + result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; break; case 'v': if (!strcmp ("vector", p)) @@ -12951,10 +12951,6 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); c_name = "self"; break; - case PRAGMA_OACC_CLAUSE_USE_DEVICE: - clauses = c_parser_oacc_clause_use_device (parser, clauses); - c_name = "use_device"; - break; case PRAGMA_OACC_CLAUSE_SEQ: clauses = c_parser_oacc_simple_clause (parser, OMP_CLAUSE_SEQ, clauses); @@ -12964,6 +12960,10 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, clauses = c_parser_oacc_clause_tile (parser, clauses); c_name = "tile"; break; + case PRAGMA_OACC_CLAUSE_USE_DEVICE: + clauses = c_parser_oacc_clause_use_device (parser, clauses); + c_name = "use_device"; + break; case PRAGMA_OACC_CLAUSE_VECTOR: c_name = "vector"; clauses = c_parser_oacc_shape_clause (parser, OMP_CLAUSE_VECTOR, diff --git gcc/cp/ChangeLog gcc/cp/ChangeLog index 385ba63..d2a7e99 100644 --- gcc/cp/ChangeLog +++ gcc/cp/ChangeLog @@ -1,3 +1,9 @@ +2015-12-02 Thomas Schwinge <thomas@codesourcery.com> + + * parser.c (cp_parser_omp_clause_name) + (cp_parser_oacc_all_clauses): Alphabetical sorting. + * pt.c (tsubst_omp_clauses): Handle OMP_CLAUSE_USE_DEVICE. 
+ 2015-12-02 Andreas Arnez <arnez@linux.vnet.ibm.com> PR gcov-profile/68603 diff --git gcc/cp/parser.c gcc/cp/parser.c index f78df02..b4ecac7 100644 --- gcc/cp/parser.c +++ gcc/cp/parser.c @@ -29230,10 +29230,10 @@ cp_parser_omp_clause_name (cp_parser *parser) result = PRAGMA_OMP_CLAUSE_UNIFORM; else if (!strcmp ("untied", p)) result = PRAGMA_OMP_CLAUSE_UNTIED; - else if (!strcmp ("use_device_ptr", p)) - result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; else if (!strcmp ("use_device", p)) result = PRAGMA_OACC_CLAUSE_USE_DEVICE; + else if (!strcmp ("use_device_ptr", p)) + result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; break; case 'v': if (!strcmp ("vector", p)) @@ -31600,11 +31600,6 @@ cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask, clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); c_name = "self"; break; - case PRAGMA_OACC_CLAUSE_USE_DEVICE: - clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_USE_DEVICE, - clauses); - c_name = "use_device"; - break; case PRAGMA_OACC_CLAUSE_SEQ: clauses = cp_parser_oacc_simple_clause (parser, OMP_CLAUSE_SEQ, clauses, here); @@ -31614,6 +31609,11 @@ cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask, clauses = cp_parser_oacc_clause_tile (parser, here, clauses); c_name = "tile"; break; + case PRAGMA_OACC_CLAUSE_USE_DEVICE: + clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_USE_DEVICE, + clauses); + c_name = "use_device"; + break; case PRAGMA_OACC_CLAUSE_VECTOR: c_name = "vector"; clauses = cp_parser_oacc_shape_clause (parser, OMP_CLAUSE_VECTOR, @@ -34516,13 +34516,13 @@ cp_parser_oacc_data (cp_parser *parser, cp_token *pragma_tok) return stmt; } -#define OACC_HOST_DATA_CLAUSE_MASK \ - ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) ) - /* OpenACC 2.0: # pragma acc host_data <clauses> new-line structured-block */ +#define OACC_HOST_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) ) + static tree cp_parser_oacc_host_data (cp_parser *parser, cp_token 
*pragma_tok) { diff --git gcc/cp/pt.c gcc/cp/pt.c index 5befd64..d1d1e4e 100644 --- gcc/cp/pt.c +++ gcc/cp/pt.c @@ -14387,6 +14387,7 @@ tsubst_omp_clauses (tree clauses, bool declare_simd, bool allow_fields, case OMP_CLAUSE_FROM: case OMP_CLAUSE_TO: case OMP_CLAUSE_MAP: + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: OMP_CLAUSE_DECL (nc) @@ -14513,6 +14514,7 @@ tsubst_omp_clauses (tree clauses, bool declare_simd, bool allow_fields, case OMP_CLAUSE_COPYPRIVATE: case OMP_CLAUSE_LINEAR: case OMP_CLAUSE_REDUCTION: + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: /* tsubst_expr on SCOPE_REF results in returning diff --git gcc/testsuite/ChangeLog gcc/testsuite/ChangeLog index 5fe26bb..4f7af87 100644 --- gcc/testsuite/ChangeLog +++ gcc/testsuite/ChangeLog @@ -1,3 +1,18 @@ +2015-12-02 Thomas Schwinge <thomas@codesourcery.com> + + * gfortran.dg/goacc/coarray.f95: XFAIL. + * gfortran.dg/goacc/coarray_2.f90: Adjust dg-excess-errors + directive. + * gfortran.dg/goacc/host_data-tree.f95: Remove dg-prune-output + directive. + +2015-12-02 Thomas Schwinge <thomas@codesourcery.com> + Julian Brown <julian@codesourcery.com> + James Norris <James_Norris@mentor.com> + + * c-c++-common/goacc/host_data-5.c: New file. + * c-c++-common/goacc/host_data-6.c: Likewise. + 2015-12-02 Tom de Vries <tom@codesourcery.com> * c-c++-common/goacc/kernels-default-2.c: New test. 
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c gcc/testsuite/c-c++-common/goacc/host_data-5.c similarity index 82% rename from libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c rename to gcc/testsuite/c-c++-common/goacc/host_data-5.c index 7d9b5f7..f372fbd 100644 --- libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c +++ gcc/testsuite/c-c++-common/goacc/host_data-5.c @@ -1,8 +1,5 @@ /* { dg-do compile } */ -#include <openacc.h> -#include <stdlib.h> - #define N 1024 int main (int argc, char* argv[]) @@ -20,9 +17,6 @@ int main (int argc, char* argv[]) xp = x; } } - - if (xp != acc_deviceptr (x)) - abort (); } return 0; diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-6.c gcc/testsuite/c-c++-common/goacc/host_data-6.c similarity index 84% rename from libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-6.c rename to gcc/testsuite/c-c++-common/goacc/host_data-6.c index a841488..8be7912 100644 --- libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-6.c +++ gcc/testsuite/c-c++-common/goacc/host_data-6.c @@ -1,8 +1,5 @@ /* { dg-do compile } */ -#include <openacc.h> -#include <stdlib.h> - #define N 1024 int main (int argc, char* argv[]) @@ -22,9 +19,6 @@ int main (int argc, char* argv[]) xp = x; /* { dg-error "variable 'x' declared in enclosing 'host_data' region" } */ } } - - if (xp != acc_deviceptr (x)) - abort (); } return 0; diff --git gcc/testsuite/gfortran.dg/goacc/coarray.f95 gcc/testsuite/gfortran.dg/goacc/coarray.f95 index 130ffc3..d2f10d5 100644 --- gcc/testsuite/gfortran.dg/goacc/coarray.f95 +++ gcc/testsuite/gfortran.dg/goacc/coarray.f95 @@ -1,7 +1,9 @@ ! { dg-do compile } ! { dg-additional-options "-fcoarray=single" } - -! TODO: These cases must fail +! +! PR fortran/63861 +! { dg-xfail-if "<http://gcc.gnu.org/PR63861>" { *-*-* } } +! { dg-excess-errors "TODO" } module test contains @@ -9,7 +11,6 @@ contains implicit none integer :: i integer, codimension[*] :: a - ! 
{ dg-excess-errors "sorry, unimplemented: directive not yet implemented" } !$acc declare device_resident (a) !$acc data copy (a) !$acc end data @@ -17,7 +18,6 @@ contains !$acc end data !$acc parallel private (a) !$acc end parallel - ! { dg-excess-errors "sorry, unimplemented: directive not yet implemented" } !$acc host_data use_device (a) !$acc end host_data !$acc parallel loop reduction(+:a) diff --git gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 index f9cf9ac..87e04d5 100644 --- gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 +++ gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 @@ -3,6 +3,7 @@ ! ! PR fortran/63861 ! { dg-xfail-if "<http://gcc.gnu.org/PR63861>" { *-*-* } } +! { dg-excess-errors "TODO" } module test contains @@ -106,4 +107,3 @@ contains !$acc update self (a) end subroutine oacc4 end module test -! { dg-excess-errors "sorry, unimplemented: directive not yet implemented" } diff --git gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95 gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95 index e4c8205..7a5eea6 100644 --- gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95 +++ gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95 @@ -8,5 +8,4 @@ program test !$acc host_data use_device(i) !$acc end host_data end program test -! { dg-prune-output "unimplemented" } ! { dg-final { scan-tree-dump-times "pragma acc host_data use_device\\(i\\)" 1 "original" } } diff --git libgomp/ChangeLog libgomp/ChangeLog index ddf836a..cde0b5c 100644 --- libgomp/ChangeLog +++ libgomp/ChangeLog @@ -1,3 +1,12 @@ +2015-12-02 Thomas Schwinge <thomas@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/host_data-2.c: Restrict to + target openacc_nvidia_accel_selected. + * testsuite/libgomp.oacc-c-c++-common/host_data-4.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/host_data-5.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/host_data-3.c: Remove file. + * testsuite/libgomp.oacc-c-c++-common/host_data-6.c: Remove file. 
+ 2015-12-01 Julian Brown <julian@codesourcery.com> James Norris <James_Norris@mentor.com> diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c index 9820286..614f143 100644 --- libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c +++ libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c @@ -1,4 +1,4 @@ -/* { dg-do run } */ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ #include <stdlib.h> #include <openacc.h> diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c index 3504f27..0ab5a35 100644 --- libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c +++ libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c @@ -1,4 +1,4 @@ -/* { dg-do run } */ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ #include <openacc.h> #include <stdlib.h> diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c index 268e919..a3737a7 100644 --- libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c +++ libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c @@ -1,4 +1,4 @@ -/* { dg-do run } */ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ #include <openacc.h> #include <stdlib.h> Grüße Thomas [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 472 bytes --] ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [OpenACC 0/7] host_data construct 2015-12-02 15:59 ` Thomas Schwinge @ 2015-12-02 19:16 ` Cesar Philippidis 2015-12-02 19:28 ` Steve Kargl 2015-12-02 19:35 ` Jakub Jelinek 2015-12-02 22:14 ` [gomp4] " Thomas Schwinge 2016-02-02 13:57 ` [OpenACC 0/7] host_data construct Thomas Schwinge 2 siblings, 2 replies; 33+ messages in thread From: Cesar Philippidis @ 2015-12-02 19:16 UTC (permalink / raw) To: Thomas Schwinge, Julian Brown, GCC Patches, Jakub Jelinek Cc: James Norris, Joseph S. Myers, Nathan Sidwell, Fortran List [-- Attachment #1: Type: text/plain, Size: 2194 bytes --] On 12/02/2015 07:58 AM, Thomas Schwinge wrote: > diff --git gcc/testsuite/gfortran.dg/goacc/coarray.f95 gcc/testsuite/gfortran.dg/goacc/coarray.f95 > index 130ffc3..d2f10d5 100644 > --- gcc/testsuite/gfortran.dg/goacc/coarray.f95 > +++ gcc/testsuite/gfortran.dg/goacc/coarray.f95 > @@ -1,7 +1,9 @@ > ! { dg-do compile } > ! { dg-additional-options "-fcoarray=single" } > - > -! TODO: These cases must fail > +! > +! PR fortran/63861 > +! { dg-xfail-if "<http://gcc.gnu.org/PR63861>" { *-*-* } } > +! { dg-excess-errors "TODO" } > > module test > contains > @@ -9,7 +11,6 @@ contains > implicit none > integer :: i > integer, codimension[*] :: a > - ! { dg-excess-errors "sorry, unimplemented: directive not yet implemented" } > !$acc declare device_resident (a) > !$acc data copy (a) > !$acc end data > @@ -17,7 +18,6 @@ contains > !$acc end data > !$acc parallel private (a) > !$acc end parallel > - ! { dg-excess-errors "sorry, unimplemented: directive not yet implemented" } > !$acc host_data use_device (a) > !$acc end host_data > !$acc parallel loop reduction(+:a) > diff --git gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 > index f9cf9ac..87e04d5 100644 > --- gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 > +++ gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 > @@ -3,6 +3,7 @@ > ! > ! PR fortran/63861 > ! 
{ dg-xfail-if "<http://gcc.gnu.org/PR63861>" { *-*-* } } > +! { dg-excess-errors "TODO" } This host_data patch exposed a bug in the fortran front end where it was allowing arrays to be used as reduction variables. If you replace codimension with dimension, you'd see a similar ICE. The attached patch, while it doesn't make any attempt to fix the gimplifier changes, does teach the fortran front end to error on acc reductions containing array variables. Note that this solution is somewhat aggressive because we probably should allow reductions on individual array elements. E.g. !$acc loop reduction(+:var(1)) The c and c++ front ends also have that problem. Maybe I'll revisit this later. Is this ok for trunk? It will close pr63861. Cesar [-- Attachment #2: gfc_array-reductions.diff --] [-- Type: text/x-patch, Size: 8939 bytes --] 2015-12-02 Cesar Philippidis <cesar@codesourcery.com> gcc/fortran/ PR fortran/63861 * openmp.c (gfc_match_omp_clauses): Allow subarrays for acc reductions. (resolve_omp_clauses): Error on any acc reductions on arrays. gcc/testsuite/ * gfortran.dg/goacc/array-reduction.f90: New test. * gfortran.dg/goacc/assumed.f95: Update expected diagnostics. * gfortran.dg/goacc/coarray.f95: Likewise. * gfortran.dg/goacc/coarray_2.f90: Likewise. * gfortran.dg/goacc/reduction-2.f95: Likewise. * gfortran.dg/goacc/reduction.f95: Likewise. diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c index 6182464..276f2f1 100644 --- a/gcc/fortran/openmp.c +++ b/gcc/fortran/openmp.c @@ -978,7 +978,8 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, uint64_t mask, if (gfc_match_omp_variable_list (" :", &c->lists[OMP_LIST_REDUCTION], - false, NULL, &head) == MATCH_YES) + false, NULL, &head, openacc) + == MATCH_YES) { gfc_omp_namelist *n; if (rop == OMP_REDUCTION_NONE) @@ -3313,6 +3314,11 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses, n->sym->name, &n->where); else n->sym->mark = 1; + + /* OpenACC does not support reductions on arrays. 
*/ + if (n->sym->as) + gfc_error ("Array %qs is not permitted in reduction at %L", + n->sym->name, &n->where); } } diff --git a/gcc/testsuite/gfortran.dg/goacc/array-reduction.f90 b/gcc/testsuite/gfortran.dg/goacc/array-reduction.f90 new file mode 100644 index 0000000..d71c400 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/array-reduction.f90 @@ -0,0 +1,74 @@ +program test + implicit none + integer a(10), i + + a(:) = 0 + + ! Array reductions. + + !$acc parallel reduction (+:a) ! { dg-error "Array 'a' is not permitted in reduction" } + do i = 1, 10 + a = a + 1 + end do + !$acc end parallel + + !$acc parallel + !$acc loop reduction (+:a) ! { dg-error "Array 'a' is not permitted in reduction" } + do i = 1, 10 + a = a + 1 + end do + !$acc end parallel + + !$acc kernels + !$acc loop reduction (+:a) ! { dg-error "Array 'a' is not permitted in reduction" } + do i = 1, 10 + a = a + 1 + end do + !$acc end kernels + + ! Subarray reductions. + + !$acc parallel reduction (+:a(1:5)) ! { dg-error "Array 'a' is not permitted in reduction" } + do i = 1, 10 + a = a + 1 + end do + !$acc end parallel + + !$acc parallel + !$acc loop reduction (+:a(1:5)) ! { dg-error "Array 'a' is not permitted in reduction" } + do i = 1, 10 + a = a + 1 + end do + !$acc end parallel + + !$acc kernels + !$acc loop reduction (+:a(1:5)) ! { dg-error "Array 'a' is not permitted in reduction" } + do i = 1, 10 + a = a + 1 + end do + !$acc end kernels + + ! Reductions on array elements. + + !$acc parallel reduction (+:a(1)) ! { dg-error "Array 'a' is not permitted in reduction" } + do i = 1, 10 + a(1) = a(1) + 1 + end do + !$acc end parallel + + !$acc parallel + !$acc loop reduction (+:a(1)) ! { dg-error "Array 'a' is not permitted in reduction" } + do i = 1, 10 + a(1) = a(1) + 1 + end do + !$acc end parallel + + !$acc kernels + !$acc loop reduction (+:a(1)) ! 
{ dg-error "Array 'a' is not permitted in reduction" } + do i = 1, 10 + a(1) = a(1) + 1 + end do + !$acc end kernels + + print *, a +end program test diff --git a/gcc/testsuite/gfortran.dg/goacc/assumed.f95 b/gcc/testsuite/gfortran.dg/goacc/assumed.f95 index 3287241..4efe5a2 100644 --- a/gcc/testsuite/gfortran.dg/goacc/assumed.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/assumed.f95 @@ -45,3 +45,6 @@ contains !$acc update self (a) ! { dg-error "Assumed rank" } end subroutine assumed_rank end module test + +! { dg-error "Array 'a' is not permitted in reduction" "" { target "*-*-*" } 18 } +! { dg-error "Array 'a' is not permitted in reduction" "" { target "*-*-*" } 39 } diff --git a/gcc/testsuite/gfortran.dg/goacc/coarray.f95 b/gcc/testsuite/gfortran.dg/goacc/coarray.f95 index d2f10d5..932e1f7 100644 --- a/gcc/testsuite/gfortran.dg/goacc/coarray.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/coarray.f95 @@ -2,8 +2,6 @@ ! { dg-additional-options "-fcoarray=single" } ! ! PR fortran/63861 -! { dg-xfail-if "<http://gcc.gnu.org/PR63861>" { *-*-* } } -! { dg-excess-errors "TODO" } module test contains @@ -20,7 +18,7 @@ contains !$acc end parallel !$acc host_data use_device (a) !$acc end host_data - !$acc parallel loop reduction(+:a) + !$acc parallel loop reduction(+:a) ! { dg-error "Array 'a' is not permitted in reduction" } do i = 1,5 enddo !$acc end parallel loop diff --git a/gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 b/gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 index 87e04d5..05167a1 100644 --- a/gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 +++ b/gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 @@ -2,8 +2,6 @@ ! { dg-additional-options "-fcoarray=lib" } ! ! PR fortran/63861 -! { dg-xfail-if "<http://gcc.gnu.org/PR63861>" { *-*-* } } -! { dg-excess-errors "TODO" } module test contains @@ -20,7 +18,7 @@ contains !$acc end parallel !$acc host_data use_device (a) !$acc end host_data - !$acc parallel loop reduction(+:a) + !$acc parallel loop reduction(+:a) ! 
{ dg-error "Array 'a' is not permitted in reduction" } do i = 1,5 enddo !$acc end parallel loop @@ -72,7 +70,7 @@ contains !$acc end parallel !$acc host_data use_device (a) !$acc end host_data - !$acc parallel loop reduction(+:a) + !$acc parallel loop reduction(+:a) ! { dg-error "Array 'a' is not permitted in reduction" } do i = 1,5 enddo !$acc end parallel loop @@ -94,7 +92,7 @@ contains !$acc end data !$acc parallel private (a) !$acc end parallel - !$acc parallel loop reduction(+:a) + !$acc parallel loop reduction(+:a) ! { dg-error "Array 'a' is not permitted in reduction" } do i = 1,5 enddo !$acc end parallel loop diff --git a/gcc/testsuite/gfortran.dg/goacc/reduction-2.f95 b/gcc/testsuite/gfortran.dg/goacc/reduction-2.f95 index 89e63ae..929fb0e 100644 --- a/gcc/testsuite/gfortran.dg/goacc/reduction-2.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/reduction-2.f95 @@ -17,6 +17,6 @@ end subroutine ! { dg-final { scan-tree-dump-times "target oacc_parallel firstprivate.a." 1 "gimple" } } ! { dg-final { scan-tree-dump-times "acc loop private.p. reduction..:a." 1 "gimple" } } -! { dg-final { scan-tree-dump-times "target oacc_kernels map.tofrom:a .len: 4.." 1 "gimple" } } +! { dg-final { scan-tree-dump-times "target oacc_kernels map.force_tofrom:a .len: 4.." 1 "gimple" } } ! { dg-final { scan-tree-dump-times "acc loop private.k. reduction..:a." 1 "gimple" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/reduction.f95 b/gcc/testsuite/gfortran.dg/goacc/reduction.f95 index 833230a..a13574b 100644 --- a/gcc/testsuite/gfortran.dg/goacc/reduction.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/reduction.f95 @@ -136,3 +136,26 @@ common /blk/ i1 !$acc end parallel end subroutine + +! { dg-error "Array 'ia2' is not permitted in reduction" "" { target "*-*-*" } 27 } +! { dg-error "Array 'ra1' is not permitted in reduction" "" { target "*-*-*" } 29 } +! { dg-error "Array 'ca1' is not permitted in reduction" "" { target "*-*-*" } 31 } +! 
{ dg-error "Array 'da1' is not permitted in reduction" "" { target "*-*-*" } 33 } +! { dg-error "Array 'la1' is not permitted in reduction" "" { target "*-*-*" } 35 } +! { dg-error "Array 'aa1' is not permitted in reduction" "" { target "*-*-*" } 65 } +! { dg-error "Array 'ia1' is not permitted in reduction" "" { target "*-*-*" } 67 } +! { dg-error "Array 'la1' is not permitted in reduction" "" { target "*-*-*" } 71 } +! { dg-error "Array 'ta1' is not permitted in reduction" "" { target "*-*-*" } 77 } +! { dg-error "Array 'ia2' is not permitted in reduction" "" { target "*-*-*" } 81 } +! { dg-error "Array 'ra1' is not permitted in reduction" "" { target "*-*-*" } 85 } +! { dg-error "Array 'da1' is not permitted in reduction" "" { target "*-*-*" } 89 } +! { dg-error "Array 'ca1' is not permitted in reduction" "" { target "*-*-*" } 93 } +! { dg-error "Array 'ta1' is not permitted in reduction" "" { target "*-*-*" } 99 } +! { dg-error "Array 'ca1' is not permitted in reduction" "" { target "*-*-*" } 103 } +! { dg-error "Array 'la1' is not permitted in reduction" "" { target "*-*-*" } 107 } +! { dg-error "Array 'ta1' is not permitted in reduction" "" { target "*-*-*" } 113 } +! { dg-error "Array 'ra1' is not permitted in reduction" "" { target "*-*-*" } 117 } +! { dg-error "Array 'da1' is not permitted in reduction" "" { target "*-*-*" } 121 } +! { dg-error "Array 'ca1' is not permitted in reduction" "" { target "*-*-*" } 125 } +! { dg-error "Array 'la1' is not permitted in reduction" "" { target "*-*-*" } 129 } +! { dg-error "Array 'ta1' is not permitted in reduction" "" { target "*-*-*" } 135 } ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [OpenACC 0/7] host_data construct 2015-12-02 19:16 ` Cesar Philippidis @ 2015-12-02 19:28 ` Steve Kargl 2015-12-02 19:35 ` Jakub Jelinek 1 sibling, 0 replies; 33+ messages in thread From: Steve Kargl @ 2015-12-02 19:28 UTC (permalink / raw) To: Cesar Philippidis Cc: Thomas Schwinge, Julian Brown, GCC Patches, Jakub Jelinek, James Norris, Joseph S. Myers, Nathan Sidwell, Fortran List On Wed, Dec 02, 2015 at 11:16:10AM -0800, Cesar Philippidis wrote: > > This host_data patch exposed a bug in the fortran front end where it was > allowing arrays to be used as reduction variables. If replace you > replace codimension with dimension, you'd see a similar ICE. The > attached patch, while it doesn't make any attempt to fix the gimplifier > changes, does teach the fortran front end to error on acc reductions > containing array variables. > > Note that this solution is somewhat aggressive because we probably > should allow reductions on individual array elements. E.g. > > !$acc loop reduction(+:var(1)) > > The c and c++ front ends also have that problem. Maybe I'll revisit this > later. > > Is this ok for trunk? It will close pr63861. > I think that it is OK, but will defer to Jakub or Thomas. I suspect that Jakub may be pre-occupied with the upcoming 5.3 release. -- Steve ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [OpenACC 0/7] host_data construct 2015-12-02 19:16 ` Cesar Philippidis 2015-12-02 19:28 ` Steve Kargl @ 2015-12-02 19:35 ` Jakub Jelinek 2015-12-02 19:54 ` Cesar Philippidis 1 sibling, 1 reply; 33+ messages in thread From: Jakub Jelinek @ 2015-12-02 19:35 UTC (permalink / raw) To: Cesar Philippidis Cc: Thomas Schwinge, Julian Brown, GCC Patches, James Norris, Joseph S. Myers, Nathan Sidwell, Fortran List On Wed, Dec 02, 2015 at 11:16:10AM -0800, Cesar Philippidis wrote: > > --- gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 > > +++ gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 > > @@ -3,6 +3,7 @@ > > ! > > ! PR fortran/63861 > > ! { dg-xfail-if "<http://gcc.gnu.org/PR63861>" { *-*-* } } > > +! { dg-excess-errors "TODO" } > > This host_data patch exposed a bug in the fortran front end where it was > allowing arrays to be used as reduction variables. If replace you > replace codimension with dimension, you'd see a similar ICE. The > attached patch, while it doesn't make any attempt to fix the gimplifier > changes, does teach the fortran front end to error on acc reductions > containing array variables. Does the OpenACC standard disallow array reductions? Just asking, because OpenMP allows them (up to 4.0 only in Fortran, in 4.5 also C/C++ array sections are allowed). If the OpenACC standard disallows them, then it is desirable to reject them and the patch is ok, otherwise you should try harder to support them ;). Jakub ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [OpenACC 0/7] host_data construct 2015-12-02 19:35 ` Jakub Jelinek @ 2015-12-02 19:54 ` Cesar Philippidis 0 siblings, 0 replies; 33+ messages in thread From: Cesar Philippidis @ 2015-12-02 19:54 UTC (permalink / raw) To: Jakub Jelinek Cc: Thomas Schwinge, Julian Brown, GCC Patches, James Norris, Joseph S. Myers, Nathan Sidwell, Fortran List On 12/02/2015 11:35 AM, Jakub Jelinek wrote: > On Wed, Dec 02, 2015 at 11:16:10AM -0800, Cesar Philippidis wrote: >>> --- gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 >>> +++ gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 >>> @@ -3,6 +3,7 @@ >>> ! >>> ! PR fortran/63861 >>> ! { dg-xfail-if "<http://gcc.gnu.org/PR63861>" { *-*-* } } >>> +! { dg-excess-errors "TODO" } >> >> This host_data patch exposed a bug in the fortran front end where it was >> allowing arrays to be used as reduction variables. If replace you >> replace codimension with dimension, you'd see a similar ICE. The >> attached patch, while it doesn't make any attempt to fix the gimplifier >> changes, does teach the fortran front end to error on acc reductions >> containing array variables. > > Does the OpenACC standard disallow array reductions? > Just asking, because OpenMP allows them (up to 4.0 only in Fortran, > in 4.5 also C/C++ array sections are allowed). > > If the OpenACC standard disallows them, then it is desirable to reject them > and the patch is ok, otherwise you should try harder to support them ;). Array reductions aren't supported in OpenACC 2.0. Cesar ^ permalink raw reply [flat|nested] 33+ messages in thread
* [gomp4] Re: [OpenACC 0/7] host_data construct 2015-12-02 15:59 ` Thomas Schwinge 2015-12-02 19:16 ` Cesar Philippidis @ 2015-12-02 22:14 ` Thomas Schwinge 2016-04-08 13:41 ` Fortran OpenACC host_data construct ICE (was: [gomp4] Re: [OpenACC 0/7] host_data construct) Thomas Schwinge 2016-02-02 13:57 ` [OpenACC 0/7] host_data construct Thomas Schwinge 2 siblings, 1 reply; 33+ messages in thread From: Thomas Schwinge @ 2015-12-02 22:14 UTC (permalink / raw) To: Julian Brown, GCC Patches, Cesar Philippidis, James Norris Cc: Joseph S. Myers, Nathan Sidwell, Jakub Jelinek [-- Attachment #1: Type: text/plain, Size: 45078 bytes --] Hi! On Wed, 2 Dec 2015 16:58:45 +0100, I wrote: > Cesar and Jim copied, for help with Fortran and generally testsuite > things. > > On Mon, 30 Nov 2015 19:30:34 +0000, Julian Brown <julian@codesourcery.com> wrote: > > [patch] > > First, thanks! Aside from a number of formatting/re-ordering changes, the front end changes were basically still the same, but otherwise (middle end, libgomp) the patch as committed to trunk in r231118 was quite (totally?) ;-) different from the code we had on gomp-4_0-branch, so I had to spend some time on merging, cleaning things up. > What about the test cases present on gomp-4_0-branch, > gcc/testsuite/c-c++-common/goacc/host_data-1.c, > gcc/testsuite/c-c++-common/goacc/host_data-2.c, > gcc/testsuite/c-c++-common/goacc/host_data-3.c, and > gcc/testsuite/c-c++-common/goacc/host_data-4.c, [...] In the merge, I had to move two use_device usages from c-c++-common/goacc/host_data-1.c (was accepted) to c-c++-common/goacc/host_data-2.c (now rejected); I hope that's correct. > Your submission/commit didn't have any execution tests for OpenACC > host_data in Fortran. On gomp-4_0-branch, there is > libgomp/testsuite/libgomp.oacc-fortran/host_data-1.f90 at least. 
..., but this one now FAILs (ICE) as follows: [...]/source-gcc/libgomp/testsuite/libgomp.oacc-fortran/host_data-1.f90:11:0: internal compiler error: in scan_omp_target, at omp-low.c:3218 0xa33e80 scan_omp_target [...]/source-gcc/gcc/omp-low.c:3218 0xa33e80 scan_omp_1_stmt [...]/source-gcc/gcc/omp-low.c:3980 0x8e4e7e walk_gimple_stmt(gimple_stmt_iterator*, tree_node* (*)(gimple_stmt_iterator*, bool*, walk_stmt_info*), tree_node* (*)(tree_node**, int*, void*), walk_stmt_info*) [...]/source-gcc/gcc/gimple-walk.c:555 0x8e50b8 walk_gimple_seq_mod(gimple**, tree_node* (*)(gimple_stmt_iterator*, bool*, walk_stmt_info*), tree_node* (*)(tree_node**, int*, void*), walk_stmt_info*) [...]/source-gcc/gcc/gimple-walk.c:51 0x8e4f62 walk_gimple_stmt(gimple_stmt_iterator*, tree_node* (*)(gimple_stmt_iterator*, bool*, walk_stmt_info*), tree_node* (*)(tree_node**, int*, void*), walk_stmt_info*) [...]/source-gcc/gcc/gimple-walk.c:583 0x8e50b8 walk_gimple_seq_mod(gimple**, tree_node* (*)(gimple_stmt_iterator*, bool*, walk_stmt_info*), tree_node* (*)(tree_node**, int*, void*), walk_stmt_info*) [...]/source-gcc/gcc/gimple-walk.c:51 0x8e4ff2 walk_gimple_stmt(gimple_stmt_iterator*, tree_node* (*)(gimple_stmt_iterator*, bool*, walk_stmt_info*), tree_node* (*)(tree_node**, int*, void*), walk_stmt_info*) [...]/source-gcc/gcc/gimple-walk.c:619 0x8e50b8 walk_gimple_seq_mod(gimple**, tree_node* (*)(gimple_stmt_iterator*, bool*, walk_stmt_info*), tree_node* (*)(tree_node**, int*, void*), walk_stmt_info*) [...]/source-gcc/gcc/gimple-walk.c:51 0xa02479 scan_omp [...]/source-gcc/gcc/omp-low.c:4024 0xa32ea5 scan_omp_target [...]/source-gcc/gcc/omp-low.c:3204 0xa32ea5 scan_omp_1_stmt [...]/source-gcc/gcc/omp-low.c:3980 0x8e4e7e walk_gimple_stmt(gimple_stmt_iterator*, tree_node* (*)(gimple_stmt_iterator*, bool*, walk_stmt_info*), tree_node* (*)(tree_node**, int*, void*), walk_stmt_info*) [...]/source-gcc/gcc/gimple-walk.c:555 0x8e50b8 walk_gimple_seq_mod(gimple**, tree_node* (*)(gimple_stmt_iterator*, 
bool*, walk_stmt_info*), tree_node* (*)(tree_node**, int*, void*), walk_stmt_info*) [...]/source-gcc/gcc/gimple-walk.c:51 0x8e4ff2 walk_gimple_stmt(gimple_stmt_iterator*, tree_node* (*)(gimple_stmt_iterator*, bool*, walk_stmt_info*), tree_node* (*)(tree_node**, int*, void*), walk_stmt_info*) [...]/source-gcc/gcc/gimple-walk.c:619 0x8e50b8 walk_gimple_seq_mod(gimple**, tree_node* (*)(gimple_stmt_iterator*, bool*, walk_stmt_info*), tree_node* (*)(tree_node**, int*, void*), walk_stmt_info*) [...]/source-gcc/gcc/gimple-walk.c:51 0x8e4f62 walk_gimple_stmt(gimple_stmt_iterator*, tree_node* (*)(gimple_stmt_iterator*, bool*, walk_stmt_info*), tree_node* (*)(tree_node**, int*, void*), walk_stmt_info*) [...]/source-gcc/gcc/gimple-walk.c:583 0x8e50b8 walk_gimple_seq_mod(gimple**, tree_node* (*)(gimple_stmt_iterator*, bool*, walk_stmt_info*), tree_node* (*)(tree_node**, int*, void*), walk_stmt_info*) [...]/source-gcc/gcc/gimple-walk.c:51 0xa02479 scan_omp [...]/source-gcc/gcc/omp-low.c:4024 0xa3f35a execute_lower_omp [...]/source-gcc/gcc/omp-low.c:16735 0xa3f35a execute [...]/source-gcc/gcc/omp-low.c:16782 Maybe that's due to the gcc/gimplify.c:gimplify_scan_omp_clauses issue mentioned in <http://news.gmane.org/find-root.php?message_id=%3C877fkwn8p6.fsf%40kepler.schwinge.homeip.net%3E>, or maybe something else? (XFAILed for now.) (For avoidance of doubt, the merge does not include my "Some OpenACC host_data cleanup" commit, trunk r231184, which will get merged into gomp-4_0-branch later.) So, merging trunk r231118 into gomp-4_0-branch, I effectively applied the following patch, in r231207. Please verify. For instance, do we need to re-instantiate any of the testsuite code that we've lost here, or is all of that actually not supported? 
commit 15723d76ae42dfe3f7201e0e3c6cbd9f4fc480b2 Merge: e08db3c 571b348 Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Wed Dec 2 21:52:25 2015 +0000 svn merge -r 231117:231118 svn+ssh://gcc.gnu.org/svn/gcc/trunk git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@231207 138bc75d-0d04-0410-961f-82ee72b054a4 gcc/ChangeLog | 29 ++++ gcc/c-family/ChangeLog | 8 + gcc/c/ChangeLog | 14 ++ gcc/c/c-parser.c | 12 +- gcc/c/c-typeck.c | 2 +- gcc/cp/ChangeLog | 14 ++ gcc/cp/parser.c | 62 ++++---- gcc/cp/semantics.c | 6 +- gcc/gimple-pretty-print.c | 3 + gcc/gimple.h | 2 + gcc/gimplify.c | 177 +++++---------------- gcc/omp-builtins.def | 4 +- gcc/omp-low.c | 25 ++- gcc/testsuite/c-c++-common/goacc/host_data-1.c | 4 +- gcc/testsuite/c-c++-common/goacc/host_data-2.c | 10 ++ gcc/testsuite/gfortran.dg/goacc/coarray.f95 | 2 - gcc/tree-nested.c | 2 + libgomp/ChangeLog | 12 ++ libgomp/libgomp.map | 2 +- libgomp/oacc-mem.c | 32 ---- libgomp/oacc-parallel.c | 40 +++++ .../libgomp.oacc-c-c++-common/host_data-1.c | 39 +---- .../libgomp.oacc-c-c++-common/host_data-2.c | 57 +++---- .../libgomp.oacc-c-c++-common/host_data-3.c | 29 ++++ .../libgomp.oacc-c-c++-common/host_data-4.c | 29 ++++ .../libgomp.oacc-c-c++-common/host_data-5.c | 38 +++++ .../libgomp.oacc-c-c++-common/host_data-6.c | 31 ++++ .../testsuite/libgomp.oacc-fortran/host_data-1.f90 | 5 +- 28 files changed, 394 insertions(+), 296 deletions(-) [diff --git gcc/ChangeLog gcc/ChangeLog] [diff --git gcc/c-family/ChangeLog gcc/c-family/ChangeLog] [diff --git gcc/c/ChangeLog gcc/c/ChangeLog] diff --git gcc/c/c-parser.c gcc/c/c-parser.c index 7191665..0251b80 100644 --- gcc/c/c-parser.c +++ gcc/c/c-parser.c @@ -10315,10 +10315,10 @@ c_parser_omp_clause_name (c_parser *parser, bool consume_token = true) result = PRAGMA_OMP_CLAUSE_UNIFORM; else if (!strcmp ("untied", p)) result = PRAGMA_OMP_CLAUSE_UNTIED; - else if (!strcmp ("use_device", p)) - result = PRAGMA_OACC_CLAUSE_USE_DEVICE; else if (!strcmp 
("use_device_ptr", p)) result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; + else if (!strcmp ("use_device", p)) + result = PRAGMA_OACC_CLAUSE_USE_DEVICE; break; case 'v': if (!strcmp ("vector", p)) @@ -13113,6 +13113,10 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, clauses = c_parser_omp_clause_reduction (parser, clauses); c_name = "reduction"; break; + case PRAGMA_OACC_CLAUSE_USE_DEVICE: + clauses = c_parser_oacc_clause_use_device (parser, clauses); + c_name = "use_device"; + break; case PRAGMA_OACC_CLAUSE_SEQ: clauses = c_parser_oacc_simple_clause (parser, OMP_CLAUSE_SEQ, clauses); @@ -13122,10 +13126,6 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, clauses = c_parser_oacc_clause_tile (parser, clauses); c_name = "tile"; break; - case PRAGMA_OACC_CLAUSE_USE_DEVICE: - clauses = c_parser_oacc_clause_use_device (parser, clauses); - c_name = "use_device"; - break; case PRAGMA_OACC_CLAUSE_VECTOR: c_name = "vector"; clauses = c_parser_oacc_shape_clause (parser, OMP_CLAUSE_VECTOR, diff --git gcc/c/c-typeck.c gcc/c/c-typeck.c index c40f6da..4659814 100644 --- gcc/c/c-typeck.c +++ gcc/c/c-typeck.c @@ -13168,6 +13168,7 @@ c_finish_omp_clauses (tree clauses, bool is_oacc, bool is_omp, bool declare_simd bitmap_set_bit (&map_head, DECL_UID (t)); goto check_dup_generic; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_IS_DEVICE_PTR: case OMP_CLAUSE_USE_DEVICE_PTR: t = OMP_CLAUSE_DECL (c); @@ -13230,7 +13231,6 @@ c_finish_omp_clauses (tree clauses, bool is_oacc, bool is_omp, bool declare_simd case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_BIND: case OMP_CLAUSE_NOHOST: case OMP_CLAUSE_TILE: [diff --git gcc/cp/ChangeLog gcc/cp/ChangeLog] diff --git gcc/cp/parser.c gcc/cp/parser.c index ac3f45c..d32aa91 100644 --- gcc/cp/parser.c +++ gcc/cp/parser.c @@ -29242,10 +29242,10 @@ cp_parser_omp_clause_name (cp_parser *parser, bool consume_token = true) result = 
PRAGMA_OMP_CLAUSE_UNIFORM; else if (!strcmp ("untied", p)) result = PRAGMA_OMP_CLAUSE_UNTIED; - else if (!strcmp ("use_device", p)) - result = PRAGMA_OACC_CLAUSE_USE_DEVICE; else if (!strcmp ("use_device_ptr", p)) result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; + else if (!strcmp ("use_device", p)) + result = PRAGMA_OACC_CLAUSE_USE_DEVICE; break; case 'v': if (!strcmp ("vector", p)) @@ -31752,6 +31752,11 @@ cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask, clauses = cp_parser_omp_clause_reduction (parser, clauses); c_name = "reduction"; break; + case PRAGMA_OACC_CLAUSE_USE_DEVICE: + clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_USE_DEVICE, + clauses); + c_name = "use_device"; + break; case PRAGMA_OACC_CLAUSE_SEQ: clauses = cp_parser_oacc_simple_clause (parser, OMP_CLAUSE_SEQ, clauses, here); @@ -31761,11 +31766,6 @@ cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask, clauses = cp_parser_oacc_clause_tile (parser, here, clauses); c_name = "tile"; break; - case PRAGMA_OACC_CLAUSE_USE_DEVICE: - clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_USE_DEVICE, - clauses); - c_name = "use_device"; - break; case PRAGMA_OACC_CLAUSE_VECTOR: c_name = "vector"; clauses = cp_parser_oacc_shape_clause (parser, OMP_CLAUSE_VECTOR, @@ -34671,6 +34671,30 @@ cp_parser_oacc_data (cp_parser *parser, cp_token *pragma_tok) return stmt; } +#define OACC_HOST_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) ) + +/* OpenACC 2.0: + # pragma acc host_data <clauses> new-line + structured-block */ + +static tree +cp_parser_oacc_host_data (cp_parser *parser, cp_token *pragma_tok) +{ + tree stmt, clauses, block; + unsigned int save; + + clauses = cp_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK, + "#pragma acc host_data", pragma_tok); + + block = begin_omp_parallel (); + save = cp_parser_begin_omp_structured_block (parser); + cp_parser_statement (parser, NULL_TREE, false, NULL); + cp_parser_end_omp_structured_block 
(parser, save); + stmt = finish_oacc_host_data (clauses, block); + return stmt; +} + /* OpenACC 2.0: # pragma acc declare oacc-data-clause[optseq] new-line */ @@ -34823,30 +34847,6 @@ cp_parser_oacc_declare (cp_parser *parser, cp_token *pragma_tok) return NULL_TREE; } -#define OACC_HOST_DATA_CLAUSE_MASK \ - ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) ) - -/* OpenACC 2.0: - # pragma acc host_data <clauses> new-line - structured-block */ - -static tree -cp_parser_oacc_host_data (cp_parser *parser, cp_token *pragma_tok) -{ - tree stmt, clauses, block; - unsigned int save; - - clauses = cp_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK, - "#pragma acc host_data", pragma_tok); - - block = begin_omp_parallel (); - save = cp_parser_begin_omp_structured_block (parser); - cp_parser_statement (parser, NULL_TREE, false, NULL); - cp_parser_end_omp_structured_block (parser, save); - stmt = finish_oacc_host_data (clauses, block); - return stmt; -} - /* OpenACC 2.0: # pragma acc enter data oacc-enter-data-clause[optseq] new-line diff --git gcc/cp/semantics.c gcc/cp/semantics.c index 0d7e23d..a9a6671 100644 --- gcc/cp/semantics.c +++ gcc/cp/semantics.c @@ -6911,6 +6911,7 @@ finish_omp_clauses (tree clauses, bool is_oacc, bool allow_fields, } break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_IS_DEVICE_PTR: case OMP_CLAUSE_USE_DEVICE_PTR: field_ok = allow_fields; @@ -6948,7 +6949,6 @@ finish_omp_clauses (tree clauses, bool is_oacc, bool allow_fields, case OMP_CLAUSE_SIMD: case OMP_CLAUSE_DEFAULTMAP: case OMP_CLAUSE__CILK_FOR_COUNT_: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_SEQ: @@ -7483,9 +7483,9 @@ tree finish_oacc_host_data (tree clauses, tree block) { tree stmt; - + block = finish_omp_structured_block (block); - + stmt = make_node (OACC_HOST_DATA); TREE_TYPE (stmt) = void_type_node; OACC_HOST_DATA_CLAUSES (stmt) = clauses; diff --git gcc/gimple-pretty-print.c gcc/gimple-pretty-print.c index 
6c4e42c..c0f7c20 100644 --- gcc/gimple-pretty-print.c +++ gcc/gimple-pretty-print.c @@ -1356,6 +1356,9 @@ dump_gimple_omp_target (pretty_printer *buffer, gomp_target *gs, case GF_OMP_TARGET_KIND_OACC_DECLARE: kind = " oacc_declare"; break; + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + kind = " oacc_host_data"; + break; default: gcc_unreachable (); } diff --git gcc/gimple.h gcc/gimple.h index 4c90bd7..7aaf785 100644 --- gcc/gimple.h +++ gcc/gimple.h @@ -171,6 +171,7 @@ enum gf_mask { GF_OMP_TARGET_KIND_OACC_UPDATE = 8, GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA = 9, GF_OMP_TARGET_KIND_OACC_DECLARE = 10, + GF_OMP_TARGET_KIND_OACC_HOST_DATA = 11, /* True on an GIMPLE_OMP_RETURN statement if the return does not require a thread synchronization via some sort of barrier. The exact barrier @@ -6006,6 +6007,7 @@ is_gimple_omp_oacc (const gimple *stmt) case GF_OMP_TARGET_KIND_OACC_UPDATE: case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: case GF_OMP_TARGET_KIND_OACC_DECLARE: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: return true; default: return false; diff --git gcc/gimplify.c gcc/gimplify.c index 3bb3bfe..b00de81 100644 --- gcc/gimplify.c +++ gcc/gimplify.c @@ -90,10 +90,8 @@ enum gimplify_omp_var_data /* Flag for shared vars that are or might be stored to in the region. */ GOVD_WRITTEN = 131072, - GOVD_USE_DEVICE = 1 << 18, - /* OpenACC deviceptr clause. */ - GOVD_USE_DEVPTR = 1 << 19, + GOVD_USE_DEVPTR = 1 << 18, GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR @@ -122,18 +120,16 @@ enum omp_region_type ORT_TARGET = 0x20, ORT_COMBINED_TARGET = 0x21, - ORT_HOST_DATA = 0x40, - /* OpenACC variants. */ - ORT_ACC = 0x80, /* A generic OpenACC region. */ + ORT_ACC = 0x40, /* A generic OpenACC region. */ ORT_ACC_DATA = ORT_ACC | ORT_TARGET_DATA, /* Data construct. */ ORT_ACC_PARALLEL = ORT_ACC | ORT_TARGET, /* Parallel construct */ - ORT_ACC_KERNELS = ORT_ACC | ORT_TARGET | 0x100, /* Kernels construct. 
*/ - ORT_ACC_HOST = ORT_ACC | ORT_HOST_DATA, + ORT_ACC_KERNELS = ORT_ACC | ORT_TARGET | 0x80, /* Kernels construct. */ + ORT_ACC_HOST_DATA = ORT_ACC | ORT_TARGET_DATA | 0x80, /* Host data. */ /* Dummy OpenMP region, used to disable expansion of DECL_VALUE_EXPRs in taskloop pre body. */ - ORT_NONE = 0x200 + ORT_NONE = 0x100 }; /* Gimplify hashtable helper. */ @@ -6126,8 +6122,6 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code) for (; octx; octx = octx->outer_context) { - if (octx->region_type & ORT_HOST_DATA) - continue; if (!(octx->region_type & (ORT_TARGET_DATA | ORT_TARGET))) break; splay_tree_node n2 @@ -6135,6 +6129,9 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code) (splay_tree_key) decl); if (n2) { + if (octx->region_type == ORT_ACC_HOST_DATA) + error ("variable %qE declared in enclosing " + "%<host_data%> region", DECL_NAME (decl)); nflags |= GOVD_MAP; goto found_outer; } @@ -6436,6 +6433,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, case OMP_TARGET_DATA: case OMP_TARGET_ENTER_DATA: case OMP_TARGET_EXIT_DATA: + case OACC_HOST_DATA: ctx->target_firstprivatize_array_bases = true; default: break; @@ -6571,10 +6569,6 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, || outer_ctx->region_type == ORT_ACC_DATA)) redvec.safe_push (OMP_CLAUSE_DECL (c)); goto do_add_decl; - case OMP_CLAUSE_USE_DEVICE: - flags = GOVD_USE_DEVICE | GOVD_EXPLICIT; - check_non_private = "use_device"; - goto do_add; case OMP_CLAUSE_LINEAR: if (gimplify_expr (&OMP_CLAUSE_LINEAR_STEP (c), pre_p, NULL, is_gimple_val, fb_rvalue) == GS_ERROR) @@ -6709,6 +6703,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, case OMP_TARGET_DATA: case OMP_TARGET_ENTER_DATA: case OMP_TARGET_EXIT_DATA: + case OACC_HOST_DATA: if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER || (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE)) @@ -6721,6 +6716,22 @@ gimplify_scan_omp_clauses (tree *list_p, 
gimple_seq *pre_p, } if (remove) break; + if (DECL_P (decl) && outer_ctx && (region_type & ORT_ACC)) + { + struct gimplify_omp_ctx *octx; + for (octx = outer_ctx; octx; octx = octx->outer_context) + { + if (octx->region_type != ORT_ACC_HOST_DATA) + break; + splay_tree_node n2 + = splay_tree_lookup (octx->variables, + (splay_tree_key) decl); + if (n2) + error_at (OMP_CLAUSE_LOCATION (c), "variable %qE " + "declared in enclosing %<host_data%> region", + DECL_NAME (decl)); + } + } if (OMP_CLAUSE_SIZE (c) == NULL_TREE) OMP_CLAUSE_SIZE (c) = DECL_P (decl) ? DECL_SIZE_UNIT (decl) : TYPE_SIZE_UNIT (TREE_TYPE (decl)); @@ -7120,6 +7131,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, } goto do_notice; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: flags = GOVD_FIRSTPRIVATE | GOVD_EXPLICIT; goto do_add; @@ -7639,7 +7651,7 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n, void *data) code = OMP_CLAUSE_FIRSTPRIVATE; else if (flags & GOVD_LASTPRIVATE) code = OMP_CLAUSE_LASTPRIVATE; - else if (flags & (GOVD_ALIGNED | GOVD_USE_DEVICE)) + else if (flags & GOVD_ALIGNED) return 0; else gcc_unreachable (); @@ -8244,126 +8256,6 @@ gimplify_oacc_declare (tree *expr_p, gimple_seq *pre_p) *expr_p = NULL_TREE; } -static tree -gimplify_oacc_host_data_1 (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) -{ - splay_tree_node n = NULL; - location_t loc = EXPR_LOCATION (*tp); - - switch (TREE_CODE (*tp)) - { - case ADDR_EXPR: - { - tree decl = TREE_OPERAND (*tp, 0); - - switch (TREE_CODE (decl)) - { - case ARRAY_REF: - case ARRAY_RANGE_REF: - case COMPONENT_REF: - case VIEW_CONVERT_EXPR: - case REALPART_EXPR: - case IMAGPART_EXPR: - if (TREE_CODE (TREE_OPERAND (decl, 0)) == VAR_DECL) - n = splay_tree_lookup (gimplify_omp_ctxp->variables, - (splay_tree_key) TREE_OPERAND (decl, 0)); - break; - - case VAR_DECL: - n = splay_tree_lookup (gimplify_omp_ctxp->variables, - (splay_tree_key) decl); - break; - - default: - ; - } - - if (n != NULL && (n->value & 
GOVD_USE_DEVICE) != 0) - { - tree t = builtin_decl_explicit (BUILT_IN_GOACC_DEVICEPTR); - *tp = build_call_expr_loc (loc, t, 1, *tp); - } - - *walk_subtrees = 0; - } - break; - - case VAR_DECL: - { - tree decl = *tp; - - n = splay_tree_lookup (gimplify_omp_ctxp->variables, - (splay_tree_key) decl); - - if (n != NULL && (n->value & GOVD_USE_DEVICE) != 0) - { - if (!POINTER_TYPE_P (TREE_TYPE (decl))) - return decl; - - tree t = builtin_decl_explicit (BUILT_IN_GOACC_DEVICEPTR); - *tp = build_call_expr_loc (loc, t, 1, *tp); - *walk_subtrees = 0; - } - } - break; - - case OACC_PARALLEL: - case OACC_KERNELS: - case OACC_LOOP: - *walk_subtrees = 0; - break; - - default: - ; - } - - return NULL_TREE; -} - -static enum gimplify_status -gimplify_oacc_host_data (tree *expr_p, gimple_seq *pre_p) -{ - tree expr = *expr_p, orig_body; - gimple_seq body = NULL; - - gimplify_scan_omp_clauses (&OACC_HOST_DATA_CLAUSES (expr), pre_p, - ORT_ACC_HOST, OACC_HOST_DATA); - - orig_body = OACC_HOST_DATA_BODY (expr); - - /* Perform a pre-pass over the host_data region's body, inserting calls to - GOACC_deviceptr where appropriate. */ - - tree ret = walk_tree_without_duplicates (&orig_body, - &gimplify_oacc_host_data_1, 0); - - if (ret) - { - error_at (EXPR_LOCATION (expr), - "undefined use of variable %qE in host_data region", - DECL_NAME (ret)); - gimplify_adjust_omp_clauses (pre_p, body, &OACC_HOST_DATA_CLAUSES (expr), - OACC_HOST_DATA); - return GS_ERROR; - } - - push_gimplify_context (); - - gimple *g = gimplify_and_return_first (orig_body, &body); - - if (gimple_code (g) == GIMPLE_BIND) - pop_gimplify_context (g); - else - pop_gimplify_context (NULL); - - gimplify_adjust_omp_clauses (pre_p, body, &OACC_HOST_DATA_CLAUSES (expr), - OACC_HOST_DATA); - - gimplify_seq_add_stmt (pre_p, g); - - return GS_ALL_DONE; -} - /* Gimplify the contents of an OMP_PARALLEL statement. This involves gimplification of the body, as well as scanning the body for used variables. 
We need to do this scan now, because variable-sized @@ -9648,6 +9540,9 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) case OMP_TEAMS: ort = OMP_TEAMS_COMBINED (expr) ? ORT_COMBINED_TEAMS : ORT_TEAMS; break; + case OACC_HOST_DATA: + ort = ORT_ACC_HOST_DATA; + break; default: gcc_unreachable (); } @@ -9673,6 +9568,7 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) switch (TREE_CODE (expr)) { case OACC_DATA: + case OACC_HOST_DATA: end_ix = BUILT_IN_GOACC_DATA_END; break; case OMP_TARGET_DATA: @@ -9705,6 +9601,10 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p) stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_KERNELS, OMP_CLAUSES (expr)); break; + case OACC_HOST_DATA: + stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_HOST_DATA, + OMP_CLAUSES (expr)); + break; case OACC_PARALLEL: stmt = gimple_build_omp_target (body, GF_OMP_TARGET_KIND_OACC_PARALLEL, OMP_CLAUSES (expr)); @@ -10814,15 +10714,12 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, ret = GS_ALL_DONE; break; - case OACC_HOST_DATA: - ret = gimplify_oacc_host_data (expr_p, pre_p); - break; - case OACC_DECLARE: gimplify_oacc_declare (expr_p, pre_p); ret = GS_ALL_DONE; break; + case OACC_HOST_DATA: case OACC_DATA: case OACC_KERNELS: case OACC_PARALLEL: diff --git gcc/omp-builtins.def gcc/omp-builtins.def index 63e5e6e..35f5014 100644 --- gcc/omp-builtins.def +++ gcc/omp-builtins.def @@ -47,8 +47,8 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update", DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait", BT_FN_VOID_INT_INT_VAR, ATTR_NOTHROW_LIST) -DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DEVICEPTR, "GOACC_deviceptr", - BT_FN_PTR_PTR, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_GOACC_BUILTIN (BUILT_IN_GOACC_HOST_DATA, "GOACC_host_data", + BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST) DEF_GOACC_BUILTIN_COMPILER (BUILT_IN_ACC_ON_DEVICE, "acc_on_device", BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST) diff --git gcc/omp-low.c gcc/omp-low.c 
index a1e7a14..88e41b8 100644 --- gcc/omp-low.c +++ gcc/omp-low.c @@ -2071,6 +2071,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) } break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: decl = OMP_CLAUSE_DECL (c); if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE) @@ -2274,7 +2275,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) break; case OMP_CLAUSE_DEVICE_RESIDENT: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE__CACHE_: sorry ("Clause not supported yet"); break; @@ -2430,6 +2430,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_SIMD: case OMP_CLAUSE_NOGROUP: case OMP_CLAUSE_DEFAULTMAP: + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE__CILK_FOR_COUNT_: case OMP_CLAUSE_ASYNC: @@ -2448,7 +2449,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) break; case OMP_CLAUSE_DEVICE_RESIDENT: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE__CACHE_: sorry ("Clause not supported yet"); break; @@ -3763,6 +3763,8 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) case GF_OMP_TARGET_KIND_OACC_UPDATE: stmt_name = "update"; break; case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: stmt_name = "enter/exit data"; break; + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: stmt_name = "host_data"; + break; default: gcc_unreachable (); } switch (gimple_omp_target_kind (ctx->stmt)) @@ -3774,6 +3776,8 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) case GF_OMP_TARGET_KIND_OACC_KERNELS: ctx_stmt_name = "kernels"; break; case GF_OMP_TARGET_KIND_OACC_DATA: ctx_stmt_name = "data"; break; + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + ctx_stmt_name = "host_data"; break; default: gcc_unreachable (); } @@ -12730,6 +12734,7 @@ expand_omp_target (struct omp_region *region) break; case GF_OMP_TARGET_KIND_DATA: case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: data_region = true; break; default: @@ -12980,6 +12985,9 @@ expand_omp_target (struct omp_region *region) 
case GF_OMP_TARGET_KIND_OACC_DECLARE: start_ix = BUILT_IN_GOACC_DECLARE; break; + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + start_ix = BUILT_IN_GOACC_HOST_DATA; + break; default: gcc_unreachable (); } @@ -13104,6 +13112,7 @@ expand_omp_target (struct omp_region *region) case BUILT_IN_GOACC_DATA_START: case BUILT_IN_GOACC_DECLARE: case BUILT_IN_GOMP_TARGET_DATA: + case BUILT_IN_GOACC_HOST_DATA: break; case BUILT_IN_GOMP_TARGET: case BUILT_IN_GOMP_TARGET_UPDATE: @@ -13445,6 +13454,7 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent, case GF_OMP_TARGET_KIND_OACC_PARALLEL: case GF_OMP_TARGET_KIND_OACC_KERNELS: case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: if (is_gimple_omp_oacc (stmt)) region->kind = gimple_omp_target_kind (stmt); break; @@ -15277,6 +15287,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) break; case GF_OMP_TARGET_KIND_DATA: case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: data_region = true; break; default: @@ -15485,6 +15496,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: var = OMP_CLAUSE_DECL (c); @@ -15870,12 +15882,14 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) build_int_cstu (tkind_type, tkind)); break; + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: ovar = OMP_CLAUSE_DECL (c); var = lookup_decl_in_outer_ctx (ovar, ctx); x = build_sender_ref (ovar, ctx); - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE) tkind = GOMP_MAP_USE_DEVICE_PTR; else tkind = GOMP_MAP_FIRSTPRIVATE_INT; @@ -16078,10 +16092,12 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_build_assign (new_var, x)); } break; + case OMP_CLAUSE_USE_DEVICE: case 
OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: var = OMP_CLAUSE_DECL (c); - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE) x = build_sender_ref (var, ctx); else x = build_receiver_ref (var, false, ctx); @@ -17076,6 +17092,7 @@ make_gimple_omp_edges (basic_block bb, struct omp_region **region, case GF_OMP_TARGET_KIND_OACC_PARALLEL: case GF_OMP_TARGET_KIND_OACC_KERNELS: case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: break; case GF_OMP_TARGET_KIND_UPDATE: case GF_OMP_TARGET_KIND_ENTER_DATA: diff --git gcc/testsuite/c-c++-common/goacc/host_data-1.c gcc/testsuite/c-c++-common/goacc/host_data-1.c index 521c854..a8922df 100644 --- gcc/testsuite/c-c++-common/goacc/host_data-1.c +++ gcc/testsuite/c-c++-common/goacc/host_data-1.c @@ -1,13 +1,11 @@ /* Test valid use of host_data directive. */ /* { dg-do compile } */ -int v0; int v1[3][3]; void f (void) { - int v2 = 3; -#pragma acc host_data use_device(v2, v0, v1) +#pragma acc host_data use_device(v1) ; } diff --git gcc/testsuite/c-c++-common/goacc/host_data-2.c gcc/testsuite/c-c++-common/goacc/host_data-2.c index e5213a0..1dd5be7 100644 --- gcc/testsuite/c-c++-common/goacc/host_data-2.c +++ gcc/testsuite/c-c++-common/goacc/host_data-2.c @@ -10,4 +10,14 @@ f (void) int v2 = 3; #pragma acc host_data copy(v2) /* { dg-error "not valid for" } */ ; + +#pragma acc host_data use_device(v2) + ; + /* { dg-error ".use_device. variable is neither a pointer nor an array" "" { target c } 14 } */ + /* { dg-error ".use_device. variable is neither a pointer, nor an arraynor reference to pointer or array" "" { target c++ } 14 } */ + +#pragma acc host_data use_device(v0) + ; + /* { dg-error ".use_device. variable is neither a pointer nor an array" "" { target c } 19 } */ + /* { dg-error ".use_device. 
variable is neither a pointer, nor an arraynor reference to pointer or array" "" { target c++ } 19 } */ } diff --git gcc/testsuite/gfortran.dg/goacc/coarray.f95 gcc/testsuite/gfortran.dg/goacc/coarray.f95 index 0ca14e2..d2f10d5 100644 --- gcc/testsuite/gfortran.dg/goacc/coarray.f95 +++ gcc/testsuite/gfortran.dg/goacc/coarray.f95 @@ -5,8 +5,6 @@ ! { dg-xfail-if "<http://gcc.gnu.org/PR63861>" { *-*-* } } ! { dg-excess-errors "TODO" } -! TODO: These cases must fail - module test contains subroutine oacc1(a) diff --git gcc/tree-nested.c gcc/tree-nested.c index 8b5aba2..da19e8d 100644 --- gcc/tree-nested.c +++ gcc/tree-nested.c @@ -1072,6 +1072,7 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SHARED: case OMP_CLAUSE_TO_DECLARE: case OMP_CLAUSE_LINK: + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: do_decl_clause: @@ -1743,6 +1744,7 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SHARED: case OMP_CLAUSE_TO_DECLARE: case OMP_CLAUSE_LINK: + case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: do_decl_clause: [diff --git libgomp/ChangeLog libgomp/ChangeLog] diff --git libgomp/libgomp.map libgomp/libgomp.map index cceb92d..a42142f 100644 --- libgomp/libgomp.map +++ libgomp/libgomp.map @@ -394,11 +394,11 @@ GOACC_2.0.1 { global: GOACC_declare; GOACC_parallel_keyed; + GOACC_host_data; } GOACC_2.0; GOACC_2.0.GOMP_4_BRANCH { global: - GOACC_deviceptr; GOMP_set_offload_targets; } GOACC_2.0.1; diff --git libgomp/oacc-mem.c libgomp/oacc-mem.c index 11edcce..588782b 100644 --- libgomp/oacc-mem.c +++ libgomp/oacc-mem.c @@ -203,38 +203,6 @@ acc_deviceptr (void *h) return d; } -/* This function is used as a helper in generated code to implement pointer - lookup in host_data regions. Unlike acc_deviceptr, it returns its argument - unchanged on a shared-memory system (e.g. the host). 
*/ - -void * -GOACC_deviceptr (void *h) -{ - splay_tree_key n; - void *d; - void *offset; - - goacc_lazy_initialize (); - - struct goacc_thread *thr = goacc_thread (); - - if ((thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) == 0) - { - n = lookup_host (thr->dev, h, 1); - - if (!n) - return NULL; - - offset = h - n->host_start; - - d = n->tgt->tgt_start + n->tgt_offset + offset; - - return d; - } - else - return h; -} - /* Return the host pointer that corresponds to device data D. Or NULL if no mapping. */ diff --git libgomp/oacc-parallel.c libgomp/oacc-parallel.c index d66e343..e60a61b 100644 --- libgomp/oacc-parallel.c +++ libgomp/oacc-parallel.c @@ -555,6 +555,46 @@ GOACC_wait (int async, int num_waits, ...) goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval); } +void +GOACC_host_data (int device, size_t mapnum, + void **hostaddrs, size_t *sizes, unsigned short *kinds) +{ + bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; + struct target_mem_desc *tgt; + +#ifdef HAVE_INTTYPES_H + gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", + __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); +#else + gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", + __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); +#endif + + goacc_lazy_initialize (); + + struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; + + /* Host fallback or 'do nothing'. 
*/ + if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + || host_fallback) + { + tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, + GOMP_MAP_VARS_OPENACC); + tgt->prev = thr->mapped_data; + thr->mapped_data = tgt; + + return; + } + + gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); + tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, + GOMP_MAP_VARS_OPENACC); + gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); + tgt->prev = thr->mapped_data; + thr->mapped_data = tgt; +} + int GOACC_get_num_threads (int gang, int worker, int vector) { diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c index 15ccb27..51745ba 100644 --- libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c +++ libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c @@ -1,7 +1,6 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ /* { dg-additional-options "-lcuda -lcublas -lcudart" } */ -#include <stdio.h> #include <stdlib.h> #include <openacc.h> #include <cuda.h> @@ -30,35 +29,13 @@ saxpy_target (int n, float a, float *x, float *y) int main(int argc, char **argv) { - const int N = 8; +#define N 8 int i; - float *x_ref, *y_ref; - float *x, *y; + float x_ref[N], y_ref[N]; + float x[N], y[N]; cublasHandle_t h; float a = 2.0; - x_ref = (float*) malloc (N * sizeof(float)); - y_ref = (float*) malloc (N * sizeof(float)); - - x = (float*) malloc (N * sizeof(float)); - y = (float*) malloc (N * sizeof(float)); - -#pragma acc data copyin (x[0:N]) copy (y[0:N]) - { - float *xp, *yp; -#pragma acc host_data use_device (x, y) - { -#pragma acc parallel pcopy (xp, yp) present (x, y) - { - xp = x; - yp = y; - } - } - - if (xp != acc_deviceptr (x) || yp != acc_deviceptr (y)) - abort (); - } - for (i = 0; i < N; i++) { x[i] = x_ref[i] = 4.0 + i; @@ -106,13 +83,11 @@ main(int argc, char **argv) for (i = 0; i < N; i++) y[i] = 3.0; -#pragma acc data copyin (x[0:N]) copyin 
(a, N) copy (y[0:N]) + /* There's no need to use host_data here. */ +#pragma acc data copyin (x[0:N]) copyin (a) copy (y[0:N]) { -#pragma acc host_data use_device (x, y) - { -#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a, N) - saxpy_target (N, a, x, y); - } +#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a) + saxpy_target (N, a, x, y); } for (i = 0; i < N; i++) diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c index 511ec64..614f143 100644 --- libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c +++ libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c @@ -1,50 +1,31 @@ -/* { dg-do run } */ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ #include <stdlib.h> +#include <openacc.h> -struct by_lightning { - int a; - int b; - int c; -}; +char *global_in_host; -int main (int argc, char* argv[]) +void foo (char *in) { - int x; - void *q = NULL, *r = NULL, *p = NULL, *s = NULL, *t = NULL; - long u; - struct by_lightning on_the_head = {1, 2, 3}; - int arr[10], *f = NULL; - _Complex float cf; - #pragma acc enter data copyin (x, arr, on_the_head, cf) - #pragma acc host_data use_device (x, arr, on_the_head, cf) + if (!acc_is_present (global_in_host, sizeof (*global_in_host)) + || in != acc_deviceptr (global_in_host)) + abort (); +} + +int +main (int argc, char **argv) +{ + char mydata[1024]; + + global_in_host = mydata; + +#pragma acc data copyin(mydata) { - q = &x; +#pragma acc host_data use_device (mydata) { - f = &arr[5]; - r = f; - s = &__real__ cf; - t = &on_the_head.c; - u = (long) &__imag__ cf; - #pragma acc parallel copyout(p) present (x, arr, on_the_head, cf) - { - /* This will not (and must not) call GOACC_deviceptr, but '&x' will be - the address on the device (if appropriate) regardless. 
*/ - p = &x; - } + foo (mydata); } } - #pragma acc exit data delete (x) - -#if ACC_MEM_SHARED - if (q != &x || f != &arr[5] || r != f || s != &(__real__ cf) - || t != &on_the_head.c || u != (long) &(__imag__ cf) || p != &x) - abort (); -#else - if (q == &x || f == &arr[5] || r != f || s == &(__real__ cf) - || t == &on_the_head.c || u == (long) &(__imag__ cf) || p == &x) - abort (); -#endif return 0; } diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c new file mode 100644 index 0000000..7d9b5f7 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-3.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ + +#include <openacc.h> +#include <stdlib.h> + +#define N 1024 + +int main (int argc, char* argv[]) +{ + int x[N]; + +#pragma acc data copyin (x[0:N]) + { + int *xp; +#pragma acc host_data use_device (x) + { + /* This use of the present clause is undefined behaviour for OpenACC. */ +#pragma acc parallel present (x) copyout (xp) /* { dg-error "variable 'x' declared in enclosing 'host_data' region" } */ + { + xp = x; + } + } + + if (xp != acc_deviceptr (x)) + abort (); + } + + return 0; +} diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c new file mode 100644 index 0000000..0ab5a35 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-4.c @@ -0,0 +1,29 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ + +#include <openacc.h> +#include <stdlib.h> + +#define N 1024 + +int main (int argc, char* argv[]) +{ + int x[N], *xp2; + +#pragma acc data copyin (x[0:N]) + { + int *xp; +#pragma acc host_data use_device (x) + { +#pragma acc data + { + xp = x; + } + xp2 = x; + } + + if (xp != acc_deviceptr (x) || xp2 != xp) + abort (); + } + + return 0; +} diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c new file mode 
100644 index 0000000..a3737a7 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-5.c @@ -0,0 +1,38 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ + +#include <openacc.h> +#include <stdlib.h> + +#define N 1024 + +int main (int argc, char* argv[]) +{ + int x[N], y[N], *yp; + + yp = y + 1; + +#pragma acc data copyin (x[0:N]) + { + int *xp, *yp2; +#pragma acc host_data use_device (x) + { +#pragma acc data copyin (y) + { +#pragma acc host_data use_device (yp) + { + xp = x; + yp2 = yp; + } + + if (yp2 != acc_deviceptr (yp)) + abort (); + } + } + + if (xp != acc_deviceptr (x)) + abort (); + + } + + return 0; +} diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-6.c libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-6.c new file mode 100644 index 0000000..a841488 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-6.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ + +#include <openacc.h> +#include <stdlib.h> + +#define N 1024 + +int main (int argc, char* argv[]) +{ + int x[N]; + +#pragma acc data copyin (x[0:N]) + { + int *xp; +#pragma acc host_data use_device (x) + { + /* Here 'x' being implicitly firstprivate for the parallel region + conflicts with it being declared as use_device in the enclosing + host_data region. */ +#pragma acc parallel copyout (xp) + { + xp = x; /* { dg-error "variable 'x' declared in enclosing 'host_data' region" } */ + } + } + + if (xp != acc_deviceptr (x)) + abort (); + } + + return 0; +} diff --git libgomp/testsuite/libgomp.oacc-fortran/host_data-1.f90 libgomp/testsuite/libgomp.oacc-fortran/host_data-1.f90 index a219eaf..9bb79c3 100644 --- libgomp/testsuite/libgomp.oacc-fortran/host_data-1.f90 +++ libgomp/testsuite/libgomp.oacc-fortran/host_data-1.f90 @@ -1,6 +1,9 @@ -! { dg-do run } */ +! { dg-do run } ! { dg-additional-options "-cpp" } +! { dg-xfail-if "TODO" { *-*-* } } +! 
{ dg-excess-errors "TODO" } + program test implicit none Grüße Thomas [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 472 bytes --] ^ permalink raw reply [flat|nested] 33+ messages in thread
* Fortran OpenACC host_data construct ICE (was: [gomp4] Re: [OpenACC 0/7] host_data construct) 2015-12-02 22:14 ` [gomp4] " Thomas Schwinge @ 2016-04-08 13:41 ` Thomas Schwinge 0 siblings, 0 replies; 33+ messages in thread From: Thomas Schwinge @ 2016-04-08 13:41 UTC (permalink / raw) To: Chung-Lin Tang Cc: GCC Patches, Jakub Jelinek, Cesar Philippidis, James Norris [-- Attachment #1: Type: text/plain, Size: 2525 bytes --] Hi! On Wed, 2 Dec 2015 23:13:58 +0100, I wrote: > On Wed, 2 Dec 2015 16:58:45 +0100, I wrote: > > Cesar and Jim copied, for help with Fortran and generally testsuite > > things. (Just in case you happen to have any ideas.) > > On Mon, 30 Nov 2015 19:30:34 +0000, Julian Brown <julian@codesourcery.com> wrote: > > > [patch] > > > > First, thanks! > > Aside from a number of formatting/re-ordering changes, the front end > changes were basically still the same, but otherwise (middle end, > libgomp) the patch as committed to trunk in r231118 was quite (totally?) > ;-) different from the code we had on gomp-4_0-branch, so I had to spend > some time on merging, cleaning things up. > > Your submission/commit didn't have any execution tests for OpenACC > > host_data in Fortran. On gomp-4_0-branch, there is > > libgomp/testsuite/libgomp.oacc-fortran/host_data-1.f90 at least. > > ..., but this one now FAILs (ICE) as follows: > > [...]/source-gcc/libgomp/testsuite/libgomp.oacc-fortran/host_data-1.f90:11:0: internal compiler error: in scan_omp_target, at omp-low.c:3218 > 0xa33e80 scan_omp_target > [...]/source-gcc/gcc/omp-low.c:3218 > [...] Filed <https://gcc.gnu.org/PR70598>. > Maybe that's due to the gcc/gimplify.c:gimplify_scan_omp_clauses issue > mentioned in > <http://news.gmane.org/find-root.php?message_id=%3C877fkwn8p6.fsf%40kepler.schwinge.homeip.net%3E>, > or maybe something else? (XFAILed for now.) The following patch does not resolve the problem -- but we'll still want something like that, I suppose? 
--- gcc/gimplify.c +++ gcc/gimplify.c @@ -6544,18 +6544,20 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, the Fortran FE is updated to OpenMP 4.5. */ ctx->target_map_scalars_firstprivate = true; } - if (!lang_GNU_Fortran ()) - switch (code) - { - case OMP_TARGET: - case OMP_TARGET_DATA: - case OMP_TARGET_ENTER_DATA: - case OMP_TARGET_EXIT_DATA: - case OACC_HOST_DATA: - ctx->target_firstprivatize_array_bases = true; - default: + switch (code) + { + case OMP_TARGET: + case OMP_TARGET_DATA: + case OMP_TARGET_ENTER_DATA: + case OMP_TARGET_EXIT_DATA: + if (lang_GNU_Fortran ()) break; - } + /* FALLTHRU */ + case OACC_HOST_DATA: + ctx->target_firstprivatize_array_bases = true; + default: + break; + } while ((c = *list_p) != NULL) { Grüße Thomas [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 472 bytes --] ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [OpenACC 0/7] host_data construct 2015-12-02 15:59 ` Thomas Schwinge 2015-12-02 19:16 ` Cesar Philippidis 2015-12-02 22:14 ` [gomp4] " Thomas Schwinge @ 2016-02-02 13:57 ` Thomas Schwinge 2 siblings, 0 replies; 33+ messages in thread From: Thomas Schwinge @ 2016-02-02 13:57 UTC (permalink / raw) To: GCC Patches, Jakub Jelinek Cc: Julian Brown, James Norris, Joseph S. Myers, Nathan Sidwell, Cesar Philippidis [-- Attachment #1: Type: text/plain, Size: 5971 bytes --] Hi! On Wed, 2 Dec 2015 16:58:45 +0100, I wrote: > On Mon, 30 Nov 2015 19:30:34 +0000, Julian Brown <julian@codesourcery.com> wrote: > > --- a/libgomp/oacc-parallel.c > > +++ b/libgomp/oacc-parallel.c > > > +void > > +GOACC_host_data (int device, size_t mapnum, > > + void **hostaddrs, size_t *sizes, unsigned short *kinds) > > +{ > > +[...] > > +} > > Isn't that identical to GOACC_data_start? Can we thus get rid of it? Yes, we can. As GOACC_host_data has not been part of GCC 5's libgomp ABI, it's OK to just remove it; committed "as obvious" in r233074: commit 2bf3f448431be10baa9755df5faeed6b2f6508f8 Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Tue Feb 2 13:53:55 2016 +0000 Merge BUILT_IN_GOACC_HOST_DATA into BUILT_IN_GOACC_DATA_START gcc/ * omp-builtins.def (BUILT_IN_GOACC_HOST_DATA): Remove. * omp-low.c (expand_omp_target): Use BUILT_IN_GOACC_DATA_START instead. libgomp/ * libgomp.map (GOACC_2.0): Remove GOACC_host_data. * oacc-parallel.c (GOACC_host_data): Remove function definition. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@233074 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 6 ++++++ gcc/omp-builtins.def | 2 -- gcc/omp-low.c | 5 +---- libgomp/ChangeLog | 3 +++ libgomp/libgomp.map | 1 - libgomp/oacc-parallel.c | 40 ---------------------------------------- 6 files changed, 10 insertions(+), 47 deletions(-) diff --git gcc/ChangeLog gcc/ChangeLog index 05741331..9a2cec8 100644 --- gcc/ChangeLog +++ gcc/ChangeLog @@ -1,3 +1,9 @@ +2016-02-02 Thomas Schwinge <thomas@codesourcery.com> + + * omp-builtins.def (BUILT_IN_GOACC_HOST_DATA): Remove. + * omp-low.c (expand_omp_target): Use BUILT_IN_GOACC_DATA_START + instead. + 2016-02-02 Richard Biener <rguenther@suse.de> PR tree-optimization/69606 diff --git gcc/omp-builtins.def gcc/omp-builtins.def index 60199b0..ea012df 100644 --- gcc/omp-builtins.def +++ gcc/omp-builtins.def @@ -47,8 +47,6 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update", DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait", BT_FN_VOID_INT_INT_VAR, ATTR_NOTHROW_LIST) -DEF_GOACC_BUILTIN (BUILT_IN_GOACC_HOST_DATA, "GOACC_host_data", - BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST) DEF_GOACC_BUILTIN_COMPILER (BUILT_IN_ACC_ON_DEVICE, "acc_on_device", BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST) diff --git gcc/omp-low.c gcc/omp-low.c index 0b70274..d41688b 100644 --- gcc/omp-low.c +++ gcc/omp-low.c @@ -13186,6 +13186,7 @@ expand_omp_target (struct omp_region *region) start_ix = BUILT_IN_GOACC_PARALLEL; break; case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: start_ix = BUILT_IN_GOACC_DATA_START; break; case GF_OMP_TARGET_KIND_OACC_UPDATE: @@ -13197,9 +13198,6 @@ expand_omp_target (struct omp_region *region) case GF_OMP_TARGET_KIND_OACC_DECLARE: start_ix = BUILT_IN_GOACC_DECLARE; break; - case GF_OMP_TARGET_KIND_OACC_HOST_DATA: - start_ix = BUILT_IN_GOACC_HOST_DATA; - break; default: gcc_unreachable (); } @@ -13324,7 +13322,6 @@ expand_omp_target (struct omp_region *region) case 
BUILT_IN_GOACC_DATA_START: case BUILT_IN_GOACC_DECLARE: case BUILT_IN_GOMP_TARGET_DATA: - case BUILT_IN_GOACC_HOST_DATA: break; case BUILT_IN_GOMP_TARGET: case BUILT_IN_GOMP_TARGET_UPDATE: diff --git libgomp/ChangeLog libgomp/ChangeLog index 6c9bf6a..250240d 100644 --- libgomp/ChangeLog +++ libgomp/ChangeLog @@ -1,5 +1,8 @@ 2016-02-02 Thomas Schwinge <thomas@codesourcery.com> + * libgomp.map (GOACC_2.0): Remove GOACC_host_data. + * oacc-parallel.c (GOACC_host_data): Remove function definition. + * testsuite/lib/libgomp.exp: Skip hsa offloading for OpenACC test cases. diff --git libgomp/libgomp.map libgomp/libgomp.map index ea9344d..4d42c42 100644 --- libgomp/libgomp.map +++ libgomp/libgomp.map @@ -394,7 +394,6 @@ GOACC_2.0.1 { global: GOACC_declare; GOACC_parallel_keyed; - GOACC_host_data; } GOACC_2.0; GOMP_PLUGIN_1.0 { diff --git libgomp/oacc-parallel.c libgomp/oacc-parallel.c index f22ba41..bc24651 100644 --- libgomp/oacc-parallel.c +++ libgomp/oacc-parallel.c @@ -490,46 +490,6 @@ GOACC_wait (int async, int num_waits, ...) goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval); } -void -GOACC_host_data (int device, size_t mapnum, - void **hostaddrs, size_t *sizes, unsigned short *kinds) -{ - bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK; - struct target_mem_desc *tgt; - -#ifdef HAVE_INTTYPES_H - gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", - __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); -#else - gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", - __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); -#endif - - goacc_lazy_initialize (); - - struct goacc_thread *thr = goacc_thread (); - struct gomp_device_descr *acc_dev = thr->dev; - - /* Host fallback or 'do nothing'. 
*/ - if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) - || host_fallback) - { - tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, - GOMP_MAP_VARS_OPENACC); - tgt->prev = thr->mapped_data; - thr->mapped_data = tgt; - - return; - } - - gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); - tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, - GOMP_MAP_VARS_OPENACC); - gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); - tgt->prev = thr->mapped_data; - thr->mapped_data = tgt; -} - int GOACC_get_num_threads (void) { Grüße Thomas [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 472 bytes --] ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Bulk] [OpenACC 0/7] host_data construct 2015-11-02 18:33 ` Julian Brown 2015-11-02 19:29 ` Jakub Jelinek 2015-11-12 11:16 ` Julian Brown @ 2015-11-13 15:31 ` Jakub Jelinek 2 siblings, 0 replies; 33+ messages in thread From: Jakub Jelinek @ 2015-11-13 15:31 UTC (permalink / raw) To: Julian Brown; +Cc: James Norris, GCC Patches, Joseph S. Myers, Nathan Sidwell On Mon, Nov 02, 2015 at 06:33:39PM +0000, Julian Brown wrote: > Firstly, on trunk at least, use_device_ptr variables are restricted to > pointer or array types: that restriction doesn't exist in OpenACC, nor > actually could I find it in the OpenMP 4.1 document (my guess is the > standards are supposed to match in this regard). I think that a program > such as this should work: So, after talking about this on omp-lang, it seems there is agreement that only arrays and pointer types (or reference to arrays or pointers) should be allowed in use_device_ptr clause and that for pointers/reference to pointers it should probably act the way I've coded it up, i.e. that for them it translates the pointer to point to corresponding object to the one to which it points on the host. It is too late to change the standard now, but will be changed soon, and hopefully clarified in examples. > void target_fn (int *targ_data); > > int > main (int argc, char *argv[]) > { > char out; > int myvar; > #pragma omp target enter data map(to: myvar) > > #pragma omp target data use_device_ptr(myvar) map(from:out) > { > target_fn (&myvar); > out = 5; > } > > return 0; > } That would make the above non-conforming for OpenMP. > Secondly, attempts to use use_device_ptr on (e.g. 
> dynamically-allocated) arrays accessed through a pointer cause an ICE > with the existing trunk OpenMP code: > > #include <stdlib.h> > > void target_fn (char *targ_data); > > int > main (int argc, char *argv[]) > { > char *myarr, out; > > myarr = malloc (1024); > > #pragma omp target data map(to: myarr[0:1024]) > { > #pragma omp target data use_device_ptr(myarr) map(from:out) > { > target_fn (myarr); > out = 5; > } > } > > return 0; > } Can't reproduce this ICE (at least not on gomp-4_5-branch, but there aren't significant changes from the trunk there). > Furthermore, this looks strange to me (006t.omplower): > > .omp_data_arr.5.out = &out; > myarr.8 = myarr; > .omp_data_arr.5.myarr = myarr.8; > #pragma omp target data map(from:out [len: 1]) use_device_ptr(myarr) > { > D.2436 = .omp_data_arr.5.myarr; > myarr = D.2436; > > That's clobbering the original myarr variable, right? Just use -fdump-tree-omplower-uid to see that it is a different variable. Basically, for OpenMP use_device_ptr creates a private copy of the pointer for the body of the target data construct, and that pointer is assigned the target device's address. For arrays the implementation creates an artificial pointer variable (holding the start of the array initially) and replaces all references to the array in the target data body with dereference of the pointer. Jakub ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [Bulk] [OpenACC 0/7] host_data construct 2015-10-26 18:36 ` Jakub Jelinek 2015-10-27 15:57 ` Cesar Philippidis 2015-11-02 18:33 ` Julian Brown @ 2015-12-23 11:02 ` Thomas Schwinge 2 siblings, 0 replies; 33+ messages in thread From: Thomas Schwinge @ 2015-12-23 11:02 UTC (permalink / raw) To: gcc-patches, Julian Brown Cc: Joseph S. Myers, Nathan Sidwell, Jakub Jelinek, James Norris [-- Attachment #1: Type: text/plain, Size: 13600 bytes --] Hi! On Mon, 26 Oct 2015 19:34:22 +0100, Jakub Jelinek <jakub@redhat.com> wrote: > Your use_device sounds very similar to use_device_ptr clause in OpenMP, > which is allowed on #pragma omp target data construct and is implemented > quite a bit differently from this; it is unclear if the OpenACC standard > requires this kind of implementation, or you just chose to implement it this > way. In particular, the GOMP_target_data call puts the variables mentioned > in the use_device_ptr clauses into the mapping structures (similarly how > map clause appears) and the corresponding vars are privatized within the > target data region (which is a host region, basically a fancy { } braces), > where the private variables contain the offloading device's pointers. ACK. As the OpenACC use_device clause implementation now completely matches the OpenMP use_device_ptr clause implementation, there is no use anymore for a separate OMP_CLAUSE_USE_DEVICE, so in r231926 I cleaned that up, as obvious: commit 9d5fd7c608fef6e7a9efbfc940545d49452c4e01 Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Wed Dec 23 11:01:18 2015 +0000 Merge OMP_CLAUSE_USE_DEVICE into OMP_CLAUSE_USE_DEVICE_PTR gcc/c/ * c-parser.c (c_parser_oacc_clause_use_device): Merge function into... (c_parser_omp_clause_use_device_ptr): ... this function. Adjust all users. gcc/ * tree-core.h (enum omp_clause_code): Merge OMP_CLAUSE_USE_DEVICE into OMP_CLAUSE_USE_DEVICE_PTR. Adjust all users. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@231926 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 5 +++++ gcc/c/ChangeLog | 7 +++++++ gcc/c/c-parser.c | 16 +++++----------- gcc/c/c-typeck.c | 1 - gcc/cp/parser.c | 2 +- gcc/cp/pt.c | 2 -- gcc/cp/semantics.c | 1 - gcc/fortran/trans-openmp.c | 2 +- gcc/gimplify.c | 2 -- gcc/omp-low.c | 11 ++--------- gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95 | 2 +- gcc/tree-core.h | 6 ++---- gcc/tree-nested.c | 2 -- gcc/tree-pretty-print.c | 3 --- gcc/tree.c | 3 --- 15 files changed, 24 insertions(+), 41 deletions(-) diff --git gcc/ChangeLog gcc/ChangeLog index aa28d10..d67b9c6 100644 --- gcc/ChangeLog +++ gcc/ChangeLog @@ -1,3 +1,8 @@ +2015-12-23 Thomas Schwinge <thomas@codesourcery.com> + + * tree-core.h (enum omp_clause_code): Merge OMP_CLAUSE_USE_DEVICE + into OMP_CLAUSE_USE_DEVICE_PTR. Adjust all users. + 2015-12-23 David Sherwood <david.sherwood@arm.com> * config/arm/iterators.md (VMAXMINFNM): New int iterator. diff --git gcc/c/ChangeLog gcc/c/ChangeLog index f99f426..7b275d8 100644 --- gcc/c/ChangeLog +++ gcc/c/ChangeLog @@ -1,3 +1,10 @@ +2015-12-23 Thomas Schwinge <thomas@codesourcery.com> + + * c-parser.c (c_parser_oacc_clause_use_device): Merge function + into... + (c_parser_omp_clause_use_device_ptr): ... this function. Adjust + all users. 
+ 2015-12-22 Marek Polacek <polacek@redhat.com> PR c/69002 diff --git gcc/c/c-parser.c gcc/c/c-parser.c index 353e3da..8e754d0 100644 --- gcc/c/c-parser.c +++ gcc/c/c-parser.c @@ -11395,7 +11395,10 @@ c_parser_omp_clause_defaultmap (c_parser *parser, tree list) return list; } -/* OpenMP 4.5: +/* OpenACC 2.0: + use_device ( variable-list ) + + OpenMP 4.5: use_device_ptr ( variable-list ) */ static tree @@ -11730,15 +11733,6 @@ c_parser_oacc_clause_tile (c_parser *parser, tree list) return c; } -/* OpenACC 2.0: - use_device ( variable-list ) */ - -static tree -c_parser_oacc_clause_use_device (c_parser *parser, tree list) -{ - return c_parser_omp_var_list_parens (parser, OMP_CLAUSE_USE_DEVICE, list); -} - /* OpenACC: wait ( int-expr-list ) */ @@ -13058,7 +13052,7 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, c_name = "tile"; break; case PRAGMA_OACC_CLAUSE_USE_DEVICE: - clauses = c_parser_oacc_clause_use_device (parser, clauses); + clauses = c_parser_omp_clause_use_device_ptr (parser, clauses); c_name = "use_device"; break; case PRAGMA_OACC_CLAUSE_VECTOR: diff --git gcc/c/c-typeck.c gcc/c/c-typeck.c index 928fcd5..7406bd4 100644 --- gcc/c/c-typeck.c +++ gcc/c/c-typeck.c @@ -13125,7 +13125,6 @@ c_finish_omp_clauses (tree clauses, bool is_omp, bool declare_simd) bitmap_set_bit (&map_head, DECL_UID (t)); goto check_dup_generic; - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_IS_DEVICE_PTR: case OMP_CLAUSE_USE_DEVICE_PTR: t = OMP_CLAUSE_DECL (c); diff --git gcc/cp/parser.c gcc/cp/parser.c index 842dded..4829a77 100644 --- gcc/cp/parser.c +++ gcc/cp/parser.c @@ -32097,7 +32097,7 @@ cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask, c_name = "tile"; break; case PRAGMA_OACC_CLAUSE_USE_DEVICE: - clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_USE_DEVICE, + clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_USE_DEVICE_PTR, clauses); c_name = "use_device"; break; diff --git gcc/cp/pt.c gcc/cp/pt.c index dab15bd..4555b32 100644 --- 
gcc/cp/pt.c +++ gcc/cp/pt.c @@ -14425,7 +14425,6 @@ tsubst_omp_clauses (tree clauses, bool declare_simd, bool allow_fields, case OMP_CLAUSE_FROM: case OMP_CLAUSE_TO: case OMP_CLAUSE_MAP: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: OMP_CLAUSE_DECL (nc) @@ -14552,7 +14551,6 @@ tsubst_omp_clauses (tree clauses, bool declare_simd, bool allow_fields, case OMP_CLAUSE_COPYPRIVATE: case OMP_CLAUSE_LINEAR: case OMP_CLAUSE_REDUCTION: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: /* tsubst_expr on SCOPE_REF results in returning diff --git gcc/cp/semantics.c gcc/cp/semantics.c index ab9989a..37bf050 100644 --- gcc/cp/semantics.c +++ gcc/cp/semantics.c @@ -6886,7 +6886,6 @@ finish_omp_clauses (tree clauses, bool allow_fields, bool declare_simd) } break; - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_IS_DEVICE_PTR: case OMP_CLAUSE_USE_DEVICE_PTR: field_ok = allow_fields; diff --git gcc/fortran/trans-openmp.c gcc/fortran/trans-openmp.c index 227964c..70a7722 100644 --- gcc/fortran/trans-openmp.c +++ gcc/fortran/trans-openmp.c @@ -1771,7 +1771,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, clause_code = OMP_CLAUSE_UNIFORM; goto add_clause; case OMP_LIST_USE_DEVICE: - clause_code = OMP_CLAUSE_USE_DEVICE; + clause_code = OMP_CLAUSE_USE_DEVICE_PTR; goto add_clause; case OMP_LIST_DEVICE_RESIDENT: clause_code = OMP_CLAUSE_DEVICE_RESIDENT; diff --git gcc/gimplify.c gcc/gimplify.c index 62b0e64..bc90401 100644 --- gcc/gimplify.c +++ gcc/gimplify.c @@ -7139,7 +7139,6 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, } goto do_notice; - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: flags = GOVD_FIRSTPRIVATE | GOVD_EXPLICIT; goto do_add; @@ -8051,7 +8050,6 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, case OMP_CLAUSE_ASYNC: case OMP_CLAUSE_WAIT: case OMP_CLAUSE_DEVICE_RESIDENT: - case OMP_CLAUSE_USE_DEVICE: 
case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_NUM_GANGS: case OMP_CLAUSE_NUM_WORKERS: diff --git gcc/omp-low.c gcc/omp-low.c index 676b1df..a0c3e1c 100644 --- gcc/omp-low.c +++ gcc/omp-low.c @@ -1957,7 +1957,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, install_var_local (decl, ctx); break; - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: decl = OMP_CLAUSE_DECL (c); if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE) @@ -2314,7 +2313,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, case OMP_CLAUSE_SIMD: case OMP_CLAUSE_NOGROUP: case OMP_CLAUSE_DEFAULTMAP: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE__CILK_FOR_COUNT_: case OMP_CLAUSE_ASYNC: @@ -15288,7 +15286,6 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } break; - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: var = OMP_CLAUSE_DECL (c); @@ -15674,14 +15671,12 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) build_int_cstu (tkind_type, tkind)); break; - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: ovar = OMP_CLAUSE_DECL (c); var = lookup_decl_in_outer_ctx (ovar, ctx); x = build_sender_ref (ovar, ctx); - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR - || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR) tkind = GOMP_MAP_USE_DEVICE_PTR; else tkind = GOMP_MAP_FIRSTPRIVATE_INT; @@ -15884,12 +15879,10 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_build_assign (new_var, x)); } break; - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: var = OMP_CLAUSE_DECL (c); - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR - || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR) x = build_sender_ref (var, ctx); else x = build_receiver_ref (var, false, ctx); 
diff --git gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95 gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95 index 7a5eea6..23aba8c 100644 --- gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95 +++ gcc/testsuite/gfortran.dg/goacc/host_data-tree.f95 @@ -8,4 +8,4 @@ program test !$acc host_data use_device(i) !$acc end host_data end program test -! { dg-final { scan-tree-dump-times "pragma acc host_data use_device\\(i\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "pragma acc host_data use_device_ptr\\(i\\)" 1 "original" } } diff --git gcc/tree-core.h gcc/tree-core.h index 9cc64d9..5371378 100644 --- gcc/tree-core.h +++ gcc/tree-core.h @@ -302,7 +302,8 @@ enum omp_clause_code { OpenMP clause: map ({alloc:,to:,from:,tofrom:,}variable-list). */ OMP_CLAUSE_MAP, - /* OpenMP clause: use_device_ptr (variable-list). */ + /* OpenACC clause: use_device (variable_list). + OpenMP clause: use_device_ptr (variable-list). */ OMP_CLAUSE_USE_DEVICE_PTR, /* OpenMP clause: is_device_ptr (variable-list). */ @@ -315,9 +316,6 @@ enum omp_clause_code { /* OpenACC clause: device_resident (variable_list). */ OMP_CLAUSE_DEVICE_RESIDENT, - /* OpenACC clause: use_device (variable_list). */ - OMP_CLAUSE_USE_DEVICE, - /* OpenACC clause: gang [(gang-argument-list)]. 
Where gang-argument-list: [gang-argument-list, ] gang-argument diff --git gcc/tree-nested.c gcc/tree-nested.c index 3a9479a..8211a12 100644 --- gcc/tree-nested.c +++ gcc/tree-nested.c @@ -1072,7 +1072,6 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SHARED: case OMP_CLAUSE_TO_DECLARE: case OMP_CLAUSE_LINK: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: do_decl_clause: @@ -1744,7 +1743,6 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE_SHARED: case OMP_CLAUSE_TO_DECLARE: case OMP_CLAUSE_LINK: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_USE_DEVICE_PTR: case OMP_CLAUSE_IS_DEVICE_PTR: do_decl_clause: diff --git gcc/tree-pretty-print.c gcc/tree-pretty-print.c index caec760..c4a180e 100644 --- gcc/tree-pretty-print.c +++ gcc/tree-pretty-print.c @@ -327,9 +327,6 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, int flags) case OMP_CLAUSE_DEVICE_RESIDENT: name = "device_resident"; goto print_remap; - case OMP_CLAUSE_USE_DEVICE: - name = "use_device"; - goto print_remap; case OMP_CLAUSE_TO_DECLARE: name = "to"; goto print_remap; diff --git gcc/tree.c gcc/tree.c index 2190cae..d837609 100644 --- gcc/tree.c +++ gcc/tree.c @@ -282,7 +282,6 @@ unsigned const char omp_clause_num_ops[] = 1, /* OMP_CLAUSE_IS_DEVICE_PTR */ 2, /* OMP_CLAUSE__CACHE_ */ 1, /* OMP_CLAUSE_DEVICE_RESIDENT */ - 1, /* OMP_CLAUSE_USE_DEVICE */ 2, /* OMP_CLAUSE_GANG */ 1, /* OMP_CLAUSE_ASYNC */ 1, /* OMP_CLAUSE_WAIT */ @@ -354,7 +353,6 @@ const char * const omp_clause_code_name[] = "is_device_ptr", "_cache_", "device_resident", - "use_device", "gang", "async", "wait", @@ -11612,7 +11610,6 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data, /* FALLTHRU */ case OMP_CLAUSE_DEVICE_RESIDENT: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_ASYNC: case OMP_CLAUSE_WAIT: case OMP_CLAUSE_WORKER: Grüße Thomas [-- Attachment #2: signature.asc --] [-- Type: 
application/pgp-signature, Size: 472 bytes --] ^ permalink raw reply [flat|nested] 33+ messages in thread
end of thread, other threads:[~2016-04-08 13:41 UTC | newest] Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2015-10-22 19:14 [OpenACC 0/7] host_data construct James Norris 2015-10-22 19:15 ` [OpenACC 2/7] host_data construct (C FE) James Norris 2015-10-22 19:15 ` [OpenACC 1/7] host_data construct (C/C++ common) James Norris 2015-10-22 19:16 ` [OpenACC 3/7] host_data construct (C front-end) James Norris 2015-10-22 19:18 ` [OpenACC 4/7] host_data construct (middle end) James Norris 2015-10-22 19:19 ` [OpenACC 5/7] host_data construct (gcc tests) James Norris 2015-10-22 19:20 ` [OpenACC 6/7] host_data construct James Norris 2015-10-22 19:22 ` [OpenACC 7/7] host_data construct (runtime tests) James Norris 2015-10-22 20:42 ` [OpenACC 0/7] host_data construct Joseph Myers 2015-10-22 20:53 ` James Norris 2015-10-23 16:01 ` [Bulk] " James Norris 2015-10-26 18:36 ` Jakub Jelinek 2015-10-27 15:57 ` Cesar Philippidis 2015-11-02 18:33 ` Julian Brown 2015-11-02 19:29 ` Jakub Jelinek 2015-11-12 11:16 ` Julian Brown 2015-11-18 12:48 ` Julian Brown 2015-11-19 13:13 ` Jakub Jelinek 2015-11-19 14:29 ` Julian Brown 2015-11-19 15:57 ` Jakub Jelinek 2015-11-30 19:34 ` Julian Brown 2015-12-01 8:30 ` Jakub Jelinek 2015-12-02 15:27 ` Tom de Vries 2015-12-02 15:59 ` Thomas Schwinge 2015-12-02 19:16 ` Cesar Philippidis 2015-12-02 19:28 ` Steve Kargl 2015-12-02 19:35 ` Jakub Jelinek 2015-12-02 19:54 ` Cesar Philippidis 2015-12-02 22:14 ` [gomp4] " Thomas Schwinge 2016-04-08 13:41 ` Fortran OpenACC host_data construct ICE (was: [gomp4] Re: [OpenACC 0/7] host_data construct) Thomas Schwinge 2016-02-02 13:57 ` [OpenACC 0/7] host_data construct Thomas Schwinge 2015-11-13 15:31 ` [Bulk] " Jakub Jelinek 2015-12-23 11:02 ` Thomas Schwinge
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).