diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c index 834a916..b748e2f 100644 --- a/gcc/c-family/c-pragma.c +++ b/gcc/c-family/c-pragma.c @@ -1214,6 +1214,7 @@ static const struct omp_pragma_def oacc_pragmas[] = { { "data", PRAGMA_OACC_DATA }, { "enter", PRAGMA_OACC_ENTER_DATA }, { "exit", PRAGMA_OACC_EXIT_DATA }, + { "host_data", PRAGMA_OACC_HOST_DATA }, { "kernels", PRAGMA_OACC_KERNELS }, { "loop", PRAGMA_OACC_LOOP }, { "parallel", PRAGMA_OACC_PARALLEL }, diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h index cec920f..23a72a3 100644 --- a/gcc/c-family/c-pragma.h +++ b/gcc/c-family/c-pragma.h @@ -31,6 +31,7 @@ enum pragma_kind { PRAGMA_OACC_DATA, PRAGMA_OACC_ENTER_DATA, PRAGMA_OACC_EXIT_DATA, + PRAGMA_OACC_HOST_DATA, PRAGMA_OACC_KERNELS, PRAGMA_OACC_LOOP, PRAGMA_OACC_PARALLEL, @@ -161,6 +162,7 @@ enum pragma_omp_clause { PRAGMA_OACC_CLAUSE_PRESENT_OR_CREATE, PRAGMA_OACC_CLAUSE_SELF, PRAGMA_OACC_CLAUSE_SEQ, + PRAGMA_OACC_CLAUSE_USE_DEVICE, PRAGMA_OACC_CLAUSE_VECTOR, PRAGMA_OACC_CLAUSE_VECTOR_LENGTH, PRAGMA_OACC_CLAUSE_WAIT, diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 704ebc6..ead98b9 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -10116,6 +10116,8 @@ c_parser_omp_clause_name (c_parser *parser) result = PRAGMA_OMP_CLAUSE_UNTIED; else if (!strcmp ("use_device_ptr", p)) result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; + else if (!strcmp ("use_device", p)) + result = PRAGMA_OACC_CLAUSE_USE_DEVICE; break; case 'v': if (!strcmp ("vector", p)) @@ -11219,6 +11221,15 @@ c_parser_oacc_clause_async (c_parser *parser, tree list) return list; } +/* OpenACC 2.0: + use_device ( variable-list ) */ + +static tree +c_parser_oacc_clause_use_device (c_parser *parser, tree list) +{ + return c_parser_omp_var_list_parens (parser, OMP_CLAUSE_USE_DEVICE, list); +} + /* OpenACC: wait ( int-expr-list ) */ @@ -12474,6 +12485,10 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, clauses = c_parser_oacc_data_clause (parser, c_kind, clauses); c_name = "self"; break; + case PRAGMA_OACC_CLAUSE_USE_DEVICE: + clauses = c_parser_oacc_clause_use_device (parser, clauses); + c_name = "use_device"; + break; case PRAGMA_OACC_CLAUSE_VECTOR_LENGTH: clauses = c_parser_omp_clause_vector_length (parser, clauses); c_name = "vector_length"; @@ -13003,6 +13018,29 @@ c_parser_oacc_enter_exit_data (c_parser *parser, bool enter) /* OpenACC 2.0: + # pragma acc host_data oacc-data-clause[optseq] new-line + structured-block +*/ + +#define OACC_HOST_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) ) + +static tree +c_parser_oacc_host_data (location_t loc, c_parser *parser) +{ + tree stmt, clauses, block; + + clauses = c_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK, + "#pragma acc host_data"); + + block = c_begin_omp_parallel (); + add_stmt (c_parser_omp_structured_block (parser)); + stmt = c_finish_oacc_host_data (loc, clauses, block); + return stmt; +} + + +/* OpenACC 2.0: # pragma acc loop oacc-loop-clause[optseq] new-line structured-block @@ -16075,6 +16113,9 @@ c_parser_omp_construct (c_parser *parser) case PRAGMA_OACC_DATA: stmt = c_parser_oacc_data (loc, parser); break; + case PRAGMA_OACC_HOST_DATA: + stmt = c_parser_oacc_host_data (loc, parser); + break; case PRAGMA_OACC_KERNELS: strcpy (p_name, "#pragma acc"); stmt = c_parser_oacc_kernels (loc, parser, p_name); diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h index bee03d3..a9c5975 100644 --- a/gcc/c/c-tree.h +++ b/gcc/c/c-tree.h @@ -643,6 +643,7 @@ extern tree c_expr_to_decl (tree, bool *, bool *); extern tree c_finish_oacc_parallel (location_t, tree, tree); extern tree c_finish_oacc_kernels (location_t, tree, tree); extern tree c_finish_oacc_data (location_t, tree, tree); +extern tree c_finish_oacc_host_data (location_t, tree, tree); extern tree c_begin_omp_parallel (void); extern tree c_finish_omp_parallel (location_t, tree, tree); extern tree c_begin_omp_task (void); diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c index bc43602..a5e2a4a 100644 --- a/gcc/c/c-typeck.c +++ b/gcc/c/c-typeck.c @@ -11510,6 +11510,25 @@ c_finish_oacc_data (location_t loc, tree clauses, tree block) return add_stmt (stmt); } +/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound + statement. LOC is the location of the OACC_HOST_DATA. */ + +tree +c_finish_oacc_host_data (location_t loc, tree clauses, tree block) +{ + tree stmt; + + block = c_end_compound_stmt (loc, block, true); + + stmt = make_node (OACC_HOST_DATA); + TREE_TYPE (stmt) = void_type_node; + OACC_HOST_DATA_CLAUSES (stmt) = clauses; + OACC_HOST_DATA_BODY (stmt) = block; + SET_EXPR_LOCATION (stmt, loc); + + return add_stmt (stmt); +} + /* Like c_begin_compound_stmt, except force the retention of the BLOCK. */ tree @@ -12942,6 +12961,7 @@ c_finish_omp_clauses (tree clauses, bool is_omp, bool declare_simd) case OMP_CLAUSE_GANG: case OMP_CLAUSE_WORKER: case OMP_CLAUSE_VECTOR: + case OMP_CLAUSE_USE_DEVICE: pc = &OMP_CLAUSE_CHAIN (c); continue; diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 16db41f..76ece42 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -6318,6 +6318,7 @@ extern void finish_omp_threadprivate (tree); extern tree begin_omp_structured_block (void); extern tree finish_omp_structured_block (tree); extern tree finish_oacc_data (tree, tree); +extern tree finish_oacc_host_data (tree, tree); extern tree finish_oacc_kernels (tree, tree); extern tree finish_oacc_parallel (tree, tree); extern tree begin_omp_parallel (void); diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index f07a5e4..714e69c 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -29235,6 +29235,8 @@ cp_parser_omp_clause_name (cp_parser *parser) result = PRAGMA_OMP_CLAUSE_UNTIED; else if (!strcmp ("use_device_ptr", p)) result = PRAGMA_OMP_CLAUSE_USE_DEVICE_PTR; + else if (!strcmp ("use_device", p)) + result = PRAGMA_OACC_CLAUSE_USE_DEVICE; break; case 'v': if (!strcmp ("vector_length", p)) @@ -31381,6 +31383,11 @@ cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask, clauses = cp_parser_oacc_data_clause (parser, c_kind, clauses); c_name = "self"; break; + case PRAGMA_OACC_CLAUSE_USE_DEVICE: + clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_USE_DEVICE, + clauses); + c_name = "use_device"; + break; case PRAGMA_OACC_CLAUSE_VECTOR_LENGTH: clauses = cp_parser_oacc_clause_vector_length (parser, clauses); c_name = "vector_length"; @@ -34221,6 +34228,30 @@ cp_parser_oacc_data (cp_parser *parser, cp_token *pragma_tok) return stmt; } +#define OACC_HOST_DATA_CLAUSE_MASK \ + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_USE_DEVICE) ) + +/* OpenACC 2.0: + # pragma acc host_data new-line + structured-block */ + +static tree +cp_parser_oacc_host_data (cp_parser *parser, cp_token *pragma_tok) +{ + tree stmt, clauses, block; + unsigned int save; + + clauses = cp_parser_oacc_all_clauses (parser, OACC_HOST_DATA_CLAUSE_MASK, + "#pragma acc host_data", pragma_tok); + + block = begin_omp_parallel (); + save = cp_parser_begin_omp_structured_block (parser); + cp_parser_statement (parser, NULL_TREE, false, NULL); + cp_parser_end_omp_structured_block (parser, save); + stmt = finish_oacc_host_data (clauses, block); + return stmt; +} + /* OpenACC 2.0: # pragma acc enter data oacc-enter-data-clause[optseq] new-line @@ -35288,6 +35319,9 @@ cp_parser_omp_construct (cp_parser *parser, cp_token *pragma_tok) case PRAGMA_OACC_EXIT_DATA: stmt = cp_parser_oacc_enter_exit_data (parser, pragma_tok, false); break; + case PRAGMA_OACC_HOST_DATA: + stmt = cp_parser_oacc_host_data (parser, pragma_tok); + break; case PRAGMA_OACC_KERNELS: stmt = cp_parser_oacc_kernels (parser, pragma_tok); break; @@ -35856,6 +35890,7 @@ cp_parser_pragma (cp_parser *parser, enum pragma_context context) case PRAGMA_OACC_DATA: case PRAGMA_OACC_ENTER_DATA: case PRAGMA_OACC_EXIT_DATA: + case PRAGMA_OACC_HOST_DATA: case PRAGMA_OACC_KERNELS: case PRAGMA_OACC_PARALLEL: case PRAGMA_OACC_LOOP: diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index c0a8b32..25482e7 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -6689,6 +6689,7 @@ finish_omp_clauses (tree clauses, bool allow_fields, bool declare_simd) case OMP_CLAUSE_SIMD: case OMP_CLAUSE_DEFAULTMAP: case OMP_CLAUSE__CILK_FOR_COUNT_: + case OMP_CLAUSE_USE_DEVICE: break; case OMP_CLAUSE_INBRANCH: @@ -7119,6 +7120,24 @@ finish_oacc_data (tree clauses, tree block) return add_stmt (stmt); } +/* Generate OACC_HOST_DATA, with CLAUSES and BLOCK as its compound + statement. */ + +tree +finish_oacc_host_data (tree clauses, tree block) +{ + tree stmt; + + block = finish_omp_structured_block (block); + + stmt = make_node (OACC_HOST_DATA); + TREE_TYPE (stmt) = void_type_node; + OACC_HOST_DATA_CLAUSES (stmt) = clauses; + OACC_HOST_DATA_BODY (stmt) = block; + + return add_stmt (stmt); +} + /* Generate OACC_KERNELS, with CLAUSES and BLOCK as its compound statement. LOC is the location of the OACC_KERNELS. */ diff --git a/gcc/gimplify.c b/gcc/gimplify.c index ab9e540..0c32219 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -93,6 +93,8 @@ enum gimplify_omp_var_data GOVD_MAP_0LEN_ARRAY = 32768, + GOVD_USE_DEVICE = 65536, + GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR | GOVD_LOCAL) @@ -116,7 +118,9 @@ enum omp_region_type ORT_COMBINED_TARGET = 33, /* Dummy OpenMP region, used to disable expansion of DECL_VALUE_EXPRs in taskloop pre body. */ - ORT_NONE = 64 + ORT_NONE = 64, + /* An OpenACC host-data region. */ + ORT_HOST_DATA = 128 }; /* Gimplify hashtable helper. */ @@ -6338,6 +6342,10 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, decl = TREE_OPERAND (decl, 0); } goto do_add_decl; + case OMP_CLAUSE_USE_DEVICE: + flags = GOVD_USE_DEVICE | GOVD_EXPLICIT; + check_non_private = "use_device"; + goto do_add; case OMP_CLAUSE_LINEAR: if (gimplify_expr (&OMP_CLAUSE_LINEAR_STEP (c), pre_p, NULL, is_gimple_val, fb_rvalue) == GS_ERROR) @@ -7005,7 +7013,6 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, break; case OMP_CLAUSE_DEVICE_RESIDENT: - case OMP_CLAUSE_USE_DEVICE: case OMP_CLAUSE_INDEPENDENT: remove = true; break; @@ -7529,6 +7536,127 @@ gimplify_oacc_cache (tree *expr_p, gimple_seq *pre_p) *expr_p = NULL_TREE; } +static tree +gimplify_oacc_host_data_1 (tree *tp, int *walk_subtrees, + void *data ATTRIBUTE_UNUSED) +{ + splay_tree_node n = NULL; + location_t loc = EXPR_LOCATION (*tp); + + switch (TREE_CODE (*tp)) + { + case ADDR_EXPR: + { + tree decl = TREE_OPERAND (*tp, 0); + + switch (TREE_CODE (decl)) + { + case ARRAY_REF: + case ARRAY_RANGE_REF: + case COMPONENT_REF: + case VIEW_CONVERT_EXPR: + case REALPART_EXPR: + case IMAGPART_EXPR: + if (TREE_CODE (TREE_OPERAND (decl, 0)) == VAR_DECL) + n = splay_tree_lookup (gimplify_omp_ctxp->variables, + (splay_tree_key) TREE_OPERAND (decl, 0)); + break; + + case VAR_DECL: + n = splay_tree_lookup (gimplify_omp_ctxp->variables, + (splay_tree_key) decl); + break; + + default: + ; + } + + if (n != NULL && (n->value & GOVD_USE_DEVICE) != 0) + { + tree t = builtin_decl_explicit (BUILT_IN_GOACC_DEVICEPTR); + *tp = build_call_expr_loc (loc, t, 1, *tp); + } + + *walk_subtrees = 0; + } + break; + + case VAR_DECL: + { + tree decl = *tp; + + n = splay_tree_lookup (gimplify_omp_ctxp->variables, + (splay_tree_key) decl); + + if (n != NULL && (n->value & GOVD_USE_DEVICE) != 0) + { + if (!POINTER_TYPE_P (TREE_TYPE (decl))) + return decl; + + tree t = builtin_decl_explicit (BUILT_IN_GOACC_DEVICEPTR); + *tp = build_call_expr_loc (loc, t, 1, *tp); + *walk_subtrees = 0; + } + } + break; + + case OACC_PARALLEL: + case OACC_KERNELS: + case OACC_LOOP: + *walk_subtrees = 0; + break; + + default: + ; + } + + return NULL_TREE; +} + +static enum gimplify_status +gimplify_oacc_host_data (tree *expr_p, gimple_seq *pre_p) +{ + tree expr = *expr_p, orig_body; + gimple_seq body = NULL; + + gimplify_scan_omp_clauses (&OACC_HOST_DATA_CLAUSES (expr), pre_p, + ORT_HOST_DATA, OACC_HOST_DATA); + + orig_body = OACC_HOST_DATA_BODY (expr); + + /* Perform a pre-pass over the host_data region's body, inserting calls to + GOACC_deviceptr where appropriate. */ + + tree ret = walk_tree_without_duplicates (&orig_body, + &gimplify_oacc_host_data_1, 0); + + if (ret) + { + error_at (EXPR_LOCATION (expr), + "undefined use of variable %qE in host_data region", + DECL_NAME (ret)); + gimplify_adjust_omp_clauses (pre_p, &OACC_HOST_DATA_CLAUSES (expr), + OACC_HOST_DATA); + return GS_ERROR; + } + + push_gimplify_context (); + + gimple *g = gimplify_and_return_first (orig_body, &body); + + if (gimple_code (g) == GIMPLE_BIND) + pop_gimplify_context (g); + else + pop_gimplify_context (NULL); + + gimplify_adjust_omp_clauses (pre_p, &OACC_HOST_DATA_CLAUSES (expr), + OACC_HOST_DATA); + + gimplify_seq_add_stmt (pre_p, g); + + return GS_ALL_DONE; +} + /* Gimplify the contents of an OMP_PARALLEL statement. This involves gimplification of the body, as well as scanning the body for used variables. We need to do this scan now, because variable-sized @@ -9595,6 +9723,9 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, break; case OACC_HOST_DATA: + ret = gimplify_oacc_host_data (expr_p, pre_p); + break; + case OACC_DECLARE: sorry ("directive not yet implemented"); ret = GS_ALL_DONE; diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index ea9cf0d..9ed075f 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -47,6 +47,8 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update", DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait", BT_FN_VOID_INT_INT_VAR, ATTR_NOTHROW_LIST) +DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DEVICEPTR, "GOACC_deviceptr", + BT_FN_PTR_PTR, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_THREAD_NUM, "GOACC_get_thread_num", BT_FN_INT, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_NUM_THREADS, "GOACC_get_num_threads", diff --git a/gcc/testsuite/c-c++-common/goacc/host_data-1.c b/gcc/testsuite/c-c++-common/goacc/host_data-1.c new file mode 100644 index 0000000..521c854 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/host_data-1.c @@ -0,0 +1,13 @@ +/* Test valid use of host_data directive. */ +/* { dg-do compile } */ + +int v0; +int v1[3][3]; + +void +f (void) +{ + int v2 = 3; +#pragma acc host_data use_device(v2, v0, v1) + ; +} diff --git a/gcc/testsuite/c-c++-common/goacc/host_data-2.c b/gcc/testsuite/c-c++-common/goacc/host_data-2.c new file mode 100644 index 0000000..e5213a0 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/host_data-2.c @@ -0,0 +1,13 @@ +/* Test invalid use of host_data directive. */ +/* { dg-do compile } */ + +int v0; +#pragma acc host_data use_device(v0) /* { dg-error "expected" } */ + +void +f (void) +{ + int v2 = 3; +#pragma acc host_data copy(v2) /* { dg-error "not valid for" } */ + ; +} diff --git a/gcc/testsuite/c-c++-common/goacc/host_data-3.c b/gcc/testsuite/c-c++-common/goacc/host_data-3.c new file mode 100644 index 0000000..f9621c9 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/host_data-3.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ + +int main (int argc, char* argv[]) +{ + int x = 5, y; + + #pragma acc enter data copyin (x) + /* It's not clear what attempts to use non-pointer variables "directly" + (rather than merely taking their address) should do in host_data regions. + We choose to make it an error. */ + #pragma acc host_data use_device (x) /* TODO { dg-error "" } */ + { + y = x; + } + #pragma acc exit data delete (x) + + return y - 5; +} diff --git a/gcc/testsuite/c-c++-common/goacc/host_data-4.c b/gcc/testsuite/c-c++-common/goacc/host_data-4.c new file mode 100644 index 0000000..3dac5f3 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/host_data-4.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ + +int main (int argc, char* argv[]) +{ + int x[100]; + + #pragma acc enter data copyin (x) + /* Specifying an array index is not valid for host_data/use_device. */ + #pragma acc host_data use_device (x[4]) /* { dg-error "expected '\\\)' before '\\\[' token" } */ + ; + #pragma acc exit data delete (x) + + return 0; +} diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 2153661..2a43a8c 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -378,6 +378,7 @@ GOACC_2.0 { GOACC_wait; GOACC_get_thread_num; GOACC_get_num_threads; + GOACC_deviceptr; }; GOACC_2.0.1 { diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index af067d6..497ab92 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -204,6 +204,38 @@ acc_deviceptr (void *h) return d; } +/* This function is used as a helper in generated code to implement pointer + lookup in host_data regions. Unlike acc_deviceptr, it returns its argument + unchanged on a shared-memory system (e.g. the host). */ + +void * +GOACC_deviceptr (void *h) +{ + splay_tree_key n; + void *d; + void *offset; + + goacc_lazy_initialize (); + + struct goacc_thread *thr = goacc_thread (); + + if ((thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) == 0) + { + n = lookup_host (thr->dev, h, 1); + + if (!n) + return NULL; + + offset = h - n->host_start; + + d = n->tgt->tgt_start + n->tgt_offset + offset; + + return d; + } + else + return h; +} + /* Return the host pointer that corresponds to device data D. Or NULL if no mapping. */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c new file mode 100644 index 0000000..15ccb27 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-1.c @@ -0,0 +1,125 @@ +/* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-lcuda -lcublas -lcudart" } */ + +#include +#include +#include +#include +#include +#include + +void +saxpy_host (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + y[i] = y[i] + a * x[i]; +} + +#pragma acc routine +void +saxpy_target (int n, float a, float *x, float *y) +{ + int i; + + for (i = 0; i < n; i++) + y[i] = y[i] + a * x[i]; +} + +int +main(int argc, char **argv) +{ + const int N = 8; + int i; + float *x_ref, *y_ref; + float *x, *y; + cublasHandle_t h; + float a = 2.0; + + x_ref = (float*) malloc (N * sizeof(float)); + y_ref = (float*) malloc (N * sizeof(float)); + + x = (float*) malloc (N * sizeof(float)); + y = (float*) malloc (N * sizeof(float)); + +#pragma acc data copyin (x[0:N]) copy (y[0:N]) + { + float *xp, *yp; +#pragma acc host_data use_device (x, y) + { +#pragma acc parallel pcopy (xp, yp) present (x, y) + { + xp = x; + yp = y; + } + } + + if (xp != acc_deviceptr (x) || yp != acc_deviceptr (y)) + abort (); + } + + for (i = 0; i < N; i++) + { + x[i] = x_ref[i] = 4.0 + i; + y[i] = y_ref[i] = 3.0; + } + + saxpy_host (N, a, x_ref, y_ref); + + cublasCreate (&h); + +#pragma acc data copyin (x[0:N]) copy (y[0:N]) + { +#pragma acc host_data use_device (x, y) + { + cublasSaxpy (h, N, &a, x, 1, y, 1); + } + } + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + +#pragma acc data create (x[0:N]) copyout (y[0:N]) + { +#pragma acc kernels + for (i = 0; i < N; i++) + y[i] = 3.0; + +#pragma acc host_data use_device (x, y) + { + cublasSaxpy (h, N, &a, x, 1, y, 1); + } + } + + cublasDestroy (h); + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + + for (i = 0; i < N; i++) + y[i] = 3.0; + +#pragma acc data copyin (x[0:N]) copyin (a, N) copy (y[0:N]) + { +#pragma acc host_data use_device (x, y) + { +#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a, N) + saxpy_target (N, a, x, y); + } + } + + for (i = 0; i < N; i++) + { + if (y[i] != y_ref[i]) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c new file mode 100644 index 0000000..511ec64 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/host_data-2.c @@ -0,0 +1,50 @@ +/* { dg-do run } */ + +#include + +struct by_lightning { + int a; + int b; + int c; +}; + +int main (int argc, char* argv[]) +{ + int x; + void *q = NULL, *r = NULL, *p = NULL, *s = NULL, *t = NULL; + long u; + struct by_lightning on_the_head = {1, 2, 3}; + int arr[10], *f = NULL; + _Complex float cf; + #pragma acc enter data copyin (x, arr, on_the_head, cf) + #pragma acc host_data use_device (x, arr, on_the_head, cf) + { + q = &x; + { + f = &arr[5]; + r = f; + s = &__real__ cf; + t = &on_the_head.c; + u = (long) &__imag__ cf; + #pragma acc parallel copyout(p) present (x, arr, on_the_head, cf) + { + /* This will not (and must not) call GOACC_deviceptr, but '&x' will be + the address on the device (if appropriate) regardless. */ + p = &x; + } + } + } + #pragma acc exit data delete (x) + +#if ACC_MEM_SHARED + if (q != &x || f != &arr[5] || r != f || s != &(__real__ cf) + || t != &on_the_head.c || u != (long) &(__imag__ cf) || p != &x) + abort (); +#else + if (q == &x || f == &arr[5] || r != f || s == &(__real__ cf) + || t == &on_the_head.c || u == (long) &(__imag__ cf) || p == &x) + abort (); +#endif + + return 0; +}