Hi! On Tue, 05 May 2015 10:54:02 +0200, I wrote: > In follow-up messages, I'll be posting the separated parts (for easier > review) of a next set of OpenACC changes that we'd like to commit. > ChangeLog updates not yet written; will do that before commit, obviously. gcc/gimplify.c | 16 +- gcc/omp-low.c | 11 +- gcc/tree-core.h | 14 +- gcc/tree-pretty-print.c | 6 + gcc/tree.c | 13 +- gcc/tree.h | 21 +- include/gomp-constants.h | 4 + libgomp/oacc-mem.c | 3 + libgomp/oacc-ptx.h | 28 + libgomp/plugin/plugin-nvptx.c | 10 + diff --git gcc/gimplify.c gcc/gimplify.c index bda62ce..12efdc8 100644 --- gcc/gimplify.c +++ gcc/gimplify.c @@ -6385,6 +6385,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, case OMP_CLAUSE_MERGEABLE: case OMP_CLAUSE_PROC_BIND: case OMP_CLAUSE_SAFELEN: + case OMP_CLAUSE_TILE: break; case OMP_CLAUSE_ALIGNED: @@ -6770,6 +6771,7 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, tree *list_p) case OMP_CLAUSE_VECTOR: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_TILE: break; default: @@ -8410,21 +8412,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, break; case OACC_KERNELS: - if (OACC_KERNELS_COMBINED (*expr_p)) - sorry ("directive not yet implemented"); - else - gimplify_omp_workshare (expr_p, pre_p); - ret = GS_ALL_DONE; - break; - case OACC_PARALLEL: - if (OACC_PARALLEL_COMBINED (*expr_p)) - sorry ("directive not yet implemented"); - else - gimplify_omp_workshare (expr_p, pre_p); - ret = GS_ALL_DONE; - break; - case OACC_DATA: case OMP_SECTIONS: case OMP_SINGLE: diff --git gcc/omp-low.c gcc/omp-low.c index 34e2e5c..6ec5145 100644 --- gcc/omp-low.c +++ gcc/omp-low.c @@ -1928,6 +1928,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_BIND: + case OMP_CLAUSE_NOHOST: + case OMP_CLAUSE_TILE: sorry ("Clause not supported yet"); break; @@ -2055,6 +2058,9 @@ scan_sharing_clauses (tree clauses, omp_context 
*ctx) case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_BIND: + case OMP_CLAUSE_NOHOST: + case OMP_CLAUSE_TILE: sorry ("Clause not supported yet"); break; @@ -2742,7 +2748,10 @@ check_omp_nesting_restrictions (gimple stmt, omp_context *ctx) { for (omp_context *ctx_ = ctx; ctx_ != NULL; ctx_ = ctx_->outer) if (is_gimple_omp (ctx_->stmt) - && is_gimple_omp_oacc (ctx_->stmt)) + && is_gimple_omp_oacc (ctx_->stmt) + /* Except for atomic codes that we share with OpenMP. */ + && ! (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD + || gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE)) { error_at (gimple_location (stmt), "non-OpenACC construct inside of OpenACC region"); diff --git gcc/tree-core.h gcc/tree-core.h index ad1bb23..ffbccda 100644 --- gcc/tree-core.h +++ gcc/tree-core.h @@ -390,7 +390,19 @@ enum omp_clause_code { OMP_CLAUSE_NUM_WORKERS, /* OpenACC clause: vector_length (integer-expression). */ - OMP_CLAUSE_VECTOR_LENGTH + OMP_CLAUSE_VECTOR_LENGTH, + + /* OpenACC clause: bind ( identifier | string ). */ + OMP_CLAUSE_BIND, + + /* OpenACC clause: nohost. */ + OMP_CLAUSE_NOHOST, + + /* OpenACC clause: tile ( size-expr-list ). */ + OMP_CLAUSE_TILE, + + /* OpenACC clause: device_type ( device-type-list ). */ + OMP_CLAUSE_DEVICE_TYPE }; #undef DEFTREESTRUCT diff --git gcc/tree-pretty-print.c gcc/tree-pretty-print.c index d7c049f..5eb4daf 100644 --- gcc/tree-pretty-print.c +++ gcc/tree-pretty-print.c @@ -799,6 +799,12 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, int flags) case OMP_CLAUSE_INDEPENDENT: pp_string (pp, "independent"); break; + case OMP_CLAUSE_TILE: + pp_string (pp, "tile("); + dump_generic_node (pp, OMP_CLAUSE_TILE_LIST (clause), + spc, flags, false); + pp_right_paren (pp); + break; default: /* Should never happen.
*/ diff --git gcc/tree.c gcc/tree.c index daf0292..43f80b7 100644 --- gcc/tree.c +++ gcc/tree.c @@ -369,6 +369,10 @@ unsigned const char omp_clause_num_ops[] = 1, /* OMP_CLAUSE_NUM_GANGS */ 1, /* OMP_CLAUSE_NUM_WORKERS */ 1, /* OMP_CLAUSE_VECTOR_LENGTH */ + 1, /* OMP_CLAUSE_BIND */ + 0, /* OMP_CLAUSE_NOHOST */ + 1, /* OMP_CLAUSE_TILE */ + 2 /* OMP_CLAUSE_DEVICE_TYPE */ }; const char * const omp_clause_code_name[] = @@ -427,7 +431,11 @@ const char * const omp_clause_code_name[] = "vector", "num_gangs", "num_workers", - "vector_length" + "vector_length", + "bind", + "nohost", + "tile", + "device_type" }; @@ -11237,6 +11245,7 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data, case OMP_CLAUSE__LOOPTEMP_: case OMP_CLAUSE__SIMDUID_: case OMP_CLAUSE__CILK_FOR_COUNT_: + case OMP_CLAUSE_BIND: WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 0)); /* FALLTHRU */ @@ -11255,6 +11264,8 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data, case OMP_CLAUSE_TASKGROUP: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_NOHOST: + case OMP_CLAUSE_TILE: WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp)); case OMP_CLAUSE_LASTPRIVATE: diff --git gcc/tree.h gcc/tree.h index e17bd9b..55c5a6d 100644 --- gcc/tree.h +++ gcc/tree.h @@ -1312,15 +1312,6 @@ extern void protected_set_expr_location (tree, location_t); #define OMP_SECTION_LAST(NODE) \ (OMP_SECTION_CHECK (NODE)->base.private_flag) -/* True on an OACC_KERNELS statement if is represents combined kernels loop - directive. */ -#define OACC_KERNELS_COMBINED(NODE) \ - (OACC_KERNELS_CHECK (NODE)->base.private_flag) - -/* Like OACC_KERNELS_COMBINED, but for parallel loop directive. */ -#define OACC_PARALLEL_COMBINED(NODE) \ - (OACC_PARALLEL_CHECK (NODE)->base.private_flag) - /* True on an OMP_PARALLEL statement if it represents an explicit combined parallel work-sharing constructs. 
*/ #define OMP_PARALLEL_COMBINED(NODE) \ @@ -1391,6 +1382,9 @@ extern void protected_set_expr_location (tree, location_t); #define OMP_CLAUSE_VECTOR_LENGTH_EXPR(NODE) \ OMP_CLAUSE_OPERAND ( \ OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_VECTOR_LENGTH), 0) +#define OMP_CLAUSE_BIND_NAME(NODE) \ + OMP_CLAUSE_OPERAND ( \ + OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_BIND), 0) #define OMP_CLAUSE_DEPEND_KIND(NODE) \ (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEPEND)->omp_clause.subcode.depend_kind) @@ -1495,6 +1489,15 @@ extern void protected_set_expr_location (tree, location_t); #define OMP_CLAUSE_DEFAULT_KIND(NODE) \ (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEFAULT)->omp_clause.subcode.default_kind) +#define OMP_CLAUSE_TILE_LIST(NODE) \ + OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_TILE), 0) + +#define OMP_CLAUSE_DEVICE_TYPE_DEVICES(NODE) \ + OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEVICE_TYPE), 0) + +#define OMP_CLAUSE_DEVICE_TYPE_CLAUSES(NODE) \ + OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEVICE_TYPE), 1) + /* SSA_NAME accessors. */ /* Returns the IDENTIFIER_NODE giving the SSA name a name or NULL_TREE diff --git include/gomp-constants.h include/gomp-constants.h index e3d2820..45370b8 100644 --- include/gomp-constants.h +++ include/gomp-constants.h @@ -70,6 +70,10 @@ enum gomp_map_kind /* Is a device pointer. OMP_CLAUSE_SIZE for these is unused; is implicitly POINTER_SIZE_UNITS. */ GOMP_MAP_FORCE_DEVICEPTR = (GOMP_MAP_FLAG_SPECIAL_1 | 0), + /* OpenACC device_resident. */ + GOMP_MAP_DEVICE_RESIDENT = (GOMP_MAP_FLAG_SPECIAL_1 | 1), + /* OpenACC link. */ + GOMP_MAP_LINK = (GOMP_MAP_FLAG_SPECIAL_1 | 2), /* Allocate. */ GOMP_MAP_FORCE_ALLOC = (GOMP_MAP_FLAG_FORCE | GOMP_MAP_ALLOC), /* ..., and copy to device. 
*/ diff --git libgomp/oacc-mem.c libgomp/oacc-mem.c index 89ef5fc..0164b3d 100644 --- libgomp/oacc-mem.c +++ libgomp/oacc-mem.c @@ -479,6 +479,9 @@ update_dev_host (int is_dev, void *h, size_t s) { splay_tree_key n; void *d; + + goacc_lazy_initialize (); + struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; diff --git libgomp/oacc-ptx.h libgomp/oacc-ptx.h index 2419a46..104f297 100644 --- libgomp/oacc-ptx.h +++ libgomp/oacc-ptx.h @@ -424,3 +424,31 @@ "st.param.u32 [%out_retval],%retval;\n" \ "ret;\n" \ "}\n" + + #define GOMP_ATOMIC_PTX \ + ".version 3.1\n" \ + ".target sm_30\n" \ + ".address_size 64\n" \ + ".global .align 4 .u32 libgomp_ptx_lock;\n" \ + ".visible .func GOMP_atomic_start;\n" \ + ".visible .func GOMP_atomic_start\n" \ + "{\n" \ + " .reg .pred %p<2>;\n" \ + " .reg .s32 %r<2>;\n" \ + " .reg .s64 %rd<2>;\n" \ + "BB5_1:\n" \ + " mov.u64 %rd1, libgomp_ptx_lock;\n" \ + " atom.global.cas.b32 %r1, [%rd1], 0, 1;\n" \ + " setp.ne.s32 %p1, %r1, 0;\n" \ + " @%p1 bra BB5_1;\n" \ + " ret;\n" \ + "}\n" \ + ".visible .func GOMP_atomic_end;\n" \ + ".visible .func GOMP_atomic_end\n" \ + "{\n" \ + " .reg .s32 %r<2>;\n" \ + " .reg .s64 %rd<2>;\n" \ + " mov.u64 %rd1, libgomp_ptx_lock;\n" \ + " atom.global.exch.b32 %r1, [%rd1], 0;\n" \ + " ret;\n" \ + "}\n" diff --git libgomp/plugin/plugin-nvptx.c libgomp/plugin/plugin-nvptx.c index 583ec87..ad1163d 100644 --- libgomp/plugin/plugin-nvptx.c +++ libgomp/plugin/plugin-nvptx.c @@ -863,6 +863,16 @@ link_ptx (CUmodule *module, char *ptx_code) cuda_error (r)); } + char *gomp_atomic_ptx = GOMP_ATOMIC_PTX; + r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, gomp_atomic_ptx, + strlen (gomp_atomic_ptx) + 1, 0, 0, 0, 0); + if (r != CUDA_SUCCESS) + { + GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]); + GOMP_PLUGIN_fatal ("cuLinkAddData (gomp_atomic_ptx) error: %s", + cuda_error (r)); + } + r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, ptx_code, strlen (ptx_code) + 1, 0, 0, 0, 0); if (r != 
CUDA_SUCCESS) Grüße, Thomas