From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 73443 invoked by alias); 5 May 2015 08:56:39 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 73318 invoked by uid 89); 5 May 2015 08:56:38 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.3 required=5.0 tests=AWL,BAYES_00,RCVD_IN_DNSWL_LOW,SPF_PASS,T_FROM_12LTRDOM autolearn=ham version=3.3.2 X-HELO: relay1.mentorg.com Received: from relay1.mentorg.com (HELO relay1.mentorg.com) (192.94.38.131) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 05 May 2015 08:56:36 +0000 Received: from nat-ies.mentorg.com ([192.94.31.2] helo=SVR-IES-FEM-01.mgc.mentorg.com) by relay1.mentorg.com with esmtp id 1YpYeX-00044n-4f from Thomas_Schwinge@mentor.com ; Tue, 05 May 2015 01:56:33 -0700 Received: from feldtkeller.schwinge.homeip.net (137.202.0.76) by SVR-IES-FEM-01.mgc.mentorg.com (137.202.0.104) with Microsoft SMTP Server id 14.3.224.2; Tue, 5 May 2015 09:56:31 +0100 From: Thomas Schwinge To: , Jakub Jelinek CC: Bernd Schmidt , Cesar Philippidis , Chung-Lin Tang , James Norris , Joseph Myers , Julian Brown , Tom de Vries Subject: Next set of OpenACC changes: middle end, libgomp In-Reply-To: <87sibbpfpx.fsf@schwinge.name> References: <87sibbpfpx.fsf@schwinge.name> User-Agent: Notmuch/0.9-101-g81dad07 (http://notmuchmail.org) Emacs/24.3.1 (x86_64-pc-linux-gnu) Date: Tue, 05 May 2015 08:56:00 -0000 Message-ID: <87oalzpflw.fsf@schwinge.name> MIME-Version: 1.0 Content-Type: multipart/signed; boundary="=-=-="; micalg=pgp-sha1; protocol="application/pgp-signature" X-SW-Source: 2015-05/txt/msg00288.txt.bz2 --=-=-= Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable Content-length: 10550 Hi! 
On Tue, 05 May 2015 10:54:02 +0200, I wrote: > In follow-up messages, I'll be posting the separated parts (for easier > review) of a next set of OpenACC changes that we'd like to commit. > ChangeLog updates not yet written; will do that before commit, obviously. gcc/gimplify.c | 16 +- gcc/omp-low.c | 11 +- gcc/tree-core.h | 14 +- gcc/tree-pretty-print.c | 6 + gcc/tree.c | 13 +- gcc/tree.h | 21 +- include/gomp-constants.h | 4 + libgomp/oacc-mem.c | 3 + libgomp/oacc-ptx.h | 28 + libgomp/plugin/plugin-nvptx.c | 10 + diff --git gcc/gimplify.c gcc/gimplify.c index bda62ce..12efdc8 100644 --- gcc/gimplify.c +++ gcc/gimplify.c @@ -6385,6 +6385,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *= pre_p, case OMP_CLAUSE_MERGEABLE: case OMP_CLAUSE_PROC_BIND: case OMP_CLAUSE_SAFELEN: + case OMP_CLAUSE_TILE: break; =20 case OMP_CLAUSE_ALIGNED: @@ -6770,6 +6771,7 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, tree = *list_p) case OMP_CLAUSE_VECTOR: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_TILE: break; =20 default: @@ -8410,21 +8412,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimp= le_seq *post_p, break; =20 case OACC_KERNELS: - if (OACC_KERNELS_COMBINED (*expr_p)) - sorry ("directive not yet implemented"); - else - gimplify_omp_workshare (expr_p, pre_p); - ret =3D GS_ALL_DONE; - break; - case OACC_PARALLEL: - if (OACC_PARALLEL_COMBINED (*expr_p)) - sorry ("directive not yet implemented"); - else - gimplify_omp_workshare (expr_p, pre_p); - ret =3D GS_ALL_DONE; - break; - case OACC_DATA: case OMP_SECTIONS: case OMP_SINGLE: diff --git gcc/omp-low.c gcc/omp-low.c index 34e2e5c..6ec5145 100644 --- gcc/omp-low.c +++ gcc/omp-low.c @@ -1928,6 +1928,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_BIND: + case OMP_CLAUSE_NOHOST: + case OMP_CLAUSE_TILE: sorry ("Clause not supported yet"); break; =20 @@ -2055,6 +2058,9 @@ scan_sharing_clauses (tree 
clauses, omp_context *ctx) case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_BIND: + case OMP_CLAUSE_NOHOST: + case OMP_CLAUSE_TILE: sorry ("Clause not supported yet"); break; =20 @@ -2742,7 +2748,10 @@ check_omp_nesting_restrictions (gimple stmt, omp_con= text *ctx) { for (omp_context *ctx_ =3D ctx; ctx_ !=3D NULL; ctx_ =3D ctx_->outer) if (is_gimple_omp (ctx_->stmt) - && is_gimple_omp_oacc (ctx_->stmt)) + && is_gimple_omp_oacc (ctx_->stmt) + /* Except for atomic codes that we share with OpenMP. */ + && ! (gimple_code (stmt) =3D=3D GIMPLE_OMP_ATOMIC_LOAD + || gimple_code (stmt) =3D=3D GIMPLE_OMP_ATOMIC_STORE)) { error_at (gimple_location (stmt), "non-OpenACC construct inside of OpenACC region"); diff --git gcc/tree-core.h gcc/tree-core.h index ad1bb23..ffbccda 100644 --- gcc/tree-core.h +++ gcc/tree-core.h @@ -390,7 +390,19 @@ enum omp_clause_code { OMP_CLAUSE_NUM_WORKERS, =20 /* OpenACC clause: vector_length (integer-expression). */ - OMP_CLAUSE_VECTOR_LENGTH + OMP_CLAUSE_VECTOR_LENGTH, + + /* OpenACC clause: bind ( identifier | string ). */ + OMP_CLAUSE_BIND, + + /* OpenACC clause: nohost. */ + OMP_CLAUSE_NOHOST, + + /* OpenACC clause: tile ( size-expr-list ). */ + OMP_CLAUSE_TILE, + + /* OpenACC clause: device_type ( device-type-list). */ + OMP_CLAUSE_DEVICE_TYPE }; =20 #undef DEFTREESTRUCT diff --git gcc/tree-pretty-print.c gcc/tree-pretty-print.c index d7c049f..5eb4daf 100644 --- gcc/tree-pretty-print.c +++ gcc/tree-pretty-print.c @@ -799,6 +799,12 @@ dump_omp_clause (pretty_printer *pp, tree clause, int = spc, int flags) case OMP_CLAUSE_INDEPENDENT: pp_string (pp, "independent"); break; + case OMP_CLAUSE_TILE: + pp_string (pp, "tile("); + dump_generic_node (pp, OMP_CLAUSE_TILE_LIST (clause), + spc, flags, false); + pp_right_paren (pp); + break; =20 default: /* Should never happen. 
*/ diff --git gcc/tree.c gcc/tree.c index daf0292..43f80b7 100644 --- gcc/tree.c +++ gcc/tree.c @@ -369,6 +369,10 @@ unsigned const char omp_clause_num_ops[] =3D 1, /* OMP_CLAUSE_NUM_GANGS */ 1, /* OMP_CLAUSE_NUM_WORKERS */ 1, /* OMP_CLAUSE_VECTOR_LENGTH */ + 1, /* OMP_CLAUSE_BIND */ + 0, /* OMP_CLAUSE_NOHOST */ + 1, /* OMP_CLAUSE_TILE */ + 2 /* OMP_CLAUSE_DEVICE_TYPE */ }; =20 const char * const omp_clause_code_name[] =3D @@ -427,7 +431,11 @@ const char * const omp_clause_code_name[] =3D "vector", "num_gangs", "num_workers", - "vector_length" + "vector_length", + "bind", + "nohost", + "tile", + "device_type" }; =20 =20 @@ -11237,6 +11245,7 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *dat= a, case OMP_CLAUSE__LOOPTEMP_: case OMP_CLAUSE__SIMDUID_: case OMP_CLAUSE__CILK_FOR_COUNT_: + case OMP_CLAUSE_BIND: WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 0)); /* FALLTHRU */ =20 @@ -11255,6 +11264,8 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *dat= a, case OMP_CLAUSE_TASKGROUP: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: + case OMP_CLAUSE_NOHOST: + case OMP_CLAUSE_TILE: WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp)); =20 case OMP_CLAUSE_LASTPRIVATE: diff --git gcc/tree.h gcc/tree.h index e17bd9b..55c5a6d 100644 --- gcc/tree.h +++ gcc/tree.h @@ -1312,15 +1312,6 @@ extern void protected_set_expr_location (tree, locat= ion_t); #define OMP_SECTION_LAST(NODE) \ (OMP_SECTION_CHECK (NODE)->base.private_flag) =20 -/* True on an OACC_KERNELS statement if is represents combined kernels loop - directive. */ -#define OACC_KERNELS_COMBINED(NODE) \ - (OACC_KERNELS_CHECK (NODE)->base.private_flag) - -/* Like OACC_KERNELS_COMBINED, but for parallel loop directive. */ -#define OACC_PARALLEL_COMBINED(NODE) \ - (OACC_PARALLEL_CHECK (NODE)->base.private_flag) - /* True on an OMP_PARALLEL statement if it represents an explicit combined parallel work-sharing constructs. 
*/ #define OMP_PARALLEL_COMBINED(NODE) \ @@ -1391,6 +1382,9 @@ extern void protected_set_expr_location (tree, locati= on_t); #define OMP_CLAUSE_VECTOR_LENGTH_EXPR(NODE) \ OMP_CLAUSE_OPERAND ( \ OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_VECTOR_LENGTH), 0) +#define OMP_CLAUSE_BIND_NAME(NODE) \ + OMP_CLAUSE_OPERAND ( \ + OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_BIND), 0) =20 #define OMP_CLAUSE_DEPEND_KIND(NODE) \ (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEPEND)->omp_clause.subcode.= depend_kind) @@ -1495,6 +1489,15 @@ extern void protected_set_expr_location (tree, locat= ion_t); #define OMP_CLAUSE_DEFAULT_KIND(NODE) \ (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEFAULT)->omp_clause.subcode= .default_kind) =20 +#define OMP_CLAUSE_TILE_LIST(NODE) \ + OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_TILE), 0) + +#define OMP_CLAUSE_DEVICE_TYPE_DEVICES(NODE) \ + OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEVICE_TY= PE), 0) + +#define OMP_CLAUSE_DEVICE_TYPE_CLAUSES(NODE) \ + OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEVICE_TY= PE), 1) + /* SSA_NAME accessors. */ =20 /* Returns the IDENTIFIER_NODE giving the SSA name a name or NULL_TREE diff --git include/gomp-constants.h include/gomp-constants.h index e3d2820..45370b8 100644 --- include/gomp-constants.h +++ include/gomp-constants.h @@ -70,6 +70,10 @@ enum gomp_map_kind /* Is a device pointer. OMP_CLAUSE_SIZE for these is unused; is impli= citly POINTER_SIZE_UNITS. */ GOMP_MAP_FORCE_DEVICEPTR =3D (GOMP_MAP_FLAG_SPECIAL_1 | 0), + /* OpenACC device_resident. */ + GOMP_MAP_DEVICE_RESIDENT =3D (GOMP_MAP_FLAG_SPECIAL_1 | 1), + /* OpenACC link. */ + GOMP_MAP_LINK =3D (GOMP_MAP_FLAG_SPECIAL_1 | 2), /* Allocate. */ GOMP_MAP_FORCE_ALLOC =3D (GOMP_MAP_FLAG_FORCE | GOMP_MAP_ALLOC), /* ..., and copy to device. 
*/ diff --git libgomp/oacc-mem.c libgomp/oacc-mem.c index 89ef5fc..0164b3d 100644 --- libgomp/oacc-mem.c +++ libgomp/oacc-mem.c @@ -479,6 +479,9 @@ update_dev_host (int is_dev, void *h, size_t s) { splay_tree_key n; void *d; + + goacc_lazy_initialize (); + struct goacc_thread *thr =3D goacc_thread (); struct gomp_device_descr *acc_dev =3D thr->dev; =20 diff --git libgomp/oacc-ptx.h libgomp/oacc-ptx.h index 2419a46..104f297 100644 --- libgomp/oacc-ptx.h +++ libgomp/oacc-ptx.h @@ -424,3 +424,31 @@ "st.param.u32 [%out_retval],%retval;\n" \ "ret;\n" \ "}\n" + + #define GOMP_ATOMIC_PTX \ + ".version 3.1\n" \ + ".target sm_30\n" \ + ".address_size 64\n" \ + ".global .align 4 .u32 libgomp_ptx_lock;\n" \ + ".visible .func GOMP_atomic_start;\n" \ + ".visible .func GOMP_atomic_start\n" \ + "{\n" \ + " .reg .pred %p<2>;\n" \ + " .reg .s32 %r<2>;\n" \ + " .reg .s64 %rd<2>;\n" \ + "BB5_1:\n" \ + " mov.u64 %rd1, libgomp_ptx_lock;\n" \ + " atom.global.cas.b32 %r1, [%rd1], 0, 1;\n" \ + " setp.ne.s32 %p1, %r1, 0;\n" \ + " @%p1 bra BB5_1;\n" \ + " ret;\n" \ + "}\n" \ + ".visible .func GOMP_atomic_end;\n" \ + ".visible .func GOMP_atomic_end\n" \ + "{\n" \ + " .reg .s32 %r<2>;\n" \ + " .reg .s64 %rd<2>;\n" \ + " mov.u64 %rd1, libgomp_ptx_lock;\n" \ + " atom.global.exch.b32 %r1, [%rd1], 0;\n" \ + " ret;\n" \ + "}\n" diff --git libgomp/plugin/plugin-nvptx.c libgomp/plugin/plugin-nvptx.c index 583ec87..ad1163d 100644 --- libgomp/plugin/plugin-nvptx.c +++ libgomp/plugin/plugin-nvptx.c @@ -863,6 +863,16 @@ link_ptx (CUmodule *module, char *ptx_code) cuda_error (r)); } =20 + char *gomp_atomic_ptx =3D GOMP_ATOMIC_PTX; + r =3D cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, gomp_atomic_ptx, + strlen (gomp_atomic_ptx) + 1, 0, 0, 0, 0); + if (r !=3D CUDA_SUCCESS) + { + GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]); + GOMP_PLUGIN_fatal ("cuLinkAddData (gomp_atomic_ptx) error: %s", + cuda_error (r)); + } + r =3D cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, ptx_code, strlen (ptx_code) + 1, 0, 0, 
0, 0); if (r !=3D CUDA_SUCCESS) Gr=C3=BC=C3=9Fe, Thomas --=-=-= Content-Type: application/pgp-signature Content-length: 472 -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQEcBAEBAgAGBQJVSIW7AAoJEPoxNhtoi6COu8gIAITmQ+8qpmZ0Czi/P8bi72/9 TUgPXIpCheFPpL7m7OR2+snA8B3C60Y+QWtfUnpCO3B8knIGmXHjSwE6GYRaZUqy IPX36itQBpQFi3pkpekOLvjzQ73rcw6PocyztypJwslkF00smWr8vPwy46ELedW/ 0FLnxKJy0uuJqx4+BtRyr+sc25iXfhKeQioyAwHTTvc6Lrh/ctxfOXoNTmOBTwgQ 2YWX5yoLXWv5t7AYjKfzQfzbaudUMVS/8qykTLtbfCS3RSKgvr1ZmpIuFXI8lxIV 3FtS3QV+idF3mvbo0k/ys0mHsUD2NLLgZm+/QXAUVLWgjiExZxZIMJhKU/uf0Ps= =DFxW -----END PGP SIGNATURE----- --=-=-=--