From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 95595 invoked by alias); 29 Sep 2015 18:21:31 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 95584 invoked by uid 89); 29 Sep 2015 18:21:31 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=0.9 required=5.0 tests=BAYES_50,FREEMAIL_FROM,KAM_ASCII_DIVIDERS,RCVD_IN_DNSWL_LOW,SPF_PASS autolearn=no version=3.3.2 X-HELO: mail-qg0-f45.google.com Received: from mail-qg0-f45.google.com (HELO mail-qg0-f45.google.com) (209.85.192.45) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-GCM-SHA256 encrypted) ESMTPS; Tue, 29 Sep 2015 18:21:30 +0000 Received: by qgev79 with SMTP id v79so13879391qge.0 for ; Tue, 29 Sep 2015 11:21:28 -0700 (PDT) X-Received: by 10.140.164.141 with SMTP id k135mr32750216qhk.40.1443550887876; Tue, 29 Sep 2015 11:21:27 -0700 (PDT) Received: from ?IPv6:2601:181:c000:c497:a2a8:cdff:fe3e:b48? ([2601:181:c000:c497:a2a8:cdff:fe3e:b48]) by smtp.googlemail.com with ESMTPSA id 19sm9778011qgo.41.2015.09.29.11.21.26 (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 29 Sep 2015 11:21:27 -0700 (PDT) Cc: GCC Patches , Jakub Jelinek To: Bernd Schmidt From: Nathan Sidwell Subject: Fold acc_on_device Message-ID: <560AD6A6.4050109@acm.org> Date: Tue, 29 Sep 2015 18:50:00 -0000 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.2.0 MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------080101080805060206090208" X-SW-Source: 2015-09/txt/msg02246.txt.bz2 This is a multi-part message in MIME format. 
--------------080101080805060206090208 Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 7bit Content-length: 345 This patch folds acc_on_device as a regular builtin, but postpones the folding until we know which compiler (host or accelerator) we're in. As suggested by Bernd, we use the existing builtin folding machinery. Trunk is still using the older PTX runtime scheme (Thomas is working on that), so the only change there is in the host-side libgomp piece. Ok for trunk? nathan --------------080101080805060206090208 Content-Type: text/x-patch; name="trunk-ondev.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="trunk-ondev.patch" Content-length: 3781 2015-09-29 Nathan Sidwell gcc/ * builtins.c (expand_builtin_acc_on_device): Delete. (expand_builtin): Don't call it. (fold_builtin_1): Fold acc_on_device. libgomp/ * oacc-init.c (acc_on_device): Force optimization level. Index: libgomp/oacc-init.c =================================================================== --- libgomp/oacc-init.c (revision 228250) +++ libgomp/oacc-init.c (working copy) @@ -620,10 +620,12 @@ acc_set_device_num (int ord, acc_device_ ialias (acc_set_device_num) -int +/* Compile on_device with optimization, so that the compiler expands + this, rather than generating infinitely recursive code. */ + +int __attribute__ ((__optimize__ ("O2"))) acc_on_device (acc_device_t dev) { - /* Just rely on the compiler builtin. */ return __builtin_acc_on_device (dev); } Index: gcc/builtins.c =================================================================== --- gcc/builtins.c (revision 228250) +++ gcc/builtins.c (working copy) @@ -5859,46 +5859,6 @@ expand_stack_save (void) } -/* Expand OpenACC acc_on_device. - - This has to happen late (that is, not in early folding; expand_builtin_*, - rather than fold_builtin_*), as we have to act differently for host and - acceleration device (ACCEL_COMPILER conditional). 
*/ - -static rtx -expand_builtin_acc_on_device (tree exp, rtx target) -{ - if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE)) - return NULL_RTX; - - tree arg = CALL_EXPR_ARG (exp, 0); - - /* Return (arg == v1 || arg == v2) ? 1 : 0. */ - machine_mode v_mode = TYPE_MODE (TREE_TYPE (arg)); - rtx v = expand_normal (arg), v1, v2; -#ifdef ACCEL_COMPILER - v1 = GEN_INT (GOMP_DEVICE_NOT_HOST); - v2 = GEN_INT (ACCEL_COMPILER_acc_device); -#else - v1 = GEN_INT (GOMP_DEVICE_NONE); - v2 = GEN_INT (GOMP_DEVICE_HOST); -#endif - machine_mode target_mode = TYPE_MODE (integer_type_node); - if (!target || !register_operand (target, target_mode)) - target = gen_reg_rtx (target_mode); - emit_move_insn (target, const1_rtx); - rtx_code_label *done_label = gen_label_rtx (); - do_compare_rtx_and_jump (v, v1, EQ, false, v_mode, NULL_RTX, - NULL, done_label, PROB_EVEN); - do_compare_rtx_and_jump (v, v2, EQ, false, v_mode, NULL_RTX, - NULL, done_label, PROB_EVEN); - emit_move_insn (target, const0_rtx); - emit_label (done_label); - - return target; -} - - /* Expand an expression EXP that calls a built-in function, with result going to TARGET if that's convenient (and in mode MODE if that's convenient). @@ -7036,9 +6996,8 @@ expand_builtin (tree exp, rtx target, rt break; case BUILT_IN_ACC_ON_DEVICE: - target = expand_builtin_acc_on_device (exp, target); - if (target) - return target; + /* Do library call, if we failed to expand the builtin when + folding. */ break; default: /* just do library call, if unknown builtin */ @@ -10271,6 +10230,27 @@ fold_builtin_1 (location_t loc, tree fnd return build_empty_stmt (loc); break; + case BUILT_IN_ACC_ON_DEVICE: + /* Don't fold on_device until we know which compiler is active. 
*/ + if (symtab->state == EXPANSION) + { + unsigned val_host = GOMP_DEVICE_HOST; + unsigned val_dev = GOMP_DEVICE_NONE; + +#ifdef ACCEL_COMPILER + val_host = GOMP_DEVICE_NOT_HOST; + val_dev = ACCEL_COMPILER_acc_device; +#endif + tree host = build2 (EQ_EXPR, boolean_type_node, arg0, + build_int_cst (integer_type_node, val_host)); + tree dev = build2 (EQ_EXPR, boolean_type_node, arg0, + build_int_cst (integer_type_node, val_dev)); + + tree result = build2 (TRUTH_OR_EXPR, boolean_type_node, host, dev); + return fold_convert (integer_type_node, result); + } + break; + default: break; } --------------080101080805060206090208--