From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 90969 invoked by alias); 20 Jul 2015 13:19:29 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 90955 invoked by uid 89); 20 Jul 2015 13:19:29 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-3.1 required=5.0 tests=BAYES_00,RP_MATCHES_RCVD,SPF_PASS autolearn=ham version=3.3.2 X-HELO: fencepost.gnu.org Received: from fencepost.gnu.org (HELO fencepost.gnu.org) (208.118.235.10) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted) ESMTPS; Mon, 20 Jul 2015 13:19:26 +0000 Received: from eggs.gnu.org ([2001:4830:134:3::10]:36949) by fencepost.gnu.org with esmtps (TLS1.0:RSA_AES_256_CBC_SHA1:256) (Exim 4.82) (envelope-from ) id 1ZHAya-0000mM-4x for gcc-patches@gnu.org; Mon, 20 Jul 2015 09:19:24 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ZHAyY-0002pF-9Y for gcc-patches@gnu.org; Mon, 20 Jul 2015 09:19:23 -0400 Received: from mail-qg0-x22d.google.com ([2607:f8b0:400d:c04::22d]:36469) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZHAyY-0002kq-3d for gcc-patches@gnu.org; Mon, 20 Jul 2015 09:19:22 -0400 Received: by qgy5 with SMTP id 5so72137757qgy.3 for ; Mon, 20 Jul 2015 06:19:20 -0700 (PDT) X-Received: by 10.140.85.208 with SMTP id n74mr45279396qgd.67.1437398360734; Mon, 20 Jul 2015 06:19:20 -0700 (PDT) Received: from ?IPv6:2601:181:c000:c497:a2a8:cdff:fe3e:b48? 
([2601:181:c000:c497:a2a8:cdff:fe3e:b48]) by smtp.googlemail.com with ESMTPSA id v74sm844948qge.27.2015.07.20.06.19.19 (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Mon, 20 Jul 2015 06:19:19 -0700 (PDT) Subject: Re: [gomp4] New nvptx pattern and internal builtin To: Bernd Schmidt , gcc-patches@gnu.org References: <55A92142.6050008@codesourcery.com> From: Nathan Sidwell Message-ID: <55ACF556.5000306@acm.org> Date: Mon, 20 Jul 2015 13:45:00 -0000 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.1.0 MIME-Version: 1.0 In-Reply-To: <55A92142.6050008@codesourcery.com> Content-Type: multipart/mixed; boundary="------------010603030806090304000902" X-detected-operating-system: by eggs.gnu.org: Error: Malformed IPv6 address (bad octet value). X-Received-From: 2607:f8b0:400d:c04::22d X-SW-Source: 2015-07/txt/msg01647.txt.bz2 This is a multi-part message in MIME format. --------------010603030806090304000902 Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 7bit Content-length: 732 On 07/17/15 11:37, Bernd Schmidt wrote: > I've made this change at the request of Cesar who says it's needed for his > reductions work. It makes a new instruction to represent shfl.down, a thread > communication instruction, and some builtin functions for internal use to access > it. I was looking at adding another target builtin, and found this code rather convoluted. It seemed to have been cloned from somewhere more complicated -- for instance, nvptx_expand_binop_builtin's comment discusses a MACFLAG argument, which is nowhere to be seen. I ended up reimplementing using a single array describing the builtins and allowing direct indexing using the builtin number, rather than iteration when expanding. ok? 
nathan --------------010603030806090304000902 Content-Type: text/x-patch; name="gomp4-blt.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="gomp4-blt.patch" Content-length: 6832 2015-07-20 Nathan Sidwell * config/nvptx/nvptx.c (nvptx_builtins): Delete enum. (nvptx_types): New enum. (builtin_description): Add type and num_args fields. (builtins): New array describing builtins. (NVPTX_BUILTIN_MAX): Define. (def_builtin): Delete. (nvptx_init_builtins): Reimplement using builtins array. (nvptx_expand_binop_builtin): Delete. (bdesc_2arg): Delete. (nvptx_expand_builtin): Reimplement using builtins array. Index: config/nvptx/nvptx.c =================================================================== --- config/nvptx/nvptx.c (revision 225992) +++ config/nvptx/nvptx.c (working copy) @@ -3058,16 +3058,34 @@ nvptx_file_end (void) } } -/* Codes for all the NVPTX builtins. */ -enum nvptx_builtins +enum nvptx_types + { + NT_UINT_UINT_INT, + NT_ULL_ULL_INT, + NT_FLT_FLT_INT, + + NT_MAX + }; + +struct builtin_description { - NVPTX_BUILTIN_SHUFFLE_DOWN, - NVPTX_BUILTIN_SHUFFLE_DOWNF, - NVPTX_BUILTIN_SHUFFLE_DOWNLL, + const char *name; + enum insn_code icode; + unsigned short type; + unsigned short num_args; +}; - NVPTX_BUILTIN_MAX +static const struct builtin_description builtins[] = +{ + {"__builtin_nvptx_shuffle_down", CODE_FOR_thread_shuffle_downsi, + NT_UINT_UINT_INT, 2}, + {"__builtin_nvptx_shuffle_downf", CODE_FOR_thread_shuffle_downsf, + NT_FLT_FLT_INT, 2}, + { "__builtin_nvptx_shuffle_downll", CODE_FOR_thread_shuffle_downdi, + NT_ULL_ULL_INT, 2}, }; +#define NVPTX_BUILTIN_MAX (sizeof (builtins) / sizeof (builtins[0])) static GTY(()) tree nvptx_builtin_decls[NVPTX_BUILTIN_MAX]; @@ -3081,92 +3099,30 @@ nvptx_builtin_decl (unsigned code, bool return nvptx_builtin_decls[code]; } -#define def_builtin(NAME, TYPE, CODE) \ -do { \ - tree bdecl; \ - bdecl = add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \ - NULL, NULL_TREE); \ - 
nvptx_builtin_decls[CODE] = bdecl; \ -} while (0) - /* Set up all builtin functions for this target. */ static void nvptx_init_builtins (void) -{ - tree uint_ftype_uint_int +{ + tree types[NT_MAX]; + unsigned ix; + + types[NT_UINT_UINT_INT] = build_function_type_list (unsigned_type_node, unsigned_type_node, integer_type_node, NULL_TREE); - tree ull_ftype_ull_int + types[NT_ULL_ULL_INT] = build_function_type_list (long_long_unsigned_type_node, long_long_unsigned_type_node, integer_type_node, NULL_TREE); - tree float_ftype_float_int + types[NT_FLT_FLT_INT] = build_function_type_list (float_type_node, float_type_node, integer_type_node, NULL_TREE); - def_builtin ("__builtin_nvptx_shuffle_down", uint_ftype_uint_int, - NVPTX_BUILTIN_SHUFFLE_DOWN); - def_builtin ("__builtin_nvptx_shuffle_downf", float_ftype_float_int, - NVPTX_BUILTIN_SHUFFLE_DOWNF); - def_builtin ("__builtin_nvptx_shuffle_downll", ull_ftype_ull_int, - NVPTX_BUILTIN_SHUFFLE_DOWNLL); -} - -/* Subroutine of nvptx_expand_builtin to take care of binop insns. MACFLAG is -1 - if this is a normal binary op, or one of the MACFLAG_xxx constants. */ - -static rtx -nvptx_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); - rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL); - machine_mode op0mode = GET_MODE (op0); - machine_mode op1mode = GET_MODE (op1); - machine_mode tmode = insn_data[icode].operand[0].mode; - machine_mode mode0 = insn_data[icode].operand[1].mode; - machine_mode mode1 = insn_data[icode].operand[2].mode; - rtx ret = target; - - if (! target - || GET_MODE (target) != tmode - || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - - gcc_assert ((op0mode == mode0 || op0mode == VOIDmode) - && (op1mode == mode1 || op1mode == VOIDmode)); - if (! 
(*insn_data[icode].operand[1].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - - pat = GEN_FCN (icode) (target, op0, op1); - - if (! pat) - return 0; - - emit_insn (pat); - - return ret; + for (ix = 0; ix != NVPTX_BUILTIN_MAX; ix++) + nvptx_builtin_decls[ix] + = add_builtin_function (builtins[ix].name, types[builtins[ix].type], + ix, BUILT_IN_MD, NULL, NULL_TREE); } - -struct builtin_description -{ - const enum insn_code icode; - const char *const name; - const enum nvptx_builtins code; -}; - -static const struct builtin_description bdesc_2arg[] = -{ - { CODE_FOR_thread_shuffle_downsi, "__builtin_nvptx_shuffle_down", NVPTX_BUILTIN_SHUFFLE_DOWN }, - { CODE_FOR_thread_shuffle_downsf, "__builtin_nvptx_shuffle_downf", NVPTX_BUILTIN_SHUFFLE_DOWNF }, - { CODE_FOR_thread_shuffle_downdi, "__builtin_nvptx_shuffle_downll", NVPTX_BUILTIN_SHUFFLE_DOWNLL } -}; - /* Expand an expression EXP that calls a built-in function, with result going to TARGET if that's convenient (and in mode MODE if that's convenient). @@ -3174,21 +3130,41 @@ static const struct builtin_description IGNORE is nonzero if the value is to be ignored. */ static rtx -nvptx_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, +nvptx_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, - machine_mode mode ATTRIBUTE_UNUSED, - int ignore ATTRIBUTE_UNUSED) + machine_mode mode, + int ignore) { - size_t i; - const struct builtin_description *d; tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); - unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + const struct builtin_description *d = &builtins[DECL_FUNCTION_CODE (fndecl)]; + unsigned icode = d->icode; + rtx operands[2]; /* maximum operands */ + unsigned ix; + machine_mode tmode = insn_data[icode].operand[0].mode; + + if (ignore) + return target; + + if (! target + || mode != tmode + || ! 
(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + for (ix = d->num_args; ix--;) + { + machine_mode m = insn_data[icode].operand[ix + 1].mode; + rtx op = expand_expr (CALL_EXPR_ARG (exp, ix), + NULL_RTX, VOIDmode, EXPAND_NORMAL); + if (! (*insn_data[icode].operand[ix + 1].predicate) (op, m)) + op = copy_to_mode_reg (m, op); + operands[ix] = op; + } - for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) - if (d->code == fcode) - return nvptx_expand_binop_builtin (d->icode, exp, target); + rtx pat = GEN_FCN (icode) (target, operands[0], operands[1]); + if (pat) + emit_insn (pat); - gcc_unreachable (); + return target; } #undef TARGET_OPTION_OVERRIDE --------------010603030806090304000902--