MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Type: text/plain; charset="utf-8"
From: Jeff Law
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r13-3346] Remove accidental commits
X-Act-Checkin: gcc
X-Git-Author: Jeff Law
X-Git-Refname: refs/heads/master
X-Git-Oldrev: 566c5f1aaae120d2283103e68ecf1c1a83dd4459
X-Git-Newrev: f6e93b7b48195037d6c545104c952b97e05ad381
Message-Id: <20221017233655.3C7983858C50@sourceware.org>
Date: Mon, 17 Oct 2022 23:36:55 +0000 (GMT)
List-Id:

https://gcc.gnu.org/g:f6e93b7b48195037d6c545104c952b97e05ad381

commit r13-3346-gf6e93b7b48195037d6c545104c952b97e05ad381
Author: Jeff Law
Date:   Mon Oct 17 17:33:52 2022 -0600

    Remove accidental commits

    gcc/
            * config/i386/cet.c: Remove accidental commit.
            * config/i386/driver-mingw32.c: Likewise.
            * config/i386/i386-builtins.c: Likewise.
            * config/i386/i386-d.c: Likewise.
            * config/i386/i386-expand.c: Likewise.
            * config/i386/i386-features.c: Likewise.
            * config/i386/i386-options.c: Likewise.
            * config/i386/t-cet: Likewise.
            * config/i386/x86-tune-sched-atom.c: Likewise.
            * config/i386/x86-tune-sched-bd.c: Likewise.
            * config/i386/x86-tune-sched-core.c: Likewise.
            * config/i386/x86-tune-sched.c: Likewise.

Diff:
---
 gcc/config/i386/cet.c                 |    76 -
 gcc/config/i386/driver-mingw32.c      |    28 -
 gcc/config/i386/i386-builtins.c       |  2546 ----
 gcc/config/i386/i386-d.c              |    44 -
 gcc/config/i386/i386-expand.c         | 20310 --------------------------------
 gcc/config/i386/i386-features.c       |  2884 -----
 gcc/config/i386/i386-options.c        |  3799 ------
 gcc/config/i386/t-cet                 |    21 -
 gcc/config/i386/x86-tune-sched-atom.c |   246 -
 gcc/config/i386/x86-tune-sched-bd.c   |   824 --
 gcc/config/i386/x86-tune-sched-core.c |   257 -
 gcc/config/i386/x86-tune-sched.c      |   636 -
 12 files changed, 31671 deletions(-)

diff --git a/gcc/config/i386/cet.c b/gcc/config/i386/cet.c
deleted file mode 100644
index 5450ac307d5..00000000000
--- a/gcc/config/i386/cet.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/* Functions for CET/x86.
-   Copyright (C) 2017-2020 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3.  If not see
-<http://www.gnu.org/licenses/>.
*/ - -#include "config.h" -#include "system.h" -#include "coretypes.h" -#include "tm.h" -#include "output.h" -#include "linux-common.h" - -void -file_end_indicate_exec_stack_and_cet (void) -{ - file_end_indicate_exec_stack (); - - if (flag_cf_protection == CF_NONE) - return; - - unsigned int feature_1 = 0; - - if (flag_cf_protection & CF_BRANCH) - /* GNU_PROPERTY_X86_FEATURE_1_IBT. */ - feature_1 |= 0x1; - - if (flag_cf_protection & CF_RETURN) - /* GNU_PROPERTY_X86_FEATURE_1_SHSTK. */ - feature_1 |= 0x2; - - if (feature_1) - { - int p2align = ptr_mode == SImode ? 2 : 3; - - /* Generate GNU_PROPERTY_X86_FEATURE_1_XXX. */ - switch_to_section (get_section (".note.gnu.property", - SECTION_NOTYPE, NULL)); - - ASM_OUTPUT_ALIGN (asm_out_file, p2align); - /* name length. */ - fprintf (asm_out_file, ASM_LONG " 1f - 0f\n"); - /* data length. */ - fprintf (asm_out_file, ASM_LONG " 4f - 1f\n"); - /* note type: NT_GNU_PROPERTY_TYPE_0. */ - fprintf (asm_out_file, ASM_LONG " 5\n"); - fprintf (asm_out_file, "0:\n"); - /* vendor name: "GNU". */ - fprintf (asm_out_file, STRING_ASM_OP " \"GNU\"\n"); - fprintf (asm_out_file, "1:\n"); - ASM_OUTPUT_ALIGN (asm_out_file, p2align); - /* pr_type: GNU_PROPERTY_X86_FEATURE_1_AND. */ - fprintf (asm_out_file, ASM_LONG " 0xc0000002\n"); - /* pr_datasz. */\ - fprintf (asm_out_file, ASM_LONG " 3f - 2f\n"); - fprintf (asm_out_file, "2:\n"); - /* GNU_PROPERTY_X86_FEATURE_1_XXX. */ - fprintf (asm_out_file, ASM_LONG " 0x%x\n", feature_1); - fprintf (asm_out_file, "3:\n"); - ASM_OUTPUT_ALIGN (asm_out_file, p2align); - fprintf (asm_out_file, "4:\n"); - } -} diff --git a/gcc/config/i386/driver-mingw32.c b/gcc/config/i386/driver-mingw32.c deleted file mode 100644 index d0517e6759d..00000000000 --- a/gcc/config/i386/driver-mingw32.c +++ /dev/null @@ -1,28 +0,0 @@ -/* Host OS specific configuration for the gcc driver. - Copyright (C) 2017-2020 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -. */ - -#define IN_TARGET_CODE 1 - -#include "config.h" - -/* When defined, force the use (if non null) or not (otherwise) of CLI - globbing. */ -#ifdef MINGW_DOWILDCARD -int _dowildcard = MINGW_DOWILDCARD; -#endif diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c deleted file mode 100644 index be3ed0158f2..00000000000 --- a/gcc/config/i386/i386-builtins.c +++ /dev/null @@ -1,2546 +0,0 @@ -/* Copyright (C) 1988-2020 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -. */ - -#define IN_TARGET_CODE 1 - -#include "config.h" -#include "system.h" -#include "coretypes.h" -#include "backend.h" -#include "rtl.h" -#include "tree.h" -#include "memmodel.h" -#include "gimple.h" -#include "cfghooks.h" -#include "cfgloop.h" -#include "df.h" -#include "tm_p.h" -#include "stringpool.h" -#include "expmed.h" -#include "optabs.h" -#include "regs.h" -#include "emit-rtl.h" -#include "recog.h" -#include "cgraph.h" -#include "diagnostic.h" -#include "cfgbuild.h" -#include "alias.h" -#include "fold-const.h" -#include "attribs.h" -#include "calls.h" -#include "stor-layout.h" -#include "varasm.h" -#include "output.h" -#include "insn-attr.h" -#include "flags.h" -#include "except.h" -#include "explow.h" -#include "expr.h" -#include "cfgrtl.h" -#include "common/common-target.h" -#include "langhooks.h" -#include "reload.h" -#include "gimplify.h" -#include "dwarf2.h" -#include "tm-constrs.h" -#include "cselib.h" -#include "sched-int.h" -#include "opts.h" -#include "tree-pass.h" -#include "context.h" -#include "pass_manager.h" -#include "target-globals.h" -#include "gimple-iterator.h" -#include "tree-vectorizer.h" -#include "shrink-wrap.h" -#include "builtins.h" -#include "rtl-iter.h" -#include "tree-iterator.h" -#include "dbgcnt.h" -#include "case-cfn-macros.h" -#include "dojump.h" -#include "fold-const-call.h" -#include "tree-vrp.h" -#include "tree-ssanames.h" -#include "selftest.h" -#include "selftest-rtl.h" -#include "print-rtl.h" -#include "intl.h" -#include "ifcvt.h" -#include "symbol-summary.h" -#include "ipa-prop.h" -#include "ipa-fnsummary.h" -#include "wide-int-bitmask.h" -#include "tree-vector-builder.h" -#include "debug.h" -#include "dwarf2out.h" -#include "i386-builtins.h" - -#undef BDESC -#undef BDESC_FIRST -#undef BDESC_END - -/* Macros for verification of enum ix86_builtins order. */ -#define BDESC_VERIFY(x, y, z) \ - gcc_checking_assert ((x) == (enum ix86_builtins) ((y) + (z))) -#define BDESC_VERIFYS(x, y, z) \ - STATIC_ASSERT ((x) == (enum ix86_builtins) ((y) + (z))) - -BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_FIRST, - IX86_BUILTIN__BDESC_COMI_LAST, 1); -BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST, - IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1); -BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, - IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1); -BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST, - IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1); -BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, - IX86_BUILTIN__BDESC_ARGS_LAST, 1); -BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, - IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, 1); -BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_FIRST, - IX86_BUILTIN__BDESC_MULTI_ARG_LAST, 1); -BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, - IX86_BUILTIN__BDESC_CET_LAST, 1); -BDESC_VERIFYS (IX86_BUILTIN_MAX, - IX86_BUILTIN__BDESC_CET_NORMAL_LAST, 1); - - -/* Table for the ix86 builtin non-function types. */ -static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1]; - -/* Retrieve an element from the above table, building some of - the types lazily. 
*/ - -static tree -ix86_get_builtin_type (enum ix86_builtin_type tcode) -{ - unsigned int index; - tree type, itype; - - gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab)); - - type = ix86_builtin_type_tab[(int) tcode]; - if (type != NULL) - return type; - - gcc_assert (tcode > IX86_BT_LAST_PRIM); - if (tcode <= IX86_BT_LAST_VECT) - { - machine_mode mode; - - index = tcode - IX86_BT_LAST_PRIM - 1; - itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]); - mode = ix86_builtin_type_vect_mode[index]; - - type = build_vector_type_for_mode (itype, mode); - } - else - { - int quals; - - index = tcode - IX86_BT_LAST_VECT - 1; - if (tcode <= IX86_BT_LAST_PTR) - quals = TYPE_UNQUALIFIED; - else - quals = TYPE_QUAL_CONST; - - itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]); - if (quals != TYPE_UNQUALIFIED) - itype = build_qualified_type (itype, quals); - - type = build_pointer_type (itype); - } - - ix86_builtin_type_tab[(int) tcode] = type; - return type; -} - -/* Table for the ix86 builtin function types. */ -static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1]; - -/* Retrieve an element from the above table, building some of - the types lazily. */ - -static tree -ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode) -{ - tree type; - - gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab)); - - type = ix86_builtin_func_type_tab[(int) tcode]; - if (type != NULL) - return type; - - if (tcode <= IX86_BT_LAST_FUNC) - { - unsigned start = ix86_builtin_func_start[(int) tcode]; - unsigned after = ix86_builtin_func_start[(int) tcode + 1]; - tree rtype, atype, args = void_list_node; - unsigned i; - - rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]); - for (i = after - 1; i > start; --i) - { - atype = ix86_get_builtin_type (ix86_builtin_func_args[i]); - args = tree_cons (NULL, atype, args); - } - - type = build_function_type (rtype, args); - } - else - { - unsigned index = tcode - IX86_BT_LAST_FUNC - 1; - enum ix86_builtin_func_type icode; - - icode = ix86_builtin_func_alias_base[index]; - type = ix86_get_builtin_func_type (icode); - } - - ix86_builtin_func_type_tab[(int) tcode] = type; - return type; -} - -/* Table for the ix86 builtin decls. */ -static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX]; - -struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX]; - -tree get_ix86_builtin (enum ix86_builtins c) -{ - return ix86_builtins[c]; -} - -/* Bits that can still enable any inclusion of a builtin. */ -HOST_WIDE_INT deferred_isa_values = 0; -HOST_WIDE_INT deferred_isa_values2 = 0; - -/* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the - MASK and MASK2 of which isa_flags and ix86_isa_flags2 to use in the - ix86_builtins_isa array. Stores the function decl in the ix86_builtins - array. Returns the function decl or NULL_TREE, if the builtin was not - added. - - If the front end has a special hook for builtin functions, delay adding - builtin functions that aren't in the current ISA until the ISA is changed - with function specific optimization. Doing so, can save about 300K for the - default compiler. When the builtin is expanded, check at that time whether - it is valid. - - If the front end doesn't have a special hook, record all builtins, even if - it isn't an instruction set in the current ISA in case the user uses - function specific options for a different ISA, so that we don't get scope - errors if a builtin is added in the middle of a function scope. 
*/ - -static inline tree -def_builtin (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, - const char *name, - enum ix86_builtin_func_type tcode, - enum ix86_builtins code) -{ - tree decl = NULL_TREE; - - /* An instruction may be 64bit only regardless of ISAs. */ - if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT) - { - ix86_builtins_isa[(int) code].isa = mask; - ix86_builtins_isa[(int) code].isa2 = mask2; - - mask &= ~OPTION_MASK_ISA_64BIT; - - /* Filter out the masks most often ored together with others. */ - if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL) - && mask != OPTION_MASK_ISA_AVX512VL) - mask &= ~OPTION_MASK_ISA_AVX512VL; - if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512BW) - && mask != OPTION_MASK_ISA_AVX512BW) - mask &= ~OPTION_MASK_ISA_AVX512BW; - - if (((mask2 == 0 || (mask2 & ix86_isa_flags2) != 0) - && (mask == 0 || (mask & ix86_isa_flags) != 0)) - || ((mask & OPTION_MASK_ISA_MMX) != 0 && TARGET_MMX_WITH_SSE) - || (lang_hooks.builtin_function - == lang_hooks.builtin_function_ext_scope)) - { - tree type = ix86_get_builtin_func_type (tcode); - decl = add_builtin_function (name, type, code, BUILT_IN_MD, - NULL, NULL_TREE); - ix86_builtins[(int) code] = decl; - ix86_builtins_isa[(int) code].set_and_not_built_p = false; - } - else - { - /* Just MASK and MASK2 where set_and_not_built_p == true can potentially - include a builtin. */ - deferred_isa_values |= mask; - deferred_isa_values2 |= mask2; - ix86_builtins[(int) code] = NULL_TREE; - ix86_builtins_isa[(int) code].tcode = tcode; - ix86_builtins_isa[(int) code].name = name; - ix86_builtins_isa[(int) code].const_p = false; - ix86_builtins_isa[(int) code].pure_p = false; - ix86_builtins_isa[(int) code].set_and_not_built_p = true; - } - } - - return decl; -} - -/* Like def_builtin, but also marks the function decl "const". */ - -static inline tree -def_builtin_const (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, const char *name, - enum ix86_builtin_func_type tcode, enum ix86_builtins code) -{ - tree decl = def_builtin (mask, mask2, name, tcode, code); - if (decl) - TREE_READONLY (decl) = 1; - else - ix86_builtins_isa[(int) code].const_p = true; - - return decl; -} - -/* Like def_builtin, but also marks the function decl "pure". */ - -static inline tree -def_builtin_pure (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, const char *name, - enum ix86_builtin_func_type tcode, enum ix86_builtins code) -{ - tree decl = def_builtin (mask, mask2, name, tcode, code); - if (decl) - DECL_PURE_P (decl) = 1; - else - ix86_builtins_isa[(int) code].pure_p = true; - - return decl; -} - -/* Add any new builtin functions for a given ISA that may not have been - declared. This saves a bit of space compared to adding all of the - declarations to the tree, even if we didn't use them. */ - -void -ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2) -{ - isa &= ~OPTION_MASK_ISA_64BIT; - - if ((isa & deferred_isa_values) == 0 - && (isa2 & deferred_isa_values2) == 0 - && ((deferred_isa_values & OPTION_MASK_ISA_MMX) == 0 - || !(TARGET_64BIT && (isa & OPTION_MASK_ISA_SSE2) != 0))) - return; - - /* Bits in ISA value can be removed from potential isa values. 
*/ - deferred_isa_values &= ~isa; - deferred_isa_values2 &= ~isa2; - if (TARGET_64BIT && (isa & OPTION_MASK_ISA_SSE2) != 0) - deferred_isa_values &= ~OPTION_MASK_ISA_MMX; - - int i; - tree saved_current_target_pragma = current_target_pragma; - current_target_pragma = NULL_TREE; - - for (i = 0; i < (int)IX86_BUILTIN_MAX; i++) - { - if (((ix86_builtins_isa[i].isa & isa) != 0 - || (ix86_builtins_isa[i].isa2 & isa2) != 0 - || ((ix86_builtins_isa[i].isa & OPTION_MASK_ISA_MMX) != 0 - && TARGET_64BIT - && (isa & OPTION_MASK_ISA_SSE2) != 0)) - && ix86_builtins_isa[i].set_and_not_built_p) - { - tree decl, type; - - /* Don't define the builtin again. */ - ix86_builtins_isa[i].set_and_not_built_p = false; - - type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode); - decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name, - type, i, BUILT_IN_MD, NULL, - NULL_TREE); - - ix86_builtins[i] = decl; - if (ix86_builtins_isa[i].const_p) - TREE_READONLY (decl) = 1; - } - } - - current_target_pragma = saved_current_target_pragma; -} - -/* TM vector builtins. */ - -/* Reuse the existing x86-specific `struct builtin_description' cause - we're lazy. Add casts to make them fit. */ -static const struct builtin_description bdesc_tm[] = -{ - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, - - { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, - { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, - { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, - { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, - { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, - { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, - { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, - - { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, - { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, 
"__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, - { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, - { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, - { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, - { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, - { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, - - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID }, - { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID }, - { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID }, -}; - -/* Initialize the transactional memory vector load/store builtins. */ - -static void -ix86_init_tm_builtins (void) -{ - enum ix86_builtin_func_type ftype; - const struct builtin_description *d; - size_t i; - tree decl; - tree attrs_load, attrs_type_load, attrs_store, attrs_type_store; - tree attrs_log, attrs_type_log; - - if (!flag_tm) - return; - - /* If there are no builtins defined, we must be compiling in a - language without trans-mem support. */ - if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1)) - return; - - /* Use whatever attributes a normal TM load has. */ - decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1); - attrs_load = DECL_ATTRIBUTES (decl); - attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl)); - /* Use whatever attributes a normal TM store has. */ - decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1); - attrs_store = DECL_ATTRIBUTES (decl); - attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl)); - /* Use whatever attributes a normal TM log has. */ - decl = builtin_decl_explicit (BUILT_IN_TM_LOG); - attrs_log = DECL_ATTRIBUTES (decl); - attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl)); - - for (i = 0, d = bdesc_tm; - i < ARRAY_SIZE (bdesc_tm); - i++, d++) - { - if ((d->mask & ix86_isa_flags) != 0 - || ((d->mask & OPTION_MASK_ISA_MMX) != 0 && TARGET_MMX_WITH_SSE) - || (lang_hooks.builtin_function - == lang_hooks.builtin_function_ext_scope)) - { - tree type, attrs, attrs_type; - enum built_in_function code = (enum built_in_function) d->code; - - ftype = (enum ix86_builtin_func_type) d->flag; - type = ix86_get_builtin_func_type (ftype); - - if (BUILTIN_TM_LOAD_P (code)) - { - attrs = attrs_load; - attrs_type = attrs_type_load; - } - else if (BUILTIN_TM_STORE_P (code)) - { - attrs = attrs_store; - attrs_type = attrs_type_store; - } - else - { - attrs = attrs_log; - attrs_type = attrs_type_log; - } - decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL, - /* The builtin without the prefix for - calling it directly. */ - d->name + strlen ("__builtin_"), - attrs); - /* add_builtin_function() will set the DECL_ATTRIBUTES, now - set the TYPE_ATTRIBUTES. 
*/ - decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN); - - set_builtin_decl (code, decl, false); - } - } -} - -/* Set up all the MMX/SSE builtins, even builtins for instructions that are not - in the current target ISA to allow the user to compile particular modules - with different target specific options that differ from the command line - options. */ -static void -ix86_init_mmx_sse_builtins (void) -{ - const struct builtin_description * d; - enum ix86_builtin_func_type ftype; - size_t i; - - /* Add all special builtins with variable number of operands. */ - for (i = 0, d = bdesc_special_args; - i < ARRAY_SIZE (bdesc_special_args); - i++, d++) - { - BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, i); - if (d->name == 0) - continue; - - ftype = (enum ix86_builtin_func_type) d->flag; - def_builtin (d->mask, d->mask2, d->name, ftype, d->code); - } - BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, - IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, - ARRAY_SIZE (bdesc_special_args) - 1); - - /* Add all builtins with variable number of operands. */ - for (i = 0, d = bdesc_args; - i < ARRAY_SIZE (bdesc_args); - i++, d++) - { - BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS_FIRST, i); - if (d->name == 0) - continue; - - ftype = (enum ix86_builtin_func_type) d->flag; - def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); - } - BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_LAST, - IX86_BUILTIN__BDESC_ARGS_FIRST, - ARRAY_SIZE (bdesc_args) - 1); - - /* Add all builtins with rounding. */ - for (i = 0, d = bdesc_round_args; - i < ARRAY_SIZE (bdesc_round_args); - i++, d++) - { - BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, i); - if (d->name == 0) - continue; - - ftype = (enum ix86_builtin_func_type) d->flag; - def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); - } - BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, - IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, - ARRAY_SIZE (bdesc_round_args) - 1); - - /* pcmpestr[im] insns. */ - for (i = 0, d = bdesc_pcmpestr; - i < ARRAY_SIZE (bdesc_pcmpestr); - i++, d++) - { - BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPESTR_FIRST, i); - if (d->code == IX86_BUILTIN_PCMPESTRM128) - ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT; - else - ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT; - def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); - } - BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_LAST, - IX86_BUILTIN__BDESC_PCMPESTR_FIRST, - ARRAY_SIZE (bdesc_pcmpestr) - 1); - - /* pcmpistr[im] insns. */ - for (i = 0, d = bdesc_pcmpistr; - i < ARRAY_SIZE (bdesc_pcmpistr); - i++, d++) - { - BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPISTR_FIRST, i); - if (d->code == IX86_BUILTIN_PCMPISTRM128) - ftype = V16QI_FTYPE_V16QI_V16QI_INT; - else - ftype = INT_FTYPE_V16QI_V16QI_INT; - def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); - } - BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_LAST, - IX86_BUILTIN__BDESC_PCMPISTR_FIRST, - ARRAY_SIZE (bdesc_pcmpistr) - 1); - - /* comi/ucomi insns. 
*/ - for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) - { - BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_COMI_FIRST, i); - if (d->mask == OPTION_MASK_ISA_SSE2) - ftype = INT_FTYPE_V2DF_V2DF; - else - ftype = INT_FTYPE_V4SF_V4SF; - def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); - } - BDESC_VERIFYS (IX86_BUILTIN__BDESC_COMI_LAST, - IX86_BUILTIN__BDESC_COMI_FIRST, - ARRAY_SIZE (bdesc_comi) - 1); - - /* SSE */ - def_builtin (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_ldmxcsr", - VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR); - def_builtin_pure (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_stmxcsr", - UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR); - - /* SSE or 3DNow!A */ - def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A - /* As it uses V4HImode, we have to require -mmmx too. */ - | OPTION_MASK_ISA_MMX, 0, - "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR, - IX86_BUILTIN_MASKMOVQ); - - /* SSE2 */ - def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_maskmovdqu", - VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU); - - def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_clflush", - VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH); - x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_mfence", - VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE); - - /* SSE3. */ - def_builtin (OPTION_MASK_ISA_SSE3, 0, "__builtin_ia32_monitor", - VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR); - def_builtin (OPTION_MASK_ISA_SSE3, 0, "__builtin_ia32_mwait", - VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT); - - /* AES */ - def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, - "__builtin_ia32_aesenc128", - V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128); - def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, - "__builtin_ia32_aesenclast128", - V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128); - def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, - "__builtin_ia32_aesdec128", - V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128); - def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, - "__builtin_ia32_aesdeclast128", - V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128); - def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, - "__builtin_ia32_aesimc128", - V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128); - def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, - "__builtin_ia32_aeskeygenassist128", - V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128); - - /* PCLMUL */ - def_builtin_const (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2, 0, - "__builtin_ia32_pclmulqdq128", - V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128); - - /* RDRND */ - def_builtin (OPTION_MASK_ISA_RDRND, 0, "__builtin_ia32_rdrand16_step", - INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP); - def_builtin (OPTION_MASK_ISA_RDRND, 0, "__builtin_ia32_rdrand32_step", - INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP); - def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, 0, - "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG, - IX86_BUILTIN_RDRAND64_STEP); - - /* AVX2 */ - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv2df", - V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT, - IX86_BUILTIN_GATHERSIV2DF); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4df", - V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT, - IX86_BUILTIN_GATHERSIV4DF); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv2df", - V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT, - IX86_BUILTIN_GATHERDIV2DF); - - 
def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4df", - V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT, - IX86_BUILTIN_GATHERDIV4DF); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4sf", - V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT, - IX86_BUILTIN_GATHERSIV4SF); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv8sf", - V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT, - IX86_BUILTIN_GATHERSIV8SF); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4sf", - V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT, - IX86_BUILTIN_GATHERDIV4SF); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4sf256", - V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT, - IX86_BUILTIN_GATHERDIV8SF); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv2di", - V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT, - IX86_BUILTIN_GATHERSIV2DI); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4di", - V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT, - IX86_BUILTIN_GATHERSIV4DI); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv2di", - V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT, - IX86_BUILTIN_GATHERDIV2DI); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4di", - V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT, - IX86_BUILTIN_GATHERDIV4DI); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4si", - V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT, - IX86_BUILTIN_GATHERSIV4SI); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv8si", - V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT, - IX86_BUILTIN_GATHERSIV8SI); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4si", - V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT, - IX86_BUILTIN_GATHERDIV4SI); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4si256", - V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT, - IX86_BUILTIN_GATHERDIV8SI); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltsiv4df ", - V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT, - IX86_BUILTIN_GATHERALTSIV4DF); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltdiv8sf ", - V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT, - IX86_BUILTIN_GATHERALTDIV8SF); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltsiv4di ", - V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT, - IX86_BUILTIN_GATHERALTSIV4DI); - - def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltdiv8si ", - V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT, - IX86_BUILTIN_GATHERALTDIV8SI); - - /* AVX512F */ - def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv16sf", - V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT, - IX86_BUILTIN_GATHER3SIV16SF); - - def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv8df", - V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT, - IX86_BUILTIN_GATHER3SIV8DF); - - def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv16sf", - V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT, - IX86_BUILTIN_GATHER3DIV16SF); - - def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv8df", - V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT, - IX86_BUILTIN_GATHER3DIV8DF); - - def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv16si", - V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT, - IX86_BUILTIN_GATHER3SIV16SI); - - def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv8di", - V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT, - IX86_BUILTIN_GATHER3SIV8DI); - - def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, 
"__builtin_ia32_gatherdiv16si", - V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT, - IX86_BUILTIN_GATHER3DIV16SI); - - def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv8di", - V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT, - IX86_BUILTIN_GATHER3DIV8DI); - - def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altsiv8df ", - V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT, - IX86_BUILTIN_GATHER3ALTSIV8DF); - - def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altdiv16sf ", - V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT, - IX86_BUILTIN_GATHER3ALTDIV16SF); - - def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altsiv8di ", - V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT, - IX86_BUILTIN_GATHER3ALTSIV8DI); - - def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altdiv16si ", - V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT, - IX86_BUILTIN_GATHER3ALTDIV16SI); - - def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv16sf", - VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT, - IX86_BUILTIN_SCATTERSIV16SF); - - def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv8df", - VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT, - IX86_BUILTIN_SCATTERSIV8DF); - - def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv16sf", - VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT, - IX86_BUILTIN_SCATTERDIV16SF); - - def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv8df", - VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT, - IX86_BUILTIN_SCATTERDIV8DF); - - def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv16si", - VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT, - IX86_BUILTIN_SCATTERSIV16SI); - - def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv8di", - VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT, - IX86_BUILTIN_SCATTERSIV8DI); - - def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv16si", - VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT, - IX86_BUILTIN_SCATTERDIV16SI); - - def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv8di", - VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT, - IX86_BUILTIN_SCATTERDIV8DI); - - /* AVX512VL */ - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv2df", - V2DF_FTYPE_V2DF_PCVOID_V4SI_QI_INT, - IX86_BUILTIN_GATHER3SIV2DF); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4df", - V4DF_FTYPE_V4DF_PCVOID_V4SI_QI_INT, - IX86_BUILTIN_GATHER3SIV4DF); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div2df", - V2DF_FTYPE_V2DF_PCVOID_V2DI_QI_INT, - IX86_BUILTIN_GATHER3DIV2DF); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4df", - V4DF_FTYPE_V4DF_PCVOID_V4DI_QI_INT, - IX86_BUILTIN_GATHER3DIV4DF); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4sf", - V4SF_FTYPE_V4SF_PCVOID_V4SI_QI_INT, - IX86_BUILTIN_GATHER3SIV4SF); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv8sf", - V8SF_FTYPE_V8SF_PCVOID_V8SI_QI_INT, - IX86_BUILTIN_GATHER3SIV8SF); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4sf", - V4SF_FTYPE_V4SF_PCVOID_V2DI_QI_INT, - IX86_BUILTIN_GATHER3DIV4SF); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div8sf", - V4SF_FTYPE_V4SF_PCVOID_V4DI_QI_INT, - IX86_BUILTIN_GATHER3DIV8SF); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv2di", - V2DI_FTYPE_V2DI_PCVOID_V4SI_QI_INT, - IX86_BUILTIN_GATHER3SIV2DI); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4di", - 
V4DI_FTYPE_V4DI_PCVOID_V4SI_QI_INT, - IX86_BUILTIN_GATHER3SIV4DI); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div2di", - V2DI_FTYPE_V2DI_PCVOID_V2DI_QI_INT, - IX86_BUILTIN_GATHER3DIV2DI); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4di", - V4DI_FTYPE_V4DI_PCVOID_V4DI_QI_INT, - IX86_BUILTIN_GATHER3DIV4DI); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4si", - V4SI_FTYPE_V4SI_PCVOID_V4SI_QI_INT, - IX86_BUILTIN_GATHER3SIV4SI); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv8si", - V8SI_FTYPE_V8SI_PCVOID_V8SI_QI_INT, - IX86_BUILTIN_GATHER3SIV8SI); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4si", - V4SI_FTYPE_V4SI_PCVOID_V2DI_QI_INT, - IX86_BUILTIN_GATHER3DIV4SI); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div8si", - V4SI_FTYPE_V4SI_PCVOID_V4DI_QI_INT, - IX86_BUILTIN_GATHER3DIV8SI); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altsiv4df ", - V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT, - IX86_BUILTIN_GATHER3ALTSIV4DF); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altdiv8sf ", - V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT, - IX86_BUILTIN_GATHER3ALTDIV8SF); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altsiv4di ", - V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT, - IX86_BUILTIN_GATHER3ALTSIV4DI); - - def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altdiv8si ", - V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT, - IX86_BUILTIN_GATHER3ALTDIV8SI); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv8sf", - VOID_FTYPE_PVOID_QI_V8SI_V8SF_INT, - IX86_BUILTIN_SCATTERSIV8SF); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4sf", - VOID_FTYPE_PVOID_QI_V4SI_V4SF_INT, - IX86_BUILTIN_SCATTERSIV4SF); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4df", - VOID_FTYPE_PVOID_QI_V4SI_V4DF_INT, - IX86_BUILTIN_SCATTERSIV4DF); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv2df", - VOID_FTYPE_PVOID_QI_V4SI_V2DF_INT, - IX86_BUILTIN_SCATTERSIV2DF); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv8sf", - VOID_FTYPE_PVOID_QI_V4DI_V4SF_INT, - IX86_BUILTIN_SCATTERDIV8SF); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4sf", - VOID_FTYPE_PVOID_QI_V2DI_V4SF_INT, - IX86_BUILTIN_SCATTERDIV4SF); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4df", - VOID_FTYPE_PVOID_QI_V4DI_V4DF_INT, - IX86_BUILTIN_SCATTERDIV4DF); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv2df", - VOID_FTYPE_PVOID_QI_V2DI_V2DF_INT, - IX86_BUILTIN_SCATTERDIV2DF); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv8si", - VOID_FTYPE_PVOID_QI_V8SI_V8SI_INT, - IX86_BUILTIN_SCATTERSIV8SI); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4si", - VOID_FTYPE_PVOID_QI_V4SI_V4SI_INT, - IX86_BUILTIN_SCATTERSIV4SI); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4di", - VOID_FTYPE_PVOID_QI_V4SI_V4DI_INT, - IX86_BUILTIN_SCATTERSIV4DI); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv2di", - VOID_FTYPE_PVOID_QI_V4SI_V2DI_INT, - IX86_BUILTIN_SCATTERSIV2DI); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv8si", - VOID_FTYPE_PVOID_QI_V4DI_V4SI_INT, - IX86_BUILTIN_SCATTERDIV8SI); - - def_builtin 
(OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4si", - VOID_FTYPE_PVOID_QI_V2DI_V4SI_INT, - IX86_BUILTIN_SCATTERDIV4SI); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4di", - VOID_FTYPE_PVOID_QI_V4DI_V4DI_INT, - IX86_BUILTIN_SCATTERDIV4DI); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv2di", - VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT, - IX86_BUILTIN_SCATTERDIV2DI); - - def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltsiv8df ", - VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT, - IX86_BUILTIN_SCATTERALTSIV8DF); - - def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltdiv16sf ", - VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT, - IX86_BUILTIN_SCATTERALTDIV16SF); - - def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltsiv8di ", - VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT, - IX86_BUILTIN_SCATTERALTSIV8DI); - - def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltdiv16si ", - VOID_FTYPE_PINT_HI_V8DI_V16SI_INT, - IX86_BUILTIN_SCATTERALTDIV16SI); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv4df ", - VOID_FTYPE_PDOUBLE_QI_V8SI_V4DF_INT, - IX86_BUILTIN_SCATTERALTSIV4DF); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv8sf ", - VOID_FTYPE_PFLOAT_QI_V4DI_V8SF_INT, - IX86_BUILTIN_SCATTERALTDIV8SF); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv4di ", - VOID_FTYPE_PLONGLONG_QI_V8SI_V4DI_INT, - IX86_BUILTIN_SCATTERALTSIV4DI); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv8si ", - VOID_FTYPE_PINT_QI_V4DI_V8SI_INT, - IX86_BUILTIN_SCATTERALTDIV8SI); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv2df ", - VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT, - IX86_BUILTIN_SCATTERALTSIV2DF); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv4sf ", - VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT, - IX86_BUILTIN_SCATTERALTDIV4SF); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv2di ", - VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT, - IX86_BUILTIN_SCATTERALTSIV2DI); - - def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv4si ", - VOID_FTYPE_PINT_QI_V2DI_V4SI_INT, - IX86_BUILTIN_SCATTERALTDIV4SI); - - /* AVX512PF */ - def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfdpd", - VOID_FTYPE_QI_V8SI_PCVOID_INT_INT, - IX86_BUILTIN_GATHERPFDPD); - def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfdps", - VOID_FTYPE_HI_V16SI_PCVOID_INT_INT, - IX86_BUILTIN_GATHERPFDPS); - def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfqpd", - VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, - IX86_BUILTIN_GATHERPFQPD); - def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfqps", - VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, - IX86_BUILTIN_GATHERPFQPS); - def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfdpd", - VOID_FTYPE_QI_V8SI_PCVOID_INT_INT, - IX86_BUILTIN_SCATTERPFDPD); - def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfdps", - VOID_FTYPE_HI_V16SI_PCVOID_INT_INT, - IX86_BUILTIN_SCATTERPFDPS); - def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfqpd", - VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, - IX86_BUILTIN_SCATTERPFQPD); - def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfqps", - VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, - IX86_BUILTIN_SCATTERPFQPS); - - /* SHA */ - def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1msg1", - V4SI_FTYPE_V4SI_V4SI, 
IX86_BUILTIN_SHA1MSG1); - def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1msg2", - V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2); - def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1nexte", - V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE); - def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1rnds4", - V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4); - def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256msg1", - V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1); - def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256msg2", - V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2); - def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256rnds2", - V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2); - - /* RTM. */ - def_builtin (OPTION_MASK_ISA_RTM, 0, "__builtin_ia32_xabort", - VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT); - - /* MMX access to the vec_init patterns. */ - def_builtin_const (OPTION_MASK_ISA_MMX, 0, - "__builtin_ia32_vec_init_v2si", - V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI); - - def_builtin_const (OPTION_MASK_ISA_MMX, 0, - "__builtin_ia32_vec_init_v4hi", - V4HI_FTYPE_HI_HI_HI_HI, - IX86_BUILTIN_VEC_INIT_V4HI); - - def_builtin_const (OPTION_MASK_ISA_MMX, 0, - "__builtin_ia32_vec_init_v8qi", - V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI, - IX86_BUILTIN_VEC_INIT_V8QI); - - /* Access to the vec_extract patterns. */ - def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v2df", - DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF); - def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v2di", - DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI); - def_builtin_const (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_vec_ext_v4sf", - FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF); - def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v4si", - SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI); - def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v8hi", - HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI); - - def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A - /* As it uses V4HImode, we have to require -mmmx too. */ - | OPTION_MASK_ISA_MMX, 0, - "__builtin_ia32_vec_ext_v4hi", - HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI); - - def_builtin_const (OPTION_MASK_ISA_MMX, 0, - "__builtin_ia32_vec_ext_v2si", - SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI); - - def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v16qi", - QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI); - - /* Access to the vec_set patterns. */ - def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, 0, - "__builtin_ia32_vec_set_v2di", - V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI); - - def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v4sf", - V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF); - - def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v4si", - V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI); - - def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_set_v8hi", - V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI); - - def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A - /* As it uses V4HImode, we have to require -mmmx too. 
*/ - | OPTION_MASK_ISA_MMX, 0, - "__builtin_ia32_vec_set_v4hi", - V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI); - - def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v16qi", - V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI); - - /* RDSEED */ - def_builtin (OPTION_MASK_ISA_RDSEED, 0, "__builtin_ia32_rdseed_hi_step", - INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP); - def_builtin (OPTION_MASK_ISA_RDSEED, 0, "__builtin_ia32_rdseed_si_step", - INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP); - def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT, 0, - "__builtin_ia32_rdseed_di_step", - INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP); - - /* ADCX */ - def_builtin (0, 0, "__builtin_ia32_addcarryx_u32", - UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32); - def_builtin (OPTION_MASK_ISA_64BIT, 0, - "__builtin_ia32_addcarryx_u64", - UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, - IX86_BUILTIN_ADDCARRYX64); - - /* SBB */ - def_builtin (0, 0, "__builtin_ia32_sbb_u32", - UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32); - def_builtin (OPTION_MASK_ISA_64BIT, 0, - "__builtin_ia32_sbb_u64", - UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, - IX86_BUILTIN_SBB64); - - /* Read/write FLAGS. */ - if (TARGET_64BIT) - { - def_builtin (OPTION_MASK_ISA_64BIT, 0, "__builtin_ia32_readeflags_u64", - UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS); - def_builtin (OPTION_MASK_ISA_64BIT, 0, "__builtin_ia32_writeeflags_u64", - VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS); - } - else - { - def_builtin (0, 0, "__builtin_ia32_readeflags_u32", - UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS); - def_builtin (0, 0, "__builtin_ia32_writeeflags_u32", - VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS); - } - - /* CLFLUSHOPT. */ - def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, 0, "__builtin_ia32_clflushopt", - VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT); - - /* CLWB. */ - def_builtin (OPTION_MASK_ISA_CLWB, 0, "__builtin_ia32_clwb", - VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB); - - /* MONITORX and MWAITX. */ - def_builtin (0, OPTION_MASK_ISA2_MWAITX, "__builtin_ia32_monitorx", - VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX); - def_builtin (0, OPTION_MASK_ISA2_MWAITX, "__builtin_ia32_mwaitx", - VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX); - - /* CLZERO. */ - def_builtin (0, OPTION_MASK_ISA2_CLZERO, "__builtin_ia32_clzero", - VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO); - - /* WAITPKG. */ - def_builtin (0, OPTION_MASK_ISA2_WAITPKG, "__builtin_ia32_umonitor", - VOID_FTYPE_PVOID, IX86_BUILTIN_UMONITOR); - def_builtin (0, OPTION_MASK_ISA2_WAITPKG, "__builtin_ia32_umwait", - UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_UMWAIT); - def_builtin (0, OPTION_MASK_ISA2_WAITPKG, "__builtin_ia32_tpause", - UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_TPAUSE); - - /* CLDEMOTE. */ - def_builtin (0, OPTION_MASK_ISA2_CLDEMOTE, "__builtin_ia32_cldemote", - VOID_FTYPE_PCVOID, IX86_BUILTIN_CLDEMOTE); - - /* Add FMA4 multi-arg argument instructions */ - for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) - { - BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, i); - if (d->name == 0) - continue; - - ftype = (enum ix86_builtin_func_type) d->flag; - def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); - } - BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_LAST, - IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, - ARRAY_SIZE (bdesc_multi_arg) - 1); - - /* Add CET inrinsics. 
*/ - for (i = 0, d = bdesc_cet; i < ARRAY_SIZE (bdesc_cet); i++, d++) - { - BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_FIRST, i); - if (d->name == 0) - continue; - - ftype = (enum ix86_builtin_func_type) d->flag; - def_builtin (d->mask, d->mask2, d->name, ftype, d->code); - } - BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_LAST, - IX86_BUILTIN__BDESC_CET_FIRST, - ARRAY_SIZE (bdesc_cet) - 1); - - for (i = 0, d = bdesc_cet_rdssp; - i < ARRAY_SIZE (bdesc_cet_rdssp); - i++, d++) - { - BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, i); - if (d->name == 0) - continue; - - ftype = (enum ix86_builtin_func_type) d->flag; - def_builtin (d->mask, d->mask2, d->name, ftype, d->code); - } - BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_LAST, - IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, - ARRAY_SIZE (bdesc_cet_rdssp) - 1); -} - -#undef BDESC_VERIFY -#undef BDESC_VERIFYS - -/* Make builtins to detect cpu type and features supported. NAME is - the builtin name, CODE is the builtin code, and FTYPE is the function - type of the builtin. */ - -static void -make_cpu_type_builtin (const char* name, int code, - enum ix86_builtin_func_type ftype, bool is_const) -{ - tree decl; - tree type; - - type = ix86_get_builtin_func_type (ftype); - decl = add_builtin_function (name, type, code, BUILT_IN_MD, - NULL, NULL_TREE); - gcc_assert (decl != NULL_TREE); - ix86_builtins[(int) code] = decl; - TREE_READONLY (decl) = is_const; -} - -/* Make builtins to get CPU type and features supported. The created - builtins are : - - __builtin_cpu_init (), to detect cpu type and features, - __builtin_cpu_is (""), to check if cpu is of type , - __builtin_cpu_supports (""), to check if cpu supports - */ - -static void -ix86_init_platform_type_builtins (void) -{ - make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT, - INT_FTYPE_VOID, false); - make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS, - INT_FTYPE_PCCHAR, true); - make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS, - INT_FTYPE_PCCHAR, true); -} - -/* Internal method for ix86_init_builtins. 
*/ - -static void -ix86_init_builtins_va_builtins_abi (void) -{ - tree ms_va_ref, sysv_va_ref; - tree fnvoid_va_end_ms, fnvoid_va_end_sysv; - tree fnvoid_va_start_ms, fnvoid_va_start_sysv; - tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv; - tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE; - - if (!TARGET_64BIT) - return; - fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE); - fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE); - ms_va_ref = build_reference_type (ms_va_list_type_node); - sysv_va_ref = build_pointer_type (TREE_TYPE (sysv_va_list_type_node)); - - fnvoid_va_end_ms = build_function_type_list (void_type_node, ms_va_ref, - NULL_TREE); - fnvoid_va_start_ms - = build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE); - fnvoid_va_end_sysv - = build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE); - fnvoid_va_start_sysv - = build_varargs_function_type_list (void_type_node, sysv_va_ref, - NULL_TREE); - fnvoid_va_copy_ms - = build_function_type_list (void_type_node, ms_va_ref, - ms_va_list_type_node, NULL_TREE); - fnvoid_va_copy_sysv - = build_function_type_list (void_type_node, sysv_va_ref, - sysv_va_ref, NULL_TREE); - - add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms, - BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms); - add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms, - BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms); - add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms, - BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms); - add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv, - BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv); - add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv, - BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv); - add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv, - BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv); -} - -static void -ix86_init_builtin_types (void) -{ - tree float80_type_node, const_string_type_node; - - /* The __float80 type. */ - float80_type_node = long_double_type_node; - if (TYPE_MODE (float80_type_node) != XFmode) - { - if (float64x_type_node != NULL_TREE - && TYPE_MODE (float64x_type_node) == XFmode) - float80_type_node = float64x_type_node; - else - { - /* The __float80 type. */ - float80_type_node = make_node (REAL_TYPE); - - TYPE_PRECISION (float80_type_node) = 80; - layout_type (float80_type_node); - } - } - lang_hooks.types.register_builtin_type (float80_type_node, "__float80"); - - /* The __float128 type. The node has already been created as - _Float128, so we only need to register the __float128 name for - it. */ - lang_hooks.types.register_builtin_type (float128_type_node, "__float128"); - - const_string_type_node - = build_pointer_type (build_qualified_type - (char_type_node, TYPE_QUAL_CONST)); - - /* This macro is built by i386-builtin-types.awk. */ - DEFINE_BUILTIN_PRIMITIVE_TYPES; -} - -void -ix86_init_builtins (void) -{ - tree ftype, decl; - - ix86_init_builtin_types (); - - /* Builtins to get CPU type and features. */ - ix86_init_platform_type_builtins (); - - /* TFmode support builtins. 
*/ - def_builtin_const (0, 0, "__builtin_infq", - FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ); - def_builtin_const (0, 0, "__builtin_huge_valq", - FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ); - - ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_CONST_STRING); - decl = add_builtin_function ("__builtin_nanq", ftype, IX86_BUILTIN_NANQ, - BUILT_IN_MD, "nanq", NULL_TREE); - TREE_READONLY (decl) = 1; - ix86_builtins[(int) IX86_BUILTIN_NANQ] = decl; - - decl = add_builtin_function ("__builtin_nansq", ftype, IX86_BUILTIN_NANSQ, - BUILT_IN_MD, "nansq", NULL_TREE); - TREE_READONLY (decl) = 1; - ix86_builtins[(int) IX86_BUILTIN_NANSQ] = decl; - - /* We will expand them to normal call if SSE isn't available since - they are used by libgcc. */ - ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128); - decl = add_builtin_function ("__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ, - BUILT_IN_MD, "__fabstf2", NULL_TREE); - TREE_READONLY (decl) = 1; - ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl; - - ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128); - decl = add_builtin_function ("__builtin_copysignq", ftype, - IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD, - "__copysigntf3", NULL_TREE); - TREE_READONLY (decl) = 1; - ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl; - - ix86_init_tm_builtins (); - ix86_init_mmx_sse_builtins (); - - if (TARGET_LP64) - ix86_init_builtins_va_builtins_abi (); - -#ifdef SUBTARGET_INIT_BUILTINS - SUBTARGET_INIT_BUILTINS; -#endif -} - -/* Return the ix86 builtin for CODE. */ - -tree -ix86_builtin_decl (unsigned code, bool) -{ - if (code >= IX86_BUILTIN_MAX) - return error_mark_node; - - return ix86_builtins[code]; -} - -/* This returns the target-specific builtin with code CODE if - current_function_decl has visibility on this builtin, which is checked - using isa flags. Returns NULL_TREE otherwise. */ - -static tree ix86_get_builtin (enum ix86_builtins code) -{ - struct cl_target_option *opts; - tree target_tree = NULL_TREE; - - /* Determine the isa flags of current_function_decl. */ - - if (current_function_decl) - target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl); - - if (target_tree == NULL) - target_tree = target_option_default_node; - - opts = TREE_TARGET_OPTION (target_tree); - - if ((ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags) - || (ix86_builtins_isa[(int) code].isa2 & opts->x_ix86_isa_flags2)) - return ix86_builtin_decl (code, true); - else - return NULL_TREE; -} - -/* Vectorization library interface and handlers. */ -tree (*ix86_veclib_handler) (combined_fn, tree, tree); - -/* Returns a function decl for a vectorized version of the combined function - with combined_fn code FN and the result vector type TYPE, or NULL_TREE - if it is not available. */ - -tree -ix86_builtin_vectorized_function (unsigned int fn, tree type_out, - tree type_in) -{ - machine_mode in_mode, out_mode; - int in_n, out_n; - - if (TREE_CODE (type_out) != VECTOR_TYPE - || TREE_CODE (type_in) != VECTOR_TYPE) - return NULL_TREE; - - out_mode = TYPE_MODE (TREE_TYPE (type_out)); - out_n = TYPE_VECTOR_SUBPARTS (type_out); - in_mode = TYPE_MODE (TREE_TYPE (type_in)); - in_n = TYPE_VECTOR_SUBPARTS (type_in); - - switch (fn) - { - CASE_CFN_EXP2: - if (out_mode == SFmode && in_mode == SFmode) - { - if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_EXP2PS); - } - break; - - CASE_CFN_IFLOOR: - CASE_CFN_LFLOOR: - CASE_CFN_LLFLOOR: - /* The round insn does not trap on denormals. 
*/ - if (flag_trapping_math || !TARGET_SSE4_1) - break; - - if (out_mode == SImode && in_mode == DFmode) - { - if (out_n == 4 && in_n == 2) - return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX); - else if (out_n == 8 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256); - else if (out_n == 16 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512); - } - if (out_mode == SImode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512); - } - break; - - CASE_CFN_ICEIL: - CASE_CFN_LCEIL: - CASE_CFN_LLCEIL: - /* The round insn does not trap on denormals. */ - if (flag_trapping_math || !TARGET_SSE4_1) - break; - - if (out_mode == SImode && in_mode == DFmode) - { - if (out_n == 4 && in_n == 2) - return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX); - else if (out_n == 8 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256); - else if (out_n == 16 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512); - } - if (out_mode == SImode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512); - } - break; - - CASE_CFN_IRINT: - CASE_CFN_LRINT: - CASE_CFN_LLRINT: - if (out_mode == SImode && in_mode == DFmode) - { - if (out_n == 4 && in_n == 2) - return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX); - else if (out_n == 8 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256); - else if (out_n == 16 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512); - } - if (out_mode == SImode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512); - } - break; - - CASE_CFN_IROUND: - CASE_CFN_LROUND: - CASE_CFN_LLROUND: - /* The round insn does not trap on denormals. */ - if (flag_trapping_math || !TARGET_SSE4_1) - break; - - if (out_mode == SImode && in_mode == DFmode) - { - if (out_n == 4 && in_n == 2) - return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX); - else if (out_n == 8 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256); - else if (out_n == 16 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512); - } - if (out_mode == SImode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512); - } - break; - - CASE_CFN_FLOOR: - /* The round insn does not trap on denormals. 
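The CASE_CFN_* handlers in ix86_builtin_vectorized_function let the auto-vectorizer map scalar floor/ceil/round/trunc calls onto the packed rounding builtins named here. A user-level sketch of a loop that becomes a candidate once SSE4.1 is enabled and trapping math is off (for example -O3 -msse4.1 -fno-trapping-math; flags shown for illustration only):

/* May be vectorized via the packed floor builtins selected above.  */
void
floor_all (double *restrict out, const double *restrict in, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = __builtin_floor (in[i]);
}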
*/ - if (flag_trapping_math || !TARGET_SSE4_1) - break; - - if (out_mode == DFmode && in_mode == DFmode) - { - if (out_n == 2 && in_n == 2) - return ix86_get_builtin (IX86_BUILTIN_FLOORPD); - else if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_FLOORPD256); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_FLOORPD512); - } - if (out_mode == SFmode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS512); - } - break; - - CASE_CFN_CEIL: - /* The round insn does not trap on denormals. */ - if (flag_trapping_math || !TARGET_SSE4_1) - break; - - if (out_mode == DFmode && in_mode == DFmode) - { - if (out_n == 2 && in_n == 2) - return ix86_get_builtin (IX86_BUILTIN_CEILPD); - else if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_CEILPD256); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_CEILPD512); - } - if (out_mode == SFmode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_CEILPS); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_CEILPS256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_CEILPS512); - } - break; - - CASE_CFN_TRUNC: - /* The round insn does not trap on denormals. */ - if (flag_trapping_math || !TARGET_SSE4_1) - break; - - if (out_mode == DFmode && in_mode == DFmode) - { - if (out_n == 2 && in_n == 2) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPD); - else if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512); - } - if (out_mode == SFmode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPS); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512); - } - break; - - CASE_CFN_FMA: - if (out_mode == DFmode && in_mode == DFmode) - { - if (out_n == 2 && in_n == 2) - return ix86_get_builtin (IX86_BUILTIN_VFMADDPD); - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256); - } - if (out_mode == SFmode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_VFMADDPS); - if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256); - } - break; - - default: - break; - } - - /* Dispatch to a handler for a vectorization library. */ - if (ix86_veclib_handler) - return ix86_veclib_handler (combined_fn (fn), type_out, type_in); - - return NULL_TREE; -} - -/* Returns a decl of a function that implements gather load with - memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE. - Return NULL_TREE if it is not available. */ - -tree -ix86_vectorize_builtin_gather (const_tree mem_vectype, - const_tree index_type, int scale) -{ - bool si; - enum ix86_builtins code; - - if (! 
TARGET_AVX2 || !TARGET_USE_GATHER) - return NULL_TREE; - - if ((TREE_CODE (index_type) != INTEGER_TYPE - && !POINTER_TYPE_P (index_type)) - || (TYPE_MODE (index_type) != SImode - && TYPE_MODE (index_type) != DImode)) - return NULL_TREE; - - if (TYPE_PRECISION (index_type) > POINTER_SIZE) - return NULL_TREE; - - /* v*gather* insn sign extends index to pointer mode. */ - if (TYPE_PRECISION (index_type) < POINTER_SIZE - && TYPE_UNSIGNED (index_type)) - return NULL_TREE; - - if (scale <= 0 - || scale > 8 - || (scale & (scale - 1)) != 0) - return NULL_TREE; - - si = TYPE_MODE (index_type) == SImode; - switch (TYPE_MODE (mem_vectype)) - { - case E_V2DFmode: - if (TARGET_AVX512VL) - code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF; - else - code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF; - break; - case E_V4DFmode: - if (TARGET_AVX512VL) - code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF; - else - code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF; - break; - case E_V2DImode: - if (TARGET_AVX512VL) - code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI; - else - code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI; - break; - case E_V4DImode: - if (TARGET_AVX512VL) - code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI; - else - code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI; - break; - case E_V4SFmode: - if (TARGET_AVX512VL) - code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF; - else - code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF; - break; - case E_V8SFmode: - if (TARGET_AVX512VL) - code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF; - else - code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF; - break; - case E_V4SImode: - if (TARGET_AVX512VL) - code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI; - else - code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI; - break; - case E_V8SImode: - if (TARGET_AVX512VL) - code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI; - else - code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI; - break; - case E_V8DFmode: - if (TARGET_AVX512F) - code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF; - else - return NULL_TREE; - break; - case E_V8DImode: - if (TARGET_AVX512F) - code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI; - else - return NULL_TREE; - break; - case E_V16SFmode: - if (TARGET_AVX512F) - code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF; - else - return NULL_TREE; - break; - case E_V16SImode: - if (TARGET_AVX512F) - code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI; - else - return NULL_TREE; - break; - default: - return NULL_TREE; - } - - return ix86_get_builtin (code); -} - -/* Returns a code for a target-specific builtin that implements - reciprocal of the function, or NULL_TREE if not available. */ - -tree -ix86_builtin_reciprocal (tree fndecl) -{ - enum ix86_builtins fn_code - = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl); - switch (fn_code) - { - /* Vectorized version of sqrt to rsqrt conversion. 
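ix86_vectorize_builtin_gather above is how indexed loads get mapped onto the hardware gather builtins. A sketch of the kind of loop that can use it, assuming AVX2 is enabled and the active tuning considers gathers profitable (e.g. -O3 -mavx2):

/* Indexed load: a candidate for vgatherdps via the builtins above.  */
void
gather_f (float *restrict out, const float *restrict tab,
          const int *restrict idx, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = tab[idx[i]];
}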
*/ - case IX86_BUILTIN_SQRTPS_NR: - return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR); - - case IX86_BUILTIN_SQRTPS_NR256: - return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256); - - default: - return NULL_TREE; - } -} - -/* Priority of i386 features, greater value is higher priority. This is - used to decide the order in which function dispatch must happen. For - instance, a version specialized for SSE4.2 should be checked for dispatch - before a version for SSE3, as SSE4.2 implies SSE3. */ -enum feature_priority -{ - P_ZERO = 0, - P_MMX, - P_SSE, - P_SSE2, - P_SSE3, - P_SSSE3, - P_PROC_SSSE3, - P_SSE4_A, - P_PROC_SSE4_A, - P_SSE4_1, - P_SSE4_2, - P_PROC_SSE4_2, - P_POPCNT, - P_AES, - P_PCLMUL, - P_AVX, - P_PROC_AVX, - P_BMI, - P_PROC_BMI, - P_FMA4, - P_XOP, - P_PROC_XOP, - P_FMA, - P_PROC_FMA, - P_BMI2, - P_AVX2, - P_PROC_AVX2, - P_AVX512F, - P_PROC_AVX512F -}; - -/* This is the order of bit-fields in __processor_features in cpuinfo.c */ -enum processor_features -{ - F_CMOV = 0, - F_MMX, - F_POPCNT, - F_SSE, - F_SSE2, - F_SSE3, - F_SSSE3, - F_SSE4_1, - F_SSE4_2, - F_AVX, - F_AVX2, - F_SSE4_A, - F_FMA4, - F_XOP, - F_FMA, - F_AVX512F, - F_BMI, - F_BMI2, - F_AES, - F_PCLMUL, - F_AVX512VL, - F_AVX512BW, - F_AVX512DQ, - F_AVX512CD, - F_AVX512ER, - F_AVX512PF, - F_AVX512VBMI, - F_AVX512IFMA, - F_AVX5124VNNIW, - F_AVX5124FMAPS, - F_AVX512VPOPCNTDQ, - F_AVX512VBMI2, - F_GFNI, - F_VPCLMULQDQ, - F_AVX512VNNI, - F_AVX512BITALG, - F_AVX512BF16, - F_AVX512VP2INTERSECT, - F_MAX -}; - -/* These are the values for vendor types and cpu types and subtypes - in cpuinfo.c. Cpu types and subtypes should be subtracted by - the corresponding start value. */ -enum processor_model -{ - M_INTEL = 1, - M_AMD, - M_CPU_TYPE_START, - M_INTEL_BONNELL, - M_INTEL_CORE2, - M_INTEL_COREI7, - M_AMDFAM10H, - M_AMDFAM15H, - M_INTEL_SILVERMONT, - M_INTEL_KNL, - M_AMD_BTVER1, - M_AMD_BTVER2, - M_AMDFAM17H, - M_INTEL_KNM, - M_INTEL_GOLDMONT, - M_INTEL_GOLDMONT_PLUS, - M_INTEL_TREMONT, - M_CPU_SUBTYPE_START, - M_INTEL_COREI7_NEHALEM, - M_INTEL_COREI7_WESTMERE, - M_INTEL_COREI7_SANDYBRIDGE, - M_AMDFAM10H_BARCELONA, - M_AMDFAM10H_SHANGHAI, - M_AMDFAM10H_ISTANBUL, - M_AMDFAM15H_BDVER1, - M_AMDFAM15H_BDVER2, - M_AMDFAM15H_BDVER3, - M_AMDFAM15H_BDVER4, - M_AMDFAM17H_ZNVER1, - M_INTEL_COREI7_IVYBRIDGE, - M_INTEL_COREI7_HASWELL, - M_INTEL_COREI7_BROADWELL, - M_INTEL_COREI7_SKYLAKE, - M_INTEL_COREI7_SKYLAKE_AVX512, - M_INTEL_COREI7_CANNONLAKE, - M_INTEL_COREI7_ICELAKE_CLIENT, - M_INTEL_COREI7_ICELAKE_SERVER, - M_AMDFAM17H_ZNVER2, - M_INTEL_COREI7_CASCADELAKE, - M_INTEL_COREI7_TIGERLAKE, - M_INTEL_COREI7_COOPERLAKE -}; - -struct _arch_names_table -{ - const char *const name; - const enum processor_model model; -}; - -static const _arch_names_table arch_names_table[] = -{ - {"amd", M_AMD}, - {"intel", M_INTEL}, - {"atom", M_INTEL_BONNELL}, - {"slm", M_INTEL_SILVERMONT}, - {"core2", M_INTEL_CORE2}, - {"corei7", M_INTEL_COREI7}, - {"nehalem", M_INTEL_COREI7_NEHALEM}, - {"westmere", M_INTEL_COREI7_WESTMERE}, - {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE}, - {"ivybridge", M_INTEL_COREI7_IVYBRIDGE}, - {"haswell", M_INTEL_COREI7_HASWELL}, - {"broadwell", M_INTEL_COREI7_BROADWELL}, - {"skylake", M_INTEL_COREI7_SKYLAKE}, - {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512}, - {"cannonlake", M_INTEL_COREI7_CANNONLAKE}, - {"icelake-client", M_INTEL_COREI7_ICELAKE_CLIENT}, - {"icelake-server", M_INTEL_COREI7_ICELAKE_SERVER}, - {"cascadelake", M_INTEL_COREI7_CASCADELAKE}, - {"tigerlake", M_INTEL_COREI7_TIGERLAKE}, - {"cooperlake", 
M_INTEL_COREI7_COOPERLAKE}, - {"bonnell", M_INTEL_BONNELL}, - {"silvermont", M_INTEL_SILVERMONT}, - {"goldmont", M_INTEL_GOLDMONT}, - {"goldmont-plus", M_INTEL_GOLDMONT_PLUS}, - {"tremont", M_INTEL_TREMONT}, - {"knl", M_INTEL_KNL}, - {"knm", M_INTEL_KNM}, - {"amdfam10h", M_AMDFAM10H}, - {"barcelona", M_AMDFAM10H_BARCELONA}, - {"shanghai", M_AMDFAM10H_SHANGHAI}, - {"istanbul", M_AMDFAM10H_ISTANBUL}, - {"btver1", M_AMD_BTVER1}, - {"amdfam15h", M_AMDFAM15H}, - {"bdver1", M_AMDFAM15H_BDVER1}, - {"bdver2", M_AMDFAM15H_BDVER2}, - {"bdver3", M_AMDFAM15H_BDVER3}, - {"bdver4", M_AMDFAM15H_BDVER4}, - {"btver2", M_AMD_BTVER2}, - {"amdfam17h", M_AMDFAM17H}, - {"znver1", M_AMDFAM17H_ZNVER1}, - {"znver2", M_AMDFAM17H_ZNVER2}, -}; - -/* These are the target attribute strings for which a dispatcher is - available, from fold_builtin_cpu. */ -struct _isa_names_table -{ - const char *const name; - const enum processor_features feature; - const enum feature_priority priority; -}; - -static const _isa_names_table isa_names_table[] = -{ - {"cmov", F_CMOV, P_ZERO}, - {"mmx", F_MMX, P_MMX}, - {"popcnt", F_POPCNT, P_POPCNT}, - {"sse", F_SSE, P_SSE}, - {"sse2", F_SSE2, P_SSE2}, - {"sse3", F_SSE3, P_SSE3}, - {"ssse3", F_SSSE3, P_SSSE3}, - {"sse4a", F_SSE4_A, P_SSE4_A}, - {"sse4.1", F_SSE4_1, P_SSE4_1}, - {"sse4.2", F_SSE4_2, P_SSE4_2}, - {"avx", F_AVX, P_AVX}, - {"fma4", F_FMA4, P_FMA4}, - {"xop", F_XOP, P_XOP}, - {"fma", F_FMA, P_FMA}, - {"avx2", F_AVX2, P_AVX2}, - {"avx512f", F_AVX512F, P_AVX512F}, - {"bmi", F_BMI, P_BMI}, - {"bmi2", F_BMI2, P_BMI2}, - {"aes", F_AES, P_AES}, - {"pclmul", F_PCLMUL, P_PCLMUL}, - {"avx512vl",F_AVX512VL, P_ZERO}, - {"avx512bw",F_AVX512BW, P_ZERO}, - {"avx512dq",F_AVX512DQ, P_ZERO}, - {"avx512cd",F_AVX512CD, P_ZERO}, - {"avx512er",F_AVX512ER, P_ZERO}, - {"avx512pf",F_AVX512PF, P_ZERO}, - {"avx512vbmi",F_AVX512VBMI, P_ZERO}, - {"avx512ifma",F_AVX512IFMA, P_ZERO}, - {"avx5124vnniw",F_AVX5124VNNIW, P_ZERO}, - {"avx5124fmaps",F_AVX5124FMAPS, P_ZERO}, - {"avx512vpopcntdq",F_AVX512VPOPCNTDQ, P_ZERO}, - {"avx512vbmi2", F_AVX512VBMI2, P_ZERO}, - {"gfni", F_GFNI, P_ZERO}, - {"vpclmulqdq", F_VPCLMULQDQ, P_ZERO}, - {"avx512vnni", F_AVX512VNNI, P_ZERO}, - {"avx512bitalg", F_AVX512BITALG, P_ZERO}, - {"avx512bf16", F_AVX512BF16, P_ZERO}, - {"avx512vp2intersect",F_AVX512VP2INTERSECT, P_ZERO} -}; - -/* This parses the attribute arguments to target in DECL and determines - the right builtin to use to match the platform specification. - It returns the priority value for this version decl. If PREDICATE_LIST - is not NULL, it stores the list of cpu features that need to be checked - before dispatching this function. */ - -unsigned int -get_builtin_code_for_version (tree decl, tree *predicate_list) -{ - tree attrs; - struct cl_target_option cur_target; - tree target_node; - struct cl_target_option *new_target; - const char *arg_str = NULL; - const char *attrs_str = NULL; - char *tok_str = NULL; - char *token; - - enum feature_priority priority = P_ZERO; - - static unsigned int NUM_FEATURES - = sizeof (isa_names_table) / sizeof (_isa_names_table); - - unsigned int i; - - tree predicate_chain = NULL_TREE; - tree predicate_decl, predicate_arg; - - attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl)); - gcc_assert (attrs != NULL); - - attrs = TREE_VALUE (TREE_VALUE (attrs)); - - gcc_assert (TREE_CODE (attrs) == STRING_CST); - attrs_str = TREE_STRING_POINTER (attrs); - - /* Return priority zero for default function. 
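The isa_names_table entries and feature_priority values above drive function multi-versioning dispatch. A user-level sketch of the feature they serve (GNU C target_clones; the clone list is only an example):

/* The resolver generated for this function tests the highest-priority
   clone first, following the priorities defined above.  */
__attribute__ ((target_clones ("default", "sse4.2", "avx2", "arch=skylake-avx512")))
double
dot (const double *a, const double *b, int n)
{
  double s = 0.0;
  for (int i = 0; i < n; i++)
    s += a[i] * b[i];
  return s;
}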
*/ - if (strcmp (attrs_str, "default") == 0) - return 0; - - /* Handle arch= if specified. For priority, set it to be 1 more than - the best instruction set the processor can handle. For instance, if - there is a version for atom and a version for ssse3 (the highest ISA - priority for atom), the atom version must be checked for dispatch - before the ssse3 version. */ - if (strstr (attrs_str, "arch=") != NULL) - { - cl_target_option_save (&cur_target, &global_options); - target_node - = ix86_valid_target_attribute_tree (decl, attrs, &global_options, - &global_options_set, 0); - - gcc_assert (target_node); - if (target_node == error_mark_node) - return 0; - new_target = TREE_TARGET_OPTION (target_node); - gcc_assert (new_target); - - if (new_target->arch_specified && new_target->arch > 0) - { - switch (new_target->arch) - { - case PROCESSOR_CORE2: - arg_str = "core2"; - priority = P_PROC_SSSE3; - break; - case PROCESSOR_NEHALEM: - if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_PCLMUL) - { - arg_str = "westmere"; - priority = P_PCLMUL; - } - else - { - /* We translate "arch=corei7" and "arch=nehalem" to - "corei7" so that it will be mapped to M_INTEL_COREI7 - as cpu type to cover all M_INTEL_COREI7_XXXs. */ - arg_str = "corei7"; - priority = P_PROC_SSE4_2; - } - break; - case PROCESSOR_SANDYBRIDGE: - if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C) - arg_str = "ivybridge"; - else - arg_str = "sandybridge"; - priority = P_PROC_AVX; - break; - case PROCESSOR_HASWELL: - if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX) - arg_str = "broadwell"; - else - arg_str = "haswell"; - priority = P_PROC_AVX2; - break; - case PROCESSOR_SKYLAKE: - arg_str = "skylake"; - priority = P_PROC_AVX2; - break; - case PROCESSOR_SKYLAKE_AVX512: - arg_str = "skylake-avx512"; - priority = P_PROC_AVX512F; - break; - case PROCESSOR_CANNONLAKE: - arg_str = "cannonlake"; - priority = P_PROC_AVX512F; - break; - case PROCESSOR_ICELAKE_CLIENT: - arg_str = "icelake-client"; - priority = P_PROC_AVX512F; - break; - case PROCESSOR_ICELAKE_SERVER: - arg_str = "icelake-server"; - priority = P_PROC_AVX512F; - break; - case PROCESSOR_CASCADELAKE: - arg_str = "cascadelake"; - priority = P_PROC_AVX512F; - break; - case PROCESSOR_TIGERLAKE: - arg_str = "tigerlake"; - priority = P_PROC_AVX512F; - break; - case PROCESSOR_COOPERLAKE: - arg_str = "cooperlake"; - priority = P_PROC_AVX512F; - break; - case PROCESSOR_BONNELL: - arg_str = "bonnell"; - priority = P_PROC_SSSE3; - break; - case PROCESSOR_KNL: - arg_str = "knl"; - priority = P_PROC_AVX512F; - break; - case PROCESSOR_KNM: - arg_str = "knm"; - priority = P_PROC_AVX512F; - break; - case PROCESSOR_SILVERMONT: - arg_str = "silvermont"; - priority = P_PROC_SSE4_2; - break; - case PROCESSOR_GOLDMONT: - arg_str = "goldmont"; - priority = P_PROC_SSE4_2; - break; - case PROCESSOR_GOLDMONT_PLUS: - arg_str = "goldmont-plus"; - priority = P_PROC_SSE4_2; - break; - case PROCESSOR_TREMONT: - arg_str = "tremont"; - priority = P_PROC_SSE4_2; - break; - case PROCESSOR_AMDFAM10: - arg_str = "amdfam10h"; - priority = P_PROC_SSE4_A; - break; - case PROCESSOR_BTVER1: - arg_str = "btver1"; - priority = P_PROC_SSE4_A; - break; - case PROCESSOR_BTVER2: - arg_str = "btver2"; - priority = P_PROC_BMI; - break; - case PROCESSOR_BDVER1: - arg_str = "bdver1"; - priority = P_PROC_XOP; - break; - case PROCESSOR_BDVER2: - arg_str = "bdver2"; - priority = P_PROC_FMA; - break; - case PROCESSOR_BDVER3: - arg_str = "bdver3"; - priority = P_PROC_FMA; - break; - case PROCESSOR_BDVER4: - arg_str = 
"bdver4"; - priority = P_PROC_AVX2; - break; - case PROCESSOR_ZNVER1: - arg_str = "znver1"; - priority = P_PROC_AVX2; - break; - case PROCESSOR_ZNVER2: - arg_str = "znver2"; - priority = P_PROC_AVX2; - break; - } - } - - cl_target_option_restore (&global_options, &cur_target); - - if (predicate_list && arg_str == NULL) - { - error_at (DECL_SOURCE_LOCATION (decl), - "no dispatcher found for the versioning attributes"); - return 0; - } - - if (predicate_list) - { - predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS]; - /* For a C string literal the length includes the trailing NULL. */ - predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str); - predicate_chain = tree_cons (predicate_decl, predicate_arg, - predicate_chain); - } - } - - /* Process feature name. */ - tok_str = (char *) xmalloc (strlen (attrs_str) + 1); - strcpy (tok_str, attrs_str); - token = strtok (tok_str, ","); - predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS]; - - while (token != NULL) - { - /* Do not process "arch=" */ - if (strncmp (token, "arch=", 5) == 0) - { - token = strtok (NULL, ","); - continue; - } - for (i = 0; i < NUM_FEATURES; ++i) - { - if (strcmp (token, isa_names_table[i].name) == 0) - { - if (predicate_list) - { - predicate_arg = build_string_literal ( - strlen (isa_names_table[i].name) + 1, - isa_names_table[i].name); - predicate_chain = tree_cons (predicate_decl, predicate_arg, - predicate_chain); - } - /* Find the maximum priority feature. */ - if (isa_names_table[i].priority > priority) - priority = isa_names_table[i].priority; - - break; - } - } - if (predicate_list && priority == P_ZERO) - { - error_at (DECL_SOURCE_LOCATION (decl), - "ISA %qs is not supported in % attribute, " - "use % syntax", token); - return 0; - } - token = strtok (NULL, ","); - } - free (tok_str); - - if (predicate_list && predicate_chain == NULL_TREE) - { - error_at (DECL_SOURCE_LOCATION (decl), - "no dispatcher found for the versioning attributes: %s", - attrs_str); - return 0; - } - else if (predicate_list) - { - predicate_chain = nreverse (predicate_chain); - *predicate_list = predicate_chain; - } - - return priority; -} - -/* This builds the processor_model struct type defined in - libgcc/config/i386/cpuinfo.c */ - -static tree -build_processor_model_struct (void) -{ - const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype", - "__cpu_features"}; - tree field = NULL_TREE, field_chain = NULL_TREE; - int i; - tree type = make_node (RECORD_TYPE); - - /* The first 3 fields are unsigned int. */ - for (i = 0; i < 3; ++i) - { - field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, - get_identifier (field_name[i]), unsigned_type_node); - if (field_chain != NULL_TREE) - DECL_CHAIN (field) = field_chain; - field_chain = field; - } - - /* The last field is an array of unsigned integers of size one. */ - field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, - get_identifier (field_name[3]), - build_array_type (unsigned_type_node, - build_index_type (size_one_node))); - if (field_chain != NULL_TREE) - DECL_CHAIN (field) = field_chain; - field_chain = field; - - finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE); - return type; -} - -/* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. 
*/ - -static tree -make_var_decl (tree type, const char *name) -{ - tree new_decl; - - new_decl = build_decl (UNKNOWN_LOCATION, - VAR_DECL, - get_identifier(name), - type); - - DECL_EXTERNAL (new_decl) = 1; - TREE_STATIC (new_decl) = 1; - TREE_PUBLIC (new_decl) = 1; - DECL_INITIAL (new_decl) = 0; - DECL_ARTIFICIAL (new_decl) = 0; - DECL_PRESERVE_P (new_decl) = 1; - - make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl)); - assemble_variable (new_decl, 0, 0, 0); - - return new_decl; -} - -/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded - into an integer defined in libgcc/config/i386/cpuinfo.c */ - -tree -fold_builtin_cpu (tree fndecl, tree *args) -{ - unsigned int i; - enum ix86_builtins fn_code - = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl); - tree param_string_cst = NULL; - - tree __processor_model_type = build_processor_model_struct (); - tree __cpu_model_var = make_var_decl (__processor_model_type, - "__cpu_model"); - - - varpool_node::add (__cpu_model_var); - - gcc_assert ((args != NULL) && (*args != NULL)); - - param_string_cst = *args; - while (param_string_cst - && TREE_CODE (param_string_cst) != STRING_CST) - { - /* *args must be a expr that can contain other EXPRS leading to a - STRING_CST. */ - if (!EXPR_P (param_string_cst)) - { - error ("parameter to builtin must be a string constant or literal"); - return integer_zero_node; - } - param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0); - } - - gcc_assert (param_string_cst); - - if (fn_code == IX86_BUILTIN_CPU_IS) - { - tree ref; - tree field; - tree final; - - unsigned int field_val = 0; - unsigned int NUM_ARCH_NAMES - = sizeof (arch_names_table) / sizeof (struct _arch_names_table); - - for (i = 0; i < NUM_ARCH_NAMES; i++) - if (strcmp (arch_names_table[i].name, - TREE_STRING_POINTER (param_string_cst)) == 0) - break; - - if (i == NUM_ARCH_NAMES) - { - error ("parameter to builtin not valid: %s", - TREE_STRING_POINTER (param_string_cst)); - return integer_zero_node; - } - - field = TYPE_FIELDS (__processor_model_type); - field_val = arch_names_table[i].model; - - /* CPU types are stored in the next field. */ - if (field_val > M_CPU_TYPE_START - && field_val < M_CPU_SUBTYPE_START) - { - field = DECL_CHAIN (field); - field_val -= M_CPU_TYPE_START; - } - - /* CPU subtypes are stored in the next field. */ - if (field_val > M_CPU_SUBTYPE_START) - { - field = DECL_CHAIN ( DECL_CHAIN (field)); - field_val -= M_CPU_SUBTYPE_START; - } - - /* Get the appropriate field in __cpu_model. */ - ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var, - field, NULL_TREE); - - /* Check the value. 
*/ - final = build2 (EQ_EXPR, unsigned_type_node, ref, - build_int_cstu (unsigned_type_node, field_val)); - return build1 (CONVERT_EXPR, integer_type_node, final); - } - else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS) - { - tree ref; - tree array_elt; - tree field; - tree final; - - unsigned int field_val = 0; - unsigned int NUM_ISA_NAMES - = sizeof (isa_names_table) / sizeof (struct _isa_names_table); - - for (i = 0; i < NUM_ISA_NAMES; i++) - if (strcmp (isa_names_table[i].name, - TREE_STRING_POINTER (param_string_cst)) == 0) - break; - - if (i == NUM_ISA_NAMES) - { - error ("parameter to builtin not valid: %s", - TREE_STRING_POINTER (param_string_cst)); - return integer_zero_node; - } - - if (isa_names_table[i].feature >= 32) - { - tree __cpu_features2_var = make_var_decl (unsigned_type_node, - "__cpu_features2"); - - varpool_node::add (__cpu_features2_var); - field_val = (1U << (isa_names_table[i].feature - 32)); - /* Return __cpu_features2 & field_val */ - final = build2 (BIT_AND_EXPR, unsigned_type_node, - __cpu_features2_var, - build_int_cstu (unsigned_type_node, field_val)); - return build1 (CONVERT_EXPR, integer_type_node, final); - } - - field = TYPE_FIELDS (__processor_model_type); - /* Get the last field, which is __cpu_features. */ - while (DECL_CHAIN (field)) - field = DECL_CHAIN (field); - - /* Get the appropriate field: __cpu_model.__cpu_features */ - ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var, - field, NULL_TREE); - - /* Access the 0th element of __cpu_features array. */ - array_elt = build4 (ARRAY_REF, unsigned_type_node, ref, - integer_zero_node, NULL_TREE, NULL_TREE); - - field_val = (1U << isa_names_table[i].feature); - /* Return __cpu_model.__cpu_features[0] & field_val */ - final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt, - build_int_cstu (unsigned_type_node, field_val)); - return build1 (CONVERT_EXPR, integer_type_node, final); - } - gcc_unreachable (); -} - -#include "gt-i386-builtins.h" diff --git a/gcc/config/i386/i386-d.c b/gcc/config/i386/i386-d.c deleted file mode 100644 index 56fec11846e..00000000000 --- a/gcc/config/i386/i386-d.c +++ /dev/null @@ -1,44 +0,0 @@ -/* Subroutines for the D front end on the x86 architecture. - Copyright (C) 2017-2020 Free Software Foundation, Inc. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -. */ - -#include "config.h" -#include "system.h" -#include "coretypes.h" -#include "tm.h" -#include "d/d-target.h" -#include "d/d-target-def.h" - -/* Implement TARGET_D_CPU_VERSIONS for x86 targets. 
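fold_builtin_cpu above is the compile-time side of __builtin_cpu_is and __builtin_cpu_supports; the accepted strings are exactly the entries of arch_names_table and isa_names_table. A minimal usage sketch:

#include <stdio.h>

int
main (void)
{
  __builtin_cpu_init ();               /* fills __cpu_model / __cpu_features2 */
  if (__builtin_cpu_supports ("avx2"))
    puts ("AVX2 available");
  if (__builtin_cpu_is ("haswell"))
    puts ("Haswell-class CPU");
  return 0;
}

Both builtins fold to reads of the __cpu_model / __cpu_features2 variables referenced above.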
*/ - -void -ix86_d_target_versions (void) -{ - if (TARGET_64BIT) - { - d_add_builtin_version ("X86_64"); - - if (TARGET_X32) - d_add_builtin_version ("D_X32"); - } - else - d_add_builtin_version ("X86"); - - if (TARGET_80387) - d_add_builtin_version ("D_HardFloat"); - else - d_add_builtin_version ("D_SoftFloat"); -} diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c deleted file mode 100644 index 270585decb2..00000000000 --- a/gcc/config/i386/i386-expand.c +++ /dev/null @@ -1,20310 +0,0 @@ -/* Copyright (C) 1988-2020 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -. */ - -#define IN_TARGET_CODE 1 - -#include "config.h" -#include "system.h" -#include "coretypes.h" -#include "backend.h" -#include "rtl.h" -#include "tree.h" -#include "memmodel.h" -#include "gimple.h" -#include "cfghooks.h" -#include "cfgloop.h" -#include "df.h" -#include "tm_p.h" -#include "stringpool.h" -#include "expmed.h" -#include "optabs.h" -#include "regs.h" -#include "emit-rtl.h" -#include "recog.h" -#include "cgraph.h" -#include "diagnostic.h" -#include "cfgbuild.h" -#include "alias.h" -#include "fold-const.h" -#include "attribs.h" -#include "calls.h" -#include "stor-layout.h" -#include "varasm.h" -#include "output.h" -#include "insn-attr.h" -#include "flags.h" -#include "except.h" -#include "explow.h" -#include "expr.h" -#include "cfgrtl.h" -#include "common/common-target.h" -#include "langhooks.h" -#include "reload.h" -#include "gimplify.h" -#include "dwarf2.h" -#include "tm-constrs.h" -#include "cselib.h" -#include "sched-int.h" -#include "opts.h" -#include "tree-pass.h" -#include "context.h" -#include "pass_manager.h" -#include "target-globals.h" -#include "gimple-iterator.h" -#include "tree-vectorizer.h" -#include "shrink-wrap.h" -#include "builtins.h" -#include "rtl-iter.h" -#include "tree-iterator.h" -#include "dbgcnt.h" -#include "case-cfn-macros.h" -#include "dojump.h" -#include "fold-const-call.h" -#include "tree-vrp.h" -#include "tree-ssanames.h" -#include "selftest.h" -#include "selftest-rtl.h" -#include "print-rtl.h" -#include "intl.h" -#include "ifcvt.h" -#include "symbol-summary.h" -#include "ipa-prop.h" -#include "ipa-fnsummary.h" -#include "wide-int-bitmask.h" -#include "tree-vector-builder.h" -#include "debug.h" -#include "dwarf2out.h" -#include "i386-options.h" -#include "i386-builtins.h" -#include "i386-expand.h" - -/* Split one or more double-mode RTL references into pairs of half-mode - references. The RTL can be REG, offsettable MEM, integer constant, or - CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to - split and "num" is its length. lo_half and hi_half are output arrays - that parallel "operands". 
*/ - -void -split_double_mode (machine_mode mode, rtx operands[], - int num, rtx lo_half[], rtx hi_half[]) -{ - machine_mode half_mode; - unsigned int byte; - rtx mem_op = NULL_RTX; - int mem_num = 0; - - switch (mode) - { - case E_TImode: - half_mode = DImode; - break; - case E_DImode: - half_mode = SImode; - break; - default: - gcc_unreachable (); - } - - byte = GET_MODE_SIZE (half_mode); - - while (num--) - { - rtx op = operands[num]; - - /* simplify_subreg refuse to split volatile memory addresses, - but we still have to handle it. */ - if (MEM_P (op)) - { - if (mem_op && rtx_equal_p (op, mem_op)) - { - lo_half[num] = lo_half[mem_num]; - hi_half[num] = hi_half[mem_num]; - } - else - { - mem_op = op; - mem_num = num; - lo_half[num] = adjust_address (op, half_mode, 0); - hi_half[num] = adjust_address (op, half_mode, byte); - } - } - else - { - lo_half[num] = simplify_gen_subreg (half_mode, op, - GET_MODE (op) == VOIDmode - ? mode : GET_MODE (op), 0); - hi_half[num] = simplify_gen_subreg (half_mode, op, - GET_MODE (op) == VOIDmode - ? mode : GET_MODE (op), byte); - } - } -} - -/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate - for the target. */ - -void -ix86_expand_clear (rtx dest) -{ - rtx tmp; - - /* We play register width games, which are only valid after reload. */ - gcc_assert (reload_completed); - - /* Avoid HImode and its attendant prefix byte. */ - if (GET_MODE_SIZE (GET_MODE (dest)) < 4) - dest = gen_rtx_REG (SImode, REGNO (dest)); - tmp = gen_rtx_SET (dest, const0_rtx); - - if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ()) - { - rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); - } - - emit_insn (tmp); -} - -void -ix86_expand_move (machine_mode mode, rtx operands[]) -{ - rtx op0, op1; - rtx tmp, addend = NULL_RTX; - enum tls_model model; - - op0 = operands[0]; - op1 = operands[1]; - - switch (GET_CODE (op1)) - { - case CONST: - tmp = XEXP (op1, 0); - - if (GET_CODE (tmp) != PLUS - || GET_CODE (XEXP (tmp, 0)) != SYMBOL_REF) - break; - - op1 = XEXP (tmp, 0); - addend = XEXP (tmp, 1); - /* FALLTHRU */ - - case SYMBOL_REF: - model = SYMBOL_REF_TLS_MODEL (op1); - - if (model) - op1 = legitimize_tls_address (op1, model, true); - else if (ix86_force_load_from_GOT_p (op1)) - { - /* Load the external function address via GOT slot to avoid PLT. */ - op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), - (TARGET_64BIT - ? UNSPEC_GOTPCREL - : UNSPEC_GOT)); - op1 = gen_rtx_CONST (Pmode, op1); - op1 = gen_const_mem (Pmode, op1); - set_mem_alias_set (op1, ix86_GOT_alias_set ()); - } - else - { - tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX); - if (tmp) - { - op1 = tmp; - if (!addend) - break; - } - else - { - op1 = operands[1]; - break; - } - } - - if (addend) - { - op1 = force_operand (op1, NULL_RTX); - op1 = expand_simple_binop (Pmode, PLUS, op1, addend, - op0, 1, OPTAB_DIRECT); - } - else - op1 = force_operand (op1, op0); - - if (op1 == op0) - return; - - op1 = convert_to_mode (mode, op1, 1); - - default: - break; - } - - if ((flag_pic || MACHOPIC_INDIRECT) - && symbolic_operand (op1, mode)) - { - if (TARGET_MACHO && !TARGET_64BIT) - { -#if TARGET_MACHO - /* dynamic-no-pic */ - if (MACHOPIC_INDIRECT) - { - rtx temp = (op0 && REG_P (op0) && mode == Pmode) - ? op0 : gen_reg_rtx (Pmode); - op1 = machopic_indirect_data_reference (op1, temp); - if (MACHOPIC_PURE) - op1 = machopic_legitimize_pic_address (op1, mode, - temp == op1 ? 
0 : temp); - } - if (op0 != op1 && GET_CODE (op0) != MEM) - { - rtx insn = gen_rtx_SET (op0, op1); - emit_insn (insn); - return; - } - if (GET_CODE (op0) == MEM) - op1 = force_reg (Pmode, op1); - else - { - rtx temp = op0; - if (GET_CODE (temp) != REG) - temp = gen_reg_rtx (Pmode); - temp = legitimize_pic_address (op1, temp); - if (temp == op0) - return; - op1 = temp; - } - /* dynamic-no-pic */ -#endif - } - else - { - if (MEM_P (op0)) - op1 = force_reg (mode, op1); - else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode))) - { - rtx reg = can_create_pseudo_p () ? NULL_RTX : op0; - op1 = legitimize_pic_address (op1, reg); - if (op0 == op1) - return; - op1 = convert_to_mode (mode, op1, 1); - } - } - } - else - { - if (MEM_P (op0) - && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode) - || !push_operand (op0, mode)) - && MEM_P (op1)) - op1 = force_reg (mode, op1); - - if (push_operand (op0, mode) - && ! general_no_elim_operand (op1, mode)) - op1 = copy_to_mode_reg (mode, op1); - - /* Force large constants in 64bit compilation into register - to get them CSEed. */ - if (can_create_pseudo_p () - && (mode == DImode) && TARGET_64BIT - && immediate_operand (op1, mode) - && !x86_64_zext_immediate_operand (op1, VOIDmode) - && !register_operand (op0, mode) - && optimize) - op1 = copy_to_mode_reg (mode, op1); - - if (can_create_pseudo_p () - && CONST_DOUBLE_P (op1)) - { - /* If we are loading a floating point constant to a register, - force the value to memory now, since we'll get better code - out the back end. */ - - op1 = validize_mem (force_const_mem (mode, op1)); - if (!register_operand (op0, mode)) - { - rtx temp = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET (temp, op1)); - emit_move_insn (op0, temp); - return; - } - } - } - - emit_insn (gen_rtx_SET (op0, op1)); -} - -void -ix86_expand_vector_move (machine_mode mode, rtx operands[]) -{ - rtx op0 = operands[0], op1 = operands[1]; - /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU - psABI since the biggest alignment is 4 byte for IA MCU psABI. */ - unsigned int align = (TARGET_IAMCU - ? GET_MODE_BITSIZE (mode) - : GET_MODE_ALIGNMENT (mode)); - - if (push_operand (op0, VOIDmode)) - op0 = emit_move_resolve_push (mode, op0); - - /* Force constants other than zero into memory. We do not know how - the instructions used to build constants modify the upper 64 bits - of the register, once we have that information we may be able - to handle some of them more efficiently. */ - if (can_create_pseudo_p () - && (CONSTANT_P (op1) - || (SUBREG_P (op1) - && CONSTANT_P (SUBREG_REG (op1)))) - && ((register_operand (op0, mode) - && !standard_sse_constant_p (op1, mode)) - /* ix86_expand_vector_move_misalign() does not like constants. */ - || (SSE_REG_MODE_P (mode) - && MEM_P (op0) - && MEM_ALIGN (op0) < align))) - { - if (SUBREG_P (op1)) - { - machine_mode imode = GET_MODE (SUBREG_REG (op1)); - rtx r = force_const_mem (imode, SUBREG_REG (op1)); - if (r) - r = validize_mem (r); - else - r = force_reg (imode, SUBREG_REG (op1)); - op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1)); - } - else - op1 = validize_mem (force_const_mem (mode, op1)); - } - - /* We need to check memory alignment for SSE mode since attribute - can make operands unaligned. */ - if (can_create_pseudo_p () - && SSE_REG_MODE_P (mode) - && ((MEM_P (op0) && (MEM_ALIGN (op0) < align)) - || (MEM_P (op1) && (MEM_ALIGN (op1) < align)))) - { - rtx tmp[2]; - - /* ix86_expand_vector_move_misalign() does not like both - arguments in memory. 
*/ - if (!register_operand (op0, mode) - && !register_operand (op1, mode)) - op1 = force_reg (mode, op1); - - tmp[0] = op0; tmp[1] = op1; - ix86_expand_vector_move_misalign (mode, tmp); - return; - } - - /* Make operand1 a register if it isn't already. */ - if (can_create_pseudo_p () - && !register_operand (op0, mode) - && !register_operand (op1, mode)) - { - emit_move_insn (op0, force_reg (GET_MODE (op0), op1)); - return; - } - - emit_insn (gen_rtx_SET (op0, op1)); -} - -/* Split 32-byte AVX unaligned load and store if needed. */ - -static void -ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) -{ - rtx m; - rtx (*extract) (rtx, rtx, rtx); - machine_mode mode; - - if ((MEM_P (op1) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD) - || (MEM_P (op0) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE)) - { - emit_insn (gen_rtx_SET (op0, op1)); - return; - } - - rtx orig_op0 = NULL_RTX; - mode = GET_MODE (op0); - switch (GET_MODE_CLASS (mode)) - { - case MODE_VECTOR_INT: - case MODE_INT: - if (mode != V32QImode) - { - if (!MEM_P (op0)) - { - orig_op0 = op0; - op0 = gen_reg_rtx (V32QImode); - } - else - op0 = gen_lowpart (V32QImode, op0); - op1 = gen_lowpart (V32QImode, op1); - mode = V32QImode; - } - break; - case MODE_VECTOR_FLOAT: - break; - default: - gcc_unreachable (); - } - - switch (mode) - { - default: - gcc_unreachable (); - case E_V32QImode: - extract = gen_avx_vextractf128v32qi; - mode = V16QImode; - break; - case E_V8SFmode: - extract = gen_avx_vextractf128v8sf; - mode = V4SFmode; - break; - case E_V4DFmode: - extract = gen_avx_vextractf128v4df; - mode = V2DFmode; - break; - } - - if (MEM_P (op1)) - { - rtx r = gen_reg_rtx (mode); - m = adjust_address (op1, mode, 0); - emit_move_insn (r, m); - m = adjust_address (op1, mode, 16); - r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m); - emit_move_insn (op0, r); - } - else if (MEM_P (op0)) - { - m = adjust_address (op0, mode, 0); - emit_insn (extract (m, op1, const0_rtx)); - m = adjust_address (op0, mode, 16); - emit_insn (extract (m, copy_rtx (op1), const1_rtx)); - } - else - gcc_unreachable (); - - if (orig_op0) - emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0)); -} - -/* Implement the movmisalign patterns for SSE. Non-SSE modes go - straight to ix86_expand_vector_move. 
*/ -/* Code generation for scalar reg-reg moves of single and double precision data: - if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true) - movaps reg, reg - else - movss reg, reg - if (x86_sse_partial_reg_dependency == true) - movapd reg, reg - else - movsd reg, reg - - Code generation for scalar loads of double precision data: - if (x86_sse_split_regs == true) - movlpd mem, reg (gas syntax) - else - movsd mem, reg - - Code generation for unaligned packed loads of single precision data - (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency): - if (x86_sse_unaligned_move_optimal) - movups mem, reg - - if (x86_sse_partial_reg_dependency == true) - { - xorps reg, reg - movlps mem, reg - movhps mem+8, reg - } - else - { - movlps mem, reg - movhps mem+8, reg - } - - Code generation for unaligned packed loads of double precision data - (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs): - if (x86_sse_unaligned_move_optimal) - movupd mem, reg - - if (x86_sse_split_regs == true) - { - movlpd mem, reg - movhpd mem+8, reg - } - else - { - movsd mem, reg - movhpd mem+8, reg - } - */ - -void -ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) -{ - rtx op0, op1, m; - - op0 = operands[0]; - op1 = operands[1]; - - /* Use unaligned load/store for AVX512 or when optimizing for size. */ - if (GET_MODE_SIZE (mode) == 64 || optimize_insn_for_size_p ()) - { - emit_insn (gen_rtx_SET (op0, op1)); - return; - } - - if (TARGET_AVX) - { - if (GET_MODE_SIZE (mode) == 32) - ix86_avx256_split_vector_move_misalign (op0, op1); - else - /* Always use 128-bit mov_internal pattern for AVX. */ - emit_insn (gen_rtx_SET (op0, op1)); - return; - } - - if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL - || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - { - emit_insn (gen_rtx_SET (op0, op1)); - return; - } - - /* ??? If we have typed data, then it would appear that using - movdqu is the only way to get unaligned data loaded with - integer type. */ - if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) - { - emit_insn (gen_rtx_SET (op0, op1)); - return; - } - - if (MEM_P (op1)) - { - if (TARGET_SSE2 && mode == V2DFmode) - { - rtx zero; - - /* When SSE registers are split into halves, we can avoid - writing to the top half twice. */ - if (TARGET_SSE_SPLIT_REGS) - { - emit_clobber (op0); - zero = op0; - } - else - { - /* ??? Not sure about the best option for the Intel chips. - The following would seem to satisfy; the register is - entirely cleared, breaking the dependency chain. We - then store to the upper half, with a dependency depth - of one. A rumor has it that Intel recommends two movsd - followed by an unpacklpd, but this is unconfirmed. And - given that the dependency depth of the unpacklpd would - still be one, I'm not sure why this would be better. 
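The strategy comment above describes the load/store sequences chosen for unaligned vector data. A small sketch of source code that produces such an unaligned 16-byte access (GNU vector extension; whether a single movups or a split load is emitted depends on the tuning flags described above):

typedef float v4sf __attribute__ ((vector_size (16)));

/* 16-byte load from a pointer with no alignment guarantee.  */
v4sf
load_v4sf_unaligned (const float *p)
{
  v4sf v;
  __builtin_memcpy (&v, p, sizeof v);
  return v;
}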
*/ - zero = CONST0_RTX (V2DFmode); - } - - m = adjust_address (op1, DFmode, 0); - emit_insn (gen_sse2_loadlpd (op0, zero, m)); - m = adjust_address (op1, DFmode, 8); - emit_insn (gen_sse2_loadhpd (op0, op0, m)); - } - else - { - rtx t; - - if (mode != V4SFmode) - t = gen_reg_rtx (V4SFmode); - else - t = op0; - - if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) - emit_move_insn (t, CONST0_RTX (V4SFmode)); - else - emit_clobber (t); - - m = adjust_address (op1, V2SFmode, 0); - emit_insn (gen_sse_loadlps (t, t, m)); - m = adjust_address (op1, V2SFmode, 8); - emit_insn (gen_sse_loadhps (t, t, m)); - if (mode != V4SFmode) - emit_move_insn (op0, gen_lowpart (mode, t)); - } - } - else if (MEM_P (op0)) - { - if (TARGET_SSE2 && mode == V2DFmode) - { - m = adjust_address (op0, DFmode, 0); - emit_insn (gen_sse2_storelpd (m, op1)); - m = adjust_address (op0, DFmode, 8); - emit_insn (gen_sse2_storehpd (m, op1)); - } - else - { - if (mode != V4SFmode) - op1 = gen_lowpart (V4SFmode, op1); - - m = adjust_address (op0, V2SFmode, 0); - emit_insn (gen_sse_storelps (m, op1)); - m = adjust_address (op0, V2SFmode, 8); - emit_insn (gen_sse_storehps (m, copy_rtx (op1))); - } - } - else - gcc_unreachable (); -} - -/* Move bits 64:95 to bits 32:63. */ - -void -ix86_move_vector_high_sse_to_mmx (rtx op) -{ - rtx mask = gen_rtx_PARALLEL (VOIDmode, - gen_rtvec (4, GEN_INT (0), GEN_INT (2), - GEN_INT (0), GEN_INT (0))); - rtx dest = lowpart_subreg (V4SImode, op, GET_MODE (op)); - op = gen_rtx_VEC_SELECT (V4SImode, dest, mask); - rtx insn = gen_rtx_SET (dest, op); - emit_insn (insn); -} - -/* Split MMX pack with signed/unsigned saturation with SSE/SSE2. */ - -void -ix86_split_mmx_pack (rtx operands[], enum rtx_code code) -{ - rtx op0 = operands[0]; - rtx op1 = operands[1]; - rtx op2 = operands[2]; - - machine_mode dmode = GET_MODE (op0); - machine_mode smode = GET_MODE (op1); - machine_mode inner_dmode = GET_MODE_INNER (dmode); - machine_mode inner_smode = GET_MODE_INNER (smode); - - /* Get the corresponding SSE mode for destination. */ - int nunits = 16 / GET_MODE_SIZE (inner_dmode); - machine_mode sse_dmode = mode_for_vector (GET_MODE_INNER (dmode), - nunits).require (); - machine_mode sse_half_dmode = mode_for_vector (GET_MODE_INNER (dmode), - nunits / 2).require (); - - /* Get the corresponding SSE mode for source. */ - nunits = 16 / GET_MODE_SIZE (inner_smode); - machine_mode sse_smode = mode_for_vector (GET_MODE_INNER (smode), - nunits).require (); - - /* Generate SSE pack with signed/unsigned saturation. */ - rtx dest = lowpart_subreg (sse_dmode, op0, GET_MODE (op0)); - op1 = lowpart_subreg (sse_smode, op1, GET_MODE (op1)); - op2 = lowpart_subreg (sse_smode, op2, GET_MODE (op2)); - - op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1); - op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2); - rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode, - op1, op2)); - emit_insn (insn); - - ix86_move_vector_high_sse_to_mmx (op0); -} - -/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX. */ - -void -ix86_split_mmx_punpck (rtx operands[], bool high_p) -{ - rtx op0 = operands[0]; - rtx op1 = operands[1]; - rtx op2 = operands[2]; - machine_mode mode = GET_MODE (op0); - rtx mask; - /* The corresponding SSE mode. 
*/ - machine_mode sse_mode, double_sse_mode; - - switch (mode) - { - case E_V8QImode: - sse_mode = V16QImode; - double_sse_mode = V32QImode; - mask = gen_rtx_PARALLEL (VOIDmode, - gen_rtvec (16, - GEN_INT (0), GEN_INT (16), - GEN_INT (1), GEN_INT (17), - GEN_INT (2), GEN_INT (18), - GEN_INT (3), GEN_INT (19), - GEN_INT (4), GEN_INT (20), - GEN_INT (5), GEN_INT (21), - GEN_INT (6), GEN_INT (22), - GEN_INT (7), GEN_INT (23))); - break; - - case E_V4HImode: - sse_mode = V8HImode; - double_sse_mode = V16HImode; - mask = gen_rtx_PARALLEL (VOIDmode, - gen_rtvec (8, - GEN_INT (0), GEN_INT (8), - GEN_INT (1), GEN_INT (9), - GEN_INT (2), GEN_INT (10), - GEN_INT (3), GEN_INT (11))); - break; - - case E_V2SImode: - sse_mode = V4SImode; - double_sse_mode = V8SImode; - mask = gen_rtx_PARALLEL (VOIDmode, - gen_rtvec (4, - GEN_INT (0), GEN_INT (4), - GEN_INT (1), GEN_INT (5))); - break; - - default: - gcc_unreachable (); - } - - /* Generate SSE punpcklXX. */ - rtx dest = lowpart_subreg (sse_mode, op0, GET_MODE (op0)); - op1 = lowpart_subreg (sse_mode, op1, GET_MODE (op1)); - op2 = lowpart_subreg (sse_mode, op2, GET_MODE (op2)); - - op1 = gen_rtx_VEC_CONCAT (double_sse_mode, op1, op2); - op2 = gen_rtx_VEC_SELECT (sse_mode, op1, mask); - rtx insn = gen_rtx_SET (dest, op2); - emit_insn (insn); - - if (high_p) - { - /* Move bits 64:127 to bits 0:63. */ - mask = gen_rtx_PARALLEL (VOIDmode, - gen_rtvec (4, GEN_INT (2), GEN_INT (3), - GEN_INT (0), GEN_INT (0))); - dest = lowpart_subreg (V4SImode, dest, GET_MODE (dest)); - op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask); - insn = gen_rtx_SET (dest, op1); - emit_insn (insn); - } -} - -/* Helper function of ix86_fixup_binary_operands to canonicalize - operand order. Returns true if the operands should be swapped. */ - -static bool -ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode, - rtx operands[]) -{ - rtx dst = operands[0]; - rtx src1 = operands[1]; - rtx src2 = operands[2]; - - /* If the operation is not commutative, we can't do anything. */ - if (GET_RTX_CLASS (code) != RTX_COMM_ARITH - && GET_RTX_CLASS (code) != RTX_COMM_COMPARE) - return false; - - /* Highest priority is that src1 should match dst. */ - if (rtx_equal_p (dst, src1)) - return false; - if (rtx_equal_p (dst, src2)) - return true; - - /* Next highest priority is that immediate constants come second. */ - if (immediate_operand (src2, mode)) - return false; - if (immediate_operand (src1, mode)) - return true; - - /* Lowest priority is that memory references should come second. */ - if (MEM_P (src2)) - return false; - if (MEM_P (src1)) - return true; - - return false; -} - - -/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the - destination to use for the operation. If different from the true - destination in operands[0], a copy operation will be required. */ - -rtx -ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode, - rtx operands[]) -{ - rtx dst = operands[0]; - rtx src1 = operands[1]; - rtx src2 = operands[2]; - - /* Canonicalize operand order. */ - if (ix86_swap_binary_operands_p (code, mode, operands)) - { - /* It is invalid to swap operands of different modes. */ - gcc_assert (GET_MODE (src1) == GET_MODE (src2)); - - std::swap (src1, src2); - } - - /* Both source operands cannot be in memory. */ - if (MEM_P (src1) && MEM_P (src2)) - { - /* Optimization: Only read from memory once. 
*/ - if (rtx_equal_p (src1, src2)) - { - src2 = force_reg (mode, src2); - src1 = src2; - } - else if (rtx_equal_p (dst, src1)) - src2 = force_reg (mode, src2); - else - src1 = force_reg (mode, src1); - } - - /* If the destination is memory, and we do not have matching source - operands, do things in registers. */ - if (MEM_P (dst) && !rtx_equal_p (dst, src1)) - dst = gen_reg_rtx (mode); - - /* Source 1 cannot be a constant. */ - if (CONSTANT_P (src1)) - src1 = force_reg (mode, src1); - - /* Source 1 cannot be a non-matching memory. */ - if (MEM_P (src1) && !rtx_equal_p (dst, src1)) - src1 = force_reg (mode, src1); - - /* Improve address combine. */ - if (code == PLUS - && GET_MODE_CLASS (mode) == MODE_INT - && MEM_P (src2)) - src2 = force_reg (mode, src2); - - operands[1] = src1; - operands[2] = src2; - return dst; -} - -/* Similarly, but assume that the destination has already been - set up properly. */ - -void -ix86_fixup_binary_operands_no_copy (enum rtx_code code, - machine_mode mode, rtx operands[]) -{ - rtx dst = ix86_fixup_binary_operands (code, mode, operands); - gcc_assert (dst == operands[0]); -} - -/* Attempt to expand a binary operator. Make the expansion closer to the - actual machine, then just general_operand, which will allow 3 separate - memory references (one output, two input) in a single insn. */ - -void -ix86_expand_binary_operator (enum rtx_code code, machine_mode mode, - rtx operands[]) -{ - rtx src1, src2, dst, op, clob; - - dst = ix86_fixup_binary_operands (code, mode, operands); - src1 = operands[1]; - src2 = operands[2]; - - /* Emit the instruction. */ - - op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2)); - - if (reload_completed - && code == PLUS - && !rtx_equal_p (dst, src1)) - { - /* This is going to be an LEA; avoid splitting it later. */ - emit_insn (op); - } - else - { - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); - } - - /* Fix up the destination if needed. */ - if (dst != operands[0]) - emit_move_insn (operands[0], dst); -} - -/* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with - the given OPERANDS. */ - -void -ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode, - rtx operands[]) -{ - rtx op1 = NULL_RTX, op2 = NULL_RTX; - if (SUBREG_P (operands[1])) - { - op1 = operands[1]; - op2 = operands[2]; - } - else if (SUBREG_P (operands[2])) - { - op1 = operands[2]; - op2 = operands[1]; - } - /* Optimize (__m128i) d | (__m128i) e and similar code - when d and e are float vectors into float vector logical - insn. In C/C++ without using intrinsics there is no other way - to express vector logical operation on float vectors than - to cast them temporarily to integer vectors. 
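A user-level example of the cast idiom this optimization targets (GNU vector extensions; the type names are illustrative):

typedef float v4sf __attribute__ ((vector_size (16)));
typedef int   v4si __attribute__ ((vector_size (16)));

/* Bitwise AND on float lanes has to go through an integer vector type
   at the source level; the expander below can still emit a single
   float-domain logical instruction for it when that is profitable.  */
v4sf
vec_and (v4sf a, v4sf b)
{
  return (v4sf) ((v4si) a & (v4si) b);
}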
*/ - if (op1 - && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL - && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR) - && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT - && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode) - && SUBREG_BYTE (op1) == 0 - && (GET_CODE (op2) == CONST_VECTOR - || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2)) - && SUBREG_BYTE (op2) == 0)) - && can_create_pseudo_p ()) - { - rtx dst; - switch (GET_MODE (SUBREG_REG (op1))) - { - case E_V4SFmode: - case E_V8SFmode: - case E_V16SFmode: - case E_V2DFmode: - case E_V4DFmode: - case E_V8DFmode: - dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1))); - if (GET_CODE (op2) == CONST_VECTOR) - { - op2 = gen_lowpart (GET_MODE (dst), op2); - op2 = force_reg (GET_MODE (dst), op2); - } - else - { - op1 = operands[1]; - op2 = SUBREG_REG (operands[2]); - if (!vector_operand (op2, GET_MODE (dst))) - op2 = force_reg (GET_MODE (dst), op2); - } - op1 = SUBREG_REG (op1); - if (!vector_operand (op1, GET_MODE (dst))) - op1 = force_reg (GET_MODE (dst), op1); - emit_insn (gen_rtx_SET (dst, - gen_rtx_fmt_ee (code, GET_MODE (dst), - op1, op2))); - emit_move_insn (operands[0], gen_lowpart (mode, dst)); - return; - default: - break; - } - } - if (!vector_operand (operands[1], mode)) - operands[1] = force_reg (mode, operands[1]); - if (!vector_operand (operands[2], mode)) - operands[2] = force_reg (mode, operands[2]); - ix86_fixup_binary_operands_no_copy (code, mode, operands); - emit_insn (gen_rtx_SET (operands[0], - gen_rtx_fmt_ee (code, mode, operands[1], - operands[2]))); -} - -/* Return TRUE or FALSE depending on whether the binary operator meets the - appropriate constraints. */ - -bool -ix86_binary_operator_ok (enum rtx_code code, machine_mode mode, - rtx operands[3]) -{ - rtx dst = operands[0]; - rtx src1 = operands[1]; - rtx src2 = operands[2]; - - /* Both source operands cannot be in memory. */ - if (MEM_P (src1) && MEM_P (src2)) - return false; - - /* Canonicalize operand order for commutative operators. */ - if (ix86_swap_binary_operands_p (code, mode, operands)) - std::swap (src1, src2); - - /* If the destination is memory, we must have a matching source operand. */ - if (MEM_P (dst) && !rtx_equal_p (dst, src1)) - return false; - - /* Source 1 cannot be a constant. */ - if (CONSTANT_P (src1)) - return false; - - /* Source 1 cannot be a non-matching memory. */ - if (MEM_P (src1) && !rtx_equal_p (dst, src1)) - /* Support "andhi/andsi/anddi" as a zero-extending move. */ - return (code == AND - && (mode == HImode - || mode == SImode - || (TARGET_64BIT && mode == DImode)) - && satisfies_constraint_L (src2)); - - return true; -} - -/* Attempt to expand a unary operator. Make the expansion closer to the - actual machine, then just general_operand, which will allow 2 separate - memory references (one output, one input) in a single insn. */ - -void -ix86_expand_unary_operator (enum rtx_code code, machine_mode mode, - rtx operands[]) -{ - bool matching_memory = false; - rtx src, dst, op, clob; - - dst = operands[0]; - src = operands[1]; - - /* If the destination is memory, and we do not have matching source - operands, do things in registers. */ - if (MEM_P (dst)) - { - if (rtx_equal_p (dst, src)) - matching_memory = true; - else - dst = gen_reg_rtx (mode); - } - - /* When source operand is memory, destination must match. */ - if (MEM_P (src) && !matching_memory) - src = force_reg (mode, src); - - /* Emit the instruction. 
*/ - - op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src)); - - if (code == NOT) - emit_insn (op); - else - { - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); - } - - /* Fix up the destination if needed. */ - if (dst != operands[0]) - emit_move_insn (operands[0], dst); -} - -/* Predict just emitted jump instruction to be taken with probability PROB. */ - -static void -predict_jump (int prob) -{ - rtx_insn *insn = get_last_insn (); - gcc_assert (JUMP_P (insn)); - add_reg_br_prob_note (insn, profile_probability::from_reg_br_prob_base (prob)); -} - -/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and - divisor are within the range [0-255]. */ - -void -ix86_split_idivmod (machine_mode mode, rtx operands[], - bool unsigned_p) -{ - rtx_code_label *end_label, *qimode_label; - rtx div, mod; - rtx_insn *insn; - rtx scratch, tmp0, tmp1, tmp2; - rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx); - - switch (mode) - { - case E_SImode: - if (GET_MODE (operands[0]) == SImode) - { - if (GET_MODE (operands[1]) == SImode) - gen_divmod4_1 = unsigned_p ? gen_udivmodsi4_1 : gen_divmodsi4_1; - else - gen_divmod4_1 - = unsigned_p ? gen_udivmodsi4_zext_2 : gen_divmodsi4_zext_2; - } - else - gen_divmod4_1 - = unsigned_p ? gen_udivmodsi4_zext_1 : gen_divmodsi4_zext_1; - break; - - case E_DImode: - gen_divmod4_1 = unsigned_p ? gen_udivmoddi4_1 : gen_divmoddi4_1; - break; - - default: - gcc_unreachable (); - } - - end_label = gen_label_rtx (); - qimode_label = gen_label_rtx (); - - scratch = gen_reg_rtx (mode); - - /* Use 8bit unsigned divimod if dividend and divisor are within - the range [0-255]. */ - emit_move_insn (scratch, operands[2]); - scratch = expand_simple_binop (mode, IOR, scratch, operands[3], - scratch, 1, OPTAB_DIRECT); - emit_insn (gen_test_ccno_1 (mode, scratch, GEN_INT (-0x100))); - tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG); - tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx); - tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0, - gen_rtx_LABEL_REF (VOIDmode, qimode_label), - pc_rtx); - insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0)); - predict_jump (REG_BR_PROB_BASE * 50 / 100); - JUMP_LABEL (insn) = qimode_label; - - /* Generate original signed/unsigned divimod. */ - div = gen_divmod4_1 (operands[0], operands[1], - operands[2], operands[3]); - emit_insn (div); - - /* Branch to the end. */ - emit_jump_insn (gen_jump (end_label)); - emit_barrier (); - - /* Generate 8bit unsigned divide. */ - emit_label (qimode_label); - /* Don't use operands[0] for result of 8bit divide since not all - registers support QImode ZERO_EXTRACT. */ - tmp0 = lowpart_subreg (HImode, scratch, mode); - tmp1 = lowpart_subreg (HImode, operands[2], mode); - tmp2 = lowpart_subreg (QImode, operands[3], mode); - emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2)); - - if (unsigned_p) - { - div = gen_rtx_UDIV (mode, operands[2], operands[3]); - mod = gen_rtx_UMOD (mode, operands[2], operands[3]); - } - else - { - div = gen_rtx_DIV (mode, operands[2], operands[3]); - mod = gen_rtx_MOD (mode, operands[2], operands[3]); - } - if (mode == SImode) - { - if (GET_MODE (operands[0]) != SImode) - div = gen_rtx_ZERO_EXTEND (DImode, div); - if (GET_MODE (operands[1]) != SImode) - mod = gen_rtx_ZERO_EXTEND (DImode, mod); - } - - /* Extract remainder from AH. 
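In plain C, the fast path that ix86_split_idivmod emits corresponds roughly to the sketch below (unsigned 32-bit case; the helper name is illustrative):

    #include <stdint.h>

    void
    divmod_u32 (uint32_t a, uint32_t b, uint32_t *q, uint32_t *r)
    {
      if (((a | b) & ~0xffu) == 0)
        {
          /* Both operands fit in [0, 255], so one 8-bit divide yields
             both results; this mirrors the IOR plus the test against
             -0x100 emitted above.  */
          *q = (uint8_t) a / (uint8_t) b;
          *r = (uint8_t) a % (uint8_t) b;
        }
      else
        {
          *q = a / b;
          *r = a % b;
        }
    }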
*/ - tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]), - tmp0, GEN_INT (8), GEN_INT (8)); - if (REG_P (operands[1])) - insn = emit_move_insn (operands[1], tmp1); - else - { - /* Need a new scratch register since the old one has result - of 8bit divide. */ - scratch = gen_reg_rtx (GET_MODE (operands[1])); - emit_move_insn (scratch, tmp1); - insn = emit_move_insn (operands[1], scratch); - } - set_unique_reg_note (insn, REG_EQUAL, mod); - - /* Zero extend quotient from AL. */ - tmp1 = gen_lowpart (QImode, tmp0); - insn = emit_insn (gen_extend_insn - (operands[0], tmp1, - GET_MODE (operands[0]), QImode, 1)); - set_unique_reg_note (insn, REG_EQUAL, div); - - emit_label (end_label); -} - -/* Emit x86 binary operand CODE in mode MODE, where the first operand - matches destination. RTX includes clobber of FLAGS_REG. */ - -void -ix86_emit_binop (enum rtx_code code, machine_mode mode, - rtx dst, rtx src) -{ - rtx op, clob; - - op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src)); - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - - emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); -} - -/* Return true if regno1 def is nearest to the insn. */ - -static bool -find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2) -{ - rtx_insn *prev = insn; - rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn)); - - if (insn == start) - return false; - while (prev && prev != start) - { - if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev)) - { - prev = PREV_INSN (prev); - continue; - } - if (insn_defines_reg (regno1, INVALID_REGNUM, prev)) - return true; - else if (insn_defines_reg (regno2, INVALID_REGNUM, prev)) - return false; - prev = PREV_INSN (prev); - } - - /* None of the regs is defined in the bb. */ - return false; -} - -/* Split lea instructions into a sequence of instructions - which are executed on ALU to avoid AGU stalls. - It is assumed that it is allowed to clobber flags register - at lea position. */ - -void -ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode) -{ - unsigned int regno0, regno1, regno2; - struct ix86_address parts; - rtx target, tmp; - int ok, adds; - - ok = ix86_decompose_address (operands[1], &parts); - gcc_assert (ok); - - target = gen_lowpart (mode, operands[0]); - - regno0 = true_regnum (target); - regno1 = INVALID_REGNUM; - regno2 = INVALID_REGNUM; - - if (parts.base) - { - parts.base = gen_lowpart (mode, parts.base); - regno1 = true_regnum (parts.base); - } - - if (parts.index) - { - parts.index = gen_lowpart (mode, parts.index); - regno2 = true_regnum (parts.index); - } - - if (parts.disp) - parts.disp = gen_lowpart (mode, parts.disp); - - if (parts.scale > 1) - { - /* Case r1 = r1 + ... */ - if (regno1 == regno0) - { - /* If we have a case r1 = r1 + C * r2 then we - should use multiplication which is very - expensive. Assume cost model is wrong if we - have such case here. */ - gcc_assert (regno2 != regno0); - - for (adds = parts.scale; adds > 0; adds--) - ix86_emit_binop (PLUS, mode, target, parts.index); - } - else - { - /* r1 = r2 + r3 * C case. Need to move r3 into r1. */ - if (regno0 != regno2) - emit_insn (gen_rtx_SET (target, parts.index)); - - /* Use shift for scaling. 
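For illustration, an address that this routine splits turns into a plain ALU sequence roughly like the one sketched here (editorial example, not taken from the removed code):

    /* lea 0x8(%rbx,%rcx,4), %rax   is replaced by something like:
           mov  %rcx, %rax          index
           shl  $2,   %rax          << log2 (scale)
           add  %rbx, %rax          + base
           add  $8,   %rax          + displacement                     */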
*/ - ix86_emit_binop (ASHIFT, mode, target, - GEN_INT (exact_log2 (parts.scale))); - - if (parts.base) - ix86_emit_binop (PLUS, mode, target, parts.base); - - if (parts.disp && parts.disp != const0_rtx) - ix86_emit_binop (PLUS, mode, target, parts.disp); - } - } - else if (!parts.base && !parts.index) - { - gcc_assert(parts.disp); - emit_insn (gen_rtx_SET (target, parts.disp)); - } - else - { - if (!parts.base) - { - if (regno0 != regno2) - emit_insn (gen_rtx_SET (target, parts.index)); - } - else if (!parts.index) - { - if (regno0 != regno1) - emit_insn (gen_rtx_SET (target, parts.base)); - } - else - { - if (regno0 == regno1) - tmp = parts.index; - else if (regno0 == regno2) - tmp = parts.base; - else - { - rtx tmp1; - - /* Find better operand for SET instruction, depending - on which definition is farther from the insn. */ - if (find_nearest_reg_def (insn, regno1, regno2)) - tmp = parts.index, tmp1 = parts.base; - else - tmp = parts.base, tmp1 = parts.index; - - emit_insn (gen_rtx_SET (target, tmp)); - - if (parts.disp && parts.disp != const0_rtx) - ix86_emit_binop (PLUS, mode, target, parts.disp); - - ix86_emit_binop (PLUS, mode, target, tmp1); - return; - } - - ix86_emit_binop (PLUS, mode, target, tmp); - } - - if (parts.disp && parts.disp != const0_rtx) - ix86_emit_binop (PLUS, mode, target, parts.disp); - } -} - -/* Post-reload splitter for converting an SF or DFmode value in an - SSE register into an unsigned SImode. */ - -void -ix86_split_convert_uns_si_sse (rtx operands[]) -{ - machine_mode vecmode; - rtx value, large, zero_or_two31, input, two31, x; - - large = operands[1]; - zero_or_two31 = operands[2]; - input = operands[3]; - two31 = operands[4]; - vecmode = GET_MODE (large); - value = gen_rtx_REG (vecmode, REGNO (operands[0])); - - /* Load up the value into the low element. We must ensure that the other - elements are valid floats -- zero is the easiest such value. */ - if (MEM_P (input)) - { - if (vecmode == V4SFmode) - emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input)); - else - emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input)); - } - else - { - input = gen_rtx_REG (vecmode, REGNO (input)); - emit_move_insn (value, CONST0_RTX (vecmode)); - if (vecmode == V4SFmode) - emit_insn (gen_sse_movss (value, value, input)); - else - emit_insn (gen_sse2_movsd (value, value, input)); - } - - emit_move_insn (large, two31); - emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31); - - x = gen_rtx_fmt_ee (LE, vecmode, large, value); - emit_insn (gen_rtx_SET (large, x)); - - x = gen_rtx_AND (vecmode, zero_or_two31, large); - emit_insn (gen_rtx_SET (zero_or_two31, x)); - - x = gen_rtx_MINUS (vecmode, value, zero_or_two31); - emit_insn (gen_rtx_SET (value, x)); - - large = gen_rtx_REG (V4SImode, REGNO (large)); - emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31))); - - x = gen_rtx_REG (V4SImode, REGNO (value)); - if (vecmode == V4SFmode) - emit_insn (gen_fix_truncv4sfv4si2 (x, value)); - else - emit_insn (gen_sse2_cvttpd2dq (x, value)); - value = x; - - emit_insn (gen_xorv4si3 (value, value, large)); -} - -static bool ix86_expand_vector_init_one_nonzero (bool mmx_ok, - machine_mode mode, rtx target, - rtx var, int one_var); - -/* Convert an unsigned DImode value into a DFmode, using only SSE. - Expects the 64-bit DImode to be supplied in a pair of integral - registers. Requires SSE2; will use SSE3 if available. For x86_32, - -mfpmath=sse, !optimize_size only. 
*/ - -void -ix86_expand_convert_uns_didf_sse (rtx target, rtx input) -{ - REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt; - rtx int_xmm, fp_xmm; - rtx biases, exponents; - rtx x; - - int_xmm = gen_reg_rtx (V4SImode); - if (TARGET_INTER_UNIT_MOVES_TO_VEC) - emit_insn (gen_movdi_to_sse (int_xmm, input)); - else if (TARGET_SSE_SPLIT_REGS) - { - emit_clobber (int_xmm); - emit_move_insn (gen_lowpart (DImode, int_xmm), input); - } - else - { - x = gen_reg_rtx (V2DImode); - ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0); - emit_move_insn (int_xmm, gen_lowpart (V4SImode, x)); - } - - x = gen_rtx_CONST_VECTOR (V4SImode, - gen_rtvec (4, GEN_INT (0x43300000UL), - GEN_INT (0x45300000UL), - const0_rtx, const0_rtx)); - exponents = validize_mem (force_const_mem (V4SImode, x)); - - /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */ - emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents)); - - /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm) - yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)). - Similarly (0x45300000UL ## fp_value_hi_xmm) yields - (0x1.0p84 + double(fp_value_hi_xmm)). - Note these exponents differ by 32. */ - - fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm)); - - /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values - in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */ - real_ldexp (&bias_lo_rvt, &dconst1, 52); - real_ldexp (&bias_hi_rvt, &dconst1, 84); - biases = const_double_from_real_value (bias_lo_rvt, DFmode); - x = const_double_from_real_value (bias_hi_rvt, DFmode); - biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x)); - biases = validize_mem (force_const_mem (V2DFmode, biases)); - emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases)); - - /* Add the upper and lower DFmode values together. */ - if (TARGET_SSE3) - emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm)); - else - { - x = copy_to_mode_reg (V2DFmode, fp_xmm); - emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm)); - emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x)); - } - - ix86_expand_vector_extract (false, target, fp_xmm, 0); -} - -/* Not used, but eases macroization of patterns. */ -void -ix86_expand_convert_uns_sixf_sse (rtx, rtx) -{ - gcc_unreachable (); -} - -/* Convert an unsigned SImode value into a DFmode. Only currently used - for SSE, but applicable anywhere. */ - -void -ix86_expand_convert_uns_sidf_sse (rtx target, rtx input) -{ - REAL_VALUE_TYPE TWO31r; - rtx x, fp; - - x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1), - NULL, 1, OPTAB_DIRECT); - - fp = gen_reg_rtx (DFmode); - emit_insn (gen_floatsidf2 (fp, x)); - - real_ldexp (&TWO31r, &dconst1, 31); - x = const_double_from_real_value (TWO31r, DFmode); - - x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT); - if (x != target) - emit_move_insn (target, x); -} - -/* Convert a signed DImode value into a DFmode. Only used for SSE in - 32-bit mode; otherwise we have a direct convert instruction. 
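A scalar rendering of the exponent-bias trick that ix86_expand_convert_uns_didf_sse applies with SSE on the {lo, hi} pair (illustrative helper; assumes IEEE-754 doubles and C99 hex-float literals):

    #include <stdint.h>
    #include <string.h>

    double
    u64_to_double (uint64_t x)
    {
      /* 0x433... makes the low half read as 2^52 + lo32,
         0x453... makes the high half read as 2^84 + 2^32 * hi32.  */
      uint64_t lo_bits = (x & 0xffffffffu) | 0x4330000000000000ull;
      uint64_t hi_bits = (x >> 32)         | 0x4530000000000000ull;
      double lo, hi;
      memcpy (&lo, &lo_bits, sizeof lo);
      memcpy (&hi, &hi_bits, sizeof hi);
      /* Subtract the biases and add the halves, as the vector code does.  */
      return (hi - 0x1.0p84) + (lo - 0x1.0p52);
    }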
*/ - -void -ix86_expand_convert_sign_didf_sse (rtx target, rtx input) -{ - REAL_VALUE_TYPE TWO32r; - rtx fp_lo, fp_hi, x; - - fp_lo = gen_reg_rtx (DFmode); - fp_hi = gen_reg_rtx (DFmode); - - emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input))); - - real_ldexp (&TWO32r, &dconst1, 32); - x = const_double_from_real_value (TWO32r, DFmode); - fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT); - - ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input)); - - x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target, - 0, OPTAB_DIRECT); - if (x != target) - emit_move_insn (target, x); -} - -/* Convert an unsigned SImode value into a SFmode, using only SSE. - For x86_32, -mfpmath=sse, !optimize_size only. */ -void -ix86_expand_convert_uns_sisf_sse (rtx target, rtx input) -{ - REAL_VALUE_TYPE ONE16r; - rtx fp_hi, fp_lo, int_hi, int_lo, x; - - real_ldexp (&ONE16r, &dconst1, 16); - x = const_double_from_real_value (ONE16r, SFmode); - int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff), - NULL, 0, OPTAB_DIRECT); - int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16), - NULL, 0, OPTAB_DIRECT); - fp_hi = gen_reg_rtx (SFmode); - fp_lo = gen_reg_rtx (SFmode); - emit_insn (gen_floatsisf2 (fp_hi, int_hi)); - emit_insn (gen_floatsisf2 (fp_lo, int_lo)); - fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi, - 0, OPTAB_DIRECT); - fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target, - 0, OPTAB_DIRECT); - if (!rtx_equal_p (target, fp_hi)) - emit_move_insn (target, fp_hi); -} - -/* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert - a vector of unsigned ints VAL to vector of floats TARGET. */ - -void -ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val) -{ - rtx tmp[8]; - REAL_VALUE_TYPE TWO16r; - machine_mode intmode = GET_MODE (val); - machine_mode fltmode = GET_MODE (target); - rtx (*cvt) (rtx, rtx); - - if (intmode == V4SImode) - cvt = gen_floatv4siv4sf2; - else - cvt = gen_floatv8siv8sf2; - tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff)); - tmp[0] = force_reg (intmode, tmp[0]); - tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1, - OPTAB_DIRECT); - tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16), - NULL_RTX, 1, OPTAB_DIRECT); - tmp[3] = gen_reg_rtx (fltmode); - emit_insn (cvt (tmp[3], tmp[1])); - tmp[4] = gen_reg_rtx (fltmode); - emit_insn (cvt (tmp[4], tmp[2])); - real_ldexp (&TWO16r, &dconst1, 16); - tmp[5] = const_double_from_real_value (TWO16r, SFmode); - tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5])); - tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1, - OPTAB_DIRECT); - tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1, - OPTAB_DIRECT); - if (tmp[7] != target) - emit_move_insn (target, tmp[7]); -} - -/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc* - pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*. - This is done by doing just signed conversion if < 0x1p31, and otherwise by - subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */ - -rtx -ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp) -{ - REAL_VALUE_TYPE TWO31r; - rtx two31r, tmp[4]; - machine_mode mode = GET_MODE (val); - machine_mode scalarmode = GET_MODE_INNER (mode); - machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? 
V8SImode : V4SImode; - rtx (*cmp) (rtx, rtx, rtx, rtx); - int i; - - for (i = 0; i < 3; i++) - tmp[i] = gen_reg_rtx (mode); - real_ldexp (&TWO31r, &dconst1, 31); - two31r = const_double_from_real_value (TWO31r, scalarmode); - two31r = ix86_build_const_vector (mode, 1, two31r); - two31r = force_reg (mode, two31r); - switch (mode) - { - case E_V8SFmode: cmp = gen_avx_maskcmpv8sf3; break; - case E_V4SFmode: cmp = gen_sse_maskcmpv4sf3; break; - case E_V4DFmode: cmp = gen_avx_maskcmpv4df3; break; - case E_V2DFmode: cmp = gen_sse2_maskcmpv2df3; break; - default: gcc_unreachable (); - } - tmp[3] = gen_rtx_LE (mode, two31r, val); - emit_insn (cmp (tmp[0], two31r, val, tmp[3])); - tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1], - 0, OPTAB_DIRECT); - if (intmode == V4SImode || TARGET_AVX2) - *xorp = expand_simple_binop (intmode, ASHIFT, - gen_lowpart (intmode, tmp[0]), - GEN_INT (31), NULL_RTX, 0, - OPTAB_DIRECT); - else - { - rtx two31 = gen_int_mode (HOST_WIDE_INT_1U << 31, SImode); - two31 = ix86_build_const_vector (intmode, 1, two31); - *xorp = expand_simple_binop (intmode, AND, - gen_lowpart (intmode, tmp[0]), - two31, NULL_RTX, 0, - OPTAB_DIRECT); - } - return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2], - 0, OPTAB_DIRECT); -} - -/* Generate code for floating point ABS or NEG. */ - -void -ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode, - rtx operands[]) -{ - rtx set, dst, src; - bool use_sse = false; - bool vector_mode = VECTOR_MODE_P (mode); - machine_mode vmode = mode; - rtvec par; - - if (vector_mode || mode == TFmode) - use_sse = true; - else if (TARGET_SSE_MATH) - { - use_sse = SSE_FLOAT_MODE_P (mode); - if (mode == SFmode) - vmode = V4SFmode; - else if (mode == DFmode) - vmode = V2DFmode; - } - - dst = operands[0]; - src = operands[1]; - - set = gen_rtx_fmt_e (code, mode, src); - set = gen_rtx_SET (dst, set); - - if (use_sse) - { - rtx mask, use, clob; - - /* NEG and ABS performed with SSE use bitwise mask operations. - Create the appropriate mask now. */ - mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS); - use = gen_rtx_USE (VOIDmode, mask); - if (vector_mode || mode == TFmode) - par = gen_rtvec (2, set, use); - else - { - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - par = gen_rtvec (3, set, use, clob); - } - } - else - { - rtx clob; - - /* Changing of sign for FP values is doable using integer unit too. */ - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - par = gen_rtvec (2, set, clob); - } - - emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); -} - -/* Deconstruct a floating point ABS or NEG operation - with integer registers into integer operations. 
*/ - -void -ix86_split_fp_absneg_operator (enum rtx_code code, machine_mode mode, - rtx operands[]) -{ - enum rtx_code absneg_op; - rtx dst, set; - - gcc_assert (operands_match_p (operands[0], operands[1])); - - switch (mode) - { - case E_SFmode: - dst = gen_lowpart (SImode, operands[0]); - - if (code == ABS) - { - set = gen_int_mode (0x7fffffff, SImode); - absneg_op = AND; - } - else - { - set = gen_int_mode (0x80000000, SImode); - absneg_op = XOR; - } - set = gen_rtx_fmt_ee (absneg_op, SImode, dst, set); - break; - - case E_DFmode: - if (TARGET_64BIT) - { - dst = gen_lowpart (DImode, operands[0]); - dst = gen_rtx_ZERO_EXTRACT (DImode, dst, const1_rtx, GEN_INT (63)); - - if (code == ABS) - set = const0_rtx; - else - set = gen_rtx_NOT (DImode, dst); - } - else - { - dst = gen_highpart (SImode, operands[0]); - - if (code == ABS) - { - set = gen_int_mode (0x7fffffff, SImode); - absneg_op = AND; - } - else - { - set = gen_int_mode (0x80000000, SImode); - absneg_op = XOR; - } - set = gen_rtx_fmt_ee (absneg_op, SImode, dst, set); - } - break; - - case E_XFmode: - dst = gen_rtx_REG (SImode, - REGNO (operands[0]) + (TARGET_64BIT ? 1 : 2)); - if (code == ABS) - { - set = GEN_INT (0x7fff); - absneg_op = AND; - } - else - { - set = GEN_INT (0x8000); - absneg_op = XOR; - } - set = gen_rtx_fmt_ee (absneg_op, SImode, dst, set); - break; - - default: - gcc_unreachable (); - } - - set = gen_rtx_SET (dst, set); - - rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - rtvec par = gen_rtvec (2, set, clob); - - emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); -} - -/* Expand a copysign operation. Special case operand 0 being a constant. */ - -void -ix86_expand_copysign (rtx operands[]) -{ - machine_mode mode, vmode; - rtx dest, op0, op1, mask; - - dest = operands[0]; - op0 = operands[1]; - op1 = operands[2]; - - mode = GET_MODE (dest); - - if (mode == SFmode) - vmode = V4SFmode; - else if (mode == DFmode) - vmode = V2DFmode; - else if (mode == TFmode) - vmode = mode; - else - gcc_unreachable (); - - mask = ix86_build_signbit_mask (vmode, 0, 0); - - if (CONST_DOUBLE_P (op0)) - { - if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0))) - op0 = simplify_unary_operation (ABS, mode, op0, mode); - - if (mode == SFmode || mode == DFmode) - { - if (op0 == CONST0_RTX (mode)) - op0 = CONST0_RTX (vmode); - else - { - rtx v = ix86_build_const_vector (vmode, false, op0); - - op0 = force_reg (vmode, v); - } - } - else if (op0 != CONST0_RTX (mode)) - op0 = force_reg (mode, op0); - - emit_insn (gen_copysign3_const (mode, dest, op0, op1, mask)); - } - else - { - rtx nmask = ix86_build_signbit_mask (vmode, 0, 1); - - emit_insn (gen_copysign3_var - (mode, dest, NULL_RTX, op0, op1, nmask, mask)); - } -} - -/* Deconstruct a copysign operation into bit masks. Operand 0 is known to - be a constant, and so has already been expanded into a vector constant. */ - -void -ix86_split_copysign_const (rtx operands[]) -{ - machine_mode mode, vmode; - rtx dest, op0, mask, x; - - dest = operands[0]; - op0 = operands[1]; - mask = operands[3]; - - mode = GET_MODE (dest); - vmode = GET_MODE (mask); - - dest = lowpart_subreg (vmode, dest, mode); - x = gen_rtx_AND (vmode, dest, mask); - emit_insn (gen_rtx_SET (dest, x)); - - if (op0 != CONST0_RTX (vmode)) - { - x = gen_rtx_IOR (vmode, dest, op0); - emit_insn (gen_rtx_SET (dest, x)); - } -} - -/* Deconstruct a copysign operation into bit masks. Operand 0 is variable, - so we have to do two masks. 
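The variable-operand split below combines the magnitude bits of one value with the sign bit of the other; a scalar sketch (helper name illustrative, assumes IEEE-754 doubles):

    #include <stdint.h>
    #include <string.h>

    double
    copysign_bits (double mag, double sgn)
    {
      const uint64_t sign = 0x8000000000000000ull;
      uint64_t m, s;
      memcpy (&m, &mag, sizeof m);
      memcpy (&s, &sgn, sizeof s);
      m = (m & ~sign) | (s & sign);   /* two masks: ~sign on mag, sign on sgn */
      memcpy (&mag, &m, sizeof mag);
      return mag;
    }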
*/ - -void -ix86_split_copysign_var (rtx operands[]) -{ - machine_mode mode, vmode; - rtx dest, scratch, op0, op1, mask, nmask, x; - - dest = operands[0]; - scratch = operands[1]; - op0 = operands[2]; - op1 = operands[3]; - nmask = operands[4]; - mask = operands[5]; - - mode = GET_MODE (dest); - vmode = GET_MODE (mask); - - if (rtx_equal_p (op0, op1)) - { - /* Shouldn't happen often (it's useless, obviously), but when it does - we'd generate incorrect code if we continue below. */ - emit_move_insn (dest, op0); - return; - } - - if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */ - { - gcc_assert (REGNO (op1) == REGNO (scratch)); - - x = gen_rtx_AND (vmode, scratch, mask); - emit_insn (gen_rtx_SET (scratch, x)); - - dest = mask; - op0 = lowpart_subreg (vmode, op0, mode); - x = gen_rtx_NOT (vmode, dest); - x = gen_rtx_AND (vmode, x, op0); - emit_insn (gen_rtx_SET (dest, x)); - } - else - { - if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */ - { - x = gen_rtx_AND (vmode, scratch, mask); - } - else /* alternative 2,4 */ - { - gcc_assert (REGNO (mask) == REGNO (scratch)); - op1 = lowpart_subreg (vmode, op1, mode); - x = gen_rtx_AND (vmode, scratch, op1); - } - emit_insn (gen_rtx_SET (scratch, x)); - - if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */ - { - dest = lowpart_subreg (vmode, op0, mode); - x = gen_rtx_AND (vmode, dest, nmask); - } - else /* alternative 3,4 */ - { - gcc_assert (REGNO (nmask) == REGNO (dest)); - dest = nmask; - op0 = lowpart_subreg (vmode, op0, mode); - x = gen_rtx_AND (vmode, dest, op0); - } - emit_insn (gen_rtx_SET (dest, x)); - } - - x = gen_rtx_IOR (vmode, dest, scratch); - emit_insn (gen_rtx_SET (dest, x)); -} - -/* Expand an xorsign operation. */ - -void -ix86_expand_xorsign (rtx operands[]) -{ - machine_mode mode, vmode; - rtx dest, op0, op1, mask; - - dest = operands[0]; - op0 = operands[1]; - op1 = operands[2]; - - mode = GET_MODE (dest); - - if (mode == SFmode) - vmode = V4SFmode; - else if (mode == DFmode) - vmode = V2DFmode; - else - gcc_unreachable (); - - mask = ix86_build_signbit_mask (vmode, 0, 0); - - emit_insn (gen_xorsign3_1 (mode, dest, op0, op1, mask)); -} - -/* Deconstruct an xorsign operation into bit masks. */ - -void -ix86_split_xorsign (rtx operands[]) -{ - machine_mode mode, vmode; - rtx dest, op0, mask, x; - - dest = operands[0]; - op0 = operands[1]; - mask = operands[3]; - - mode = GET_MODE (dest); - vmode = GET_MODE (mask); - - dest = lowpart_subreg (vmode, dest, mode); - x = gen_rtx_AND (vmode, dest, mask); - emit_insn (gen_rtx_SET (dest, x)); - - op0 = lowpart_subreg (vmode, op0, mode); - x = gen_rtx_XOR (vmode, dest, op0); - emit_insn (gen_rtx_SET (dest, x)); -} - -static rtx ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1); - -void -ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label) -{ - machine_mode mode = GET_MODE (op0); - rtx tmp; - - /* Handle special case - vector comparsion with boolean result, transform - it using ptest instruction. */ - if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) - { - rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG); - machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode; - - gcc_assert (code == EQ || code == NE); - /* Generate XOR since we can't check that one operand is zero vector. 
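The ptest idiom used here for whole-vector (in)equality can be written with SSE4.1 intrinsics; a minimal sketch (function name illustrative):

    #include <smmintrin.h>

    int
    vectors_equal (__m128i a, __m128i b)
    {
      __m128i x = _mm_xor_si128 (a, b);   /* XOR, then test the result ...    */
      return _mm_testz_si128 (x, x);      /* ... against itself: 1 iff a == b */
    }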
*/ - tmp = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1))); - tmp = gen_lowpart (p_mode, tmp); - emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG), - gen_rtx_UNSPEC (CCmode, - gen_rtvec (2, tmp, tmp), - UNSPEC_PTEST))); - tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx); - tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, - gen_rtx_LABEL_REF (VOIDmode, label), - pc_rtx); - emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); - return; - } - - switch (mode) - { - case E_SFmode: - case E_DFmode: - case E_XFmode: - case E_QImode: - case E_HImode: - case E_SImode: - simple: - tmp = ix86_expand_compare (code, op0, op1); - tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, - gen_rtx_LABEL_REF (VOIDmode, label), - pc_rtx); - emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); - return; - - case E_DImode: - if (TARGET_64BIT) - goto simple; - /* For 32-bit target DI comparison may be performed on - SSE registers. To allow this we should avoid split - to SI mode which is achieved by doing xor in DI mode - and then comparing with zero (which is recognized by - STV pass). We don't compare using xor when optimizing - for size. */ - if (!optimize_insn_for_size_p () - && TARGET_STV - && (code == EQ || code == NE)) - { - op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1)); - op1 = const0_rtx; - } - /* FALLTHRU */ - case E_TImode: - /* Expand DImode branch into multiple compare+branch. */ - { - rtx lo[2], hi[2]; - rtx_code_label *label2; - enum rtx_code code1, code2, code3; - machine_mode submode; - - if (CONSTANT_P (op0) && !CONSTANT_P (op1)) - { - std::swap (op0, op1); - code = swap_condition (code); - } - - split_double_mode (mode, &op0, 1, lo+0, hi+0); - split_double_mode (mode, &op1, 1, lo+1, hi+1); - - submode = mode == DImode ? SImode : DImode; - - /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to - avoid two branches. This costs one extra insn, so disable when - optimizing for size. */ - - if ((code == EQ || code == NE) - && (!optimize_insn_for_size_p () - || hi[1] == const0_rtx || lo[1] == const0_rtx)) - { - rtx xor0, xor1; - - xor1 = hi[0]; - if (hi[1] != const0_rtx) - xor1 = expand_binop (submode, xor_optab, xor1, hi[1], - NULL_RTX, 0, OPTAB_WIDEN); - - xor0 = lo[0]; - if (lo[1] != const0_rtx) - xor0 = expand_binop (submode, xor_optab, xor0, lo[1], - NULL_RTX, 0, OPTAB_WIDEN); - - tmp = expand_binop (submode, ior_optab, xor1, xor0, - NULL_RTX, 0, OPTAB_WIDEN); - - ix86_expand_branch (code, tmp, const0_rtx, label); - return; - } - - /* Otherwise, if we are doing less-than or greater-or-equal-than, - op1 is a constant and the low word is zero, then we can just - examine the high word. Similarly for low word -1 and - less-or-equal-than or greater-than. */ - - if (CONST_INT_P (hi[1])) - switch (code) - { - case LT: case LTU: case GE: case GEU: - if (lo[1] == const0_rtx) - { - ix86_expand_branch (code, hi[0], hi[1], label); - return; - } - break; - case LE: case LEU: case GT: case GTU: - if (lo[1] == constm1_rtx) - { - ix86_expand_branch (code, hi[0], hi[1], label); - return; - } - break; - default: - break; - } - - /* Emulate comparisons that do not depend on Zero flag with - double-word subtraction. Note that only Overflow, Sign - and Carry flags are valid, so swap arguments and condition - of comparisons that would otherwise test Zero flag. 
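The double-word equality shortcut above spends one extra OR instead of a second compare and branch; in C it is simply (helper name illustrative):

    #include <stdint.h>

    int
    eq_doubleword (uint32_t lo0, uint32_t hi0, uint32_t lo1, uint32_t hi1)
    {
      return ((lo0 ^ lo1) | (hi0 ^ hi1)) == 0;
    }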
*/ - - switch (code) - { - case LE: case LEU: case GT: case GTU: - std::swap (lo[0], lo[1]); - std::swap (hi[0], hi[1]); - code = swap_condition (code); - /* FALLTHRU */ - - case LT: case LTU: case GE: case GEU: - { - bool uns = (code == LTU || code == GEU); - rtx (*sbb_insn) (machine_mode, rtx, rtx, rtx) - = uns ? gen_sub3_carry_ccc : gen_sub3_carry_ccgz; - - if (!nonimmediate_operand (lo[0], submode)) - lo[0] = force_reg (submode, lo[0]); - if (!x86_64_general_operand (lo[1], submode)) - lo[1] = force_reg (submode, lo[1]); - - if (!register_operand (hi[0], submode)) - hi[0] = force_reg (submode, hi[0]); - if ((uns && !nonimmediate_operand (hi[1], submode)) - || (!uns && !x86_64_general_operand (hi[1], submode))) - hi[1] = force_reg (submode, hi[1]); - - emit_insn (gen_cmp_1 (submode, lo[0], lo[1])); - - tmp = gen_rtx_SCRATCH (submode); - emit_insn (sbb_insn (submode, tmp, hi[0], hi[1])); - - tmp = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG); - ix86_expand_branch (code, tmp, const0_rtx, label); - return; - } - - default: - break; - } - - /* Otherwise, we need two or three jumps. */ - - label2 = gen_label_rtx (); - - code1 = code; - code2 = swap_condition (code); - code3 = unsigned_condition (code); - - switch (code) - { - case LT: case GT: case LTU: case GTU: - break; - - case LE: code1 = LT; code2 = GT; break; - case GE: code1 = GT; code2 = LT; break; - case LEU: code1 = LTU; code2 = GTU; break; - case GEU: code1 = GTU; code2 = LTU; break; - - case EQ: code1 = UNKNOWN; code2 = NE; break; - case NE: code2 = UNKNOWN; break; - - default: - gcc_unreachable (); - } - - /* - * a < b => - * if (hi(a) < hi(b)) goto true; - * if (hi(a) > hi(b)) goto false; - * if (lo(a) < lo(b)) goto true; - * false: - */ - - if (code1 != UNKNOWN) - ix86_expand_branch (code1, hi[0], hi[1], label); - if (code2 != UNKNOWN) - ix86_expand_branch (code2, hi[0], hi[1], label2); - - ix86_expand_branch (code3, lo[0], lo[1], label); - - if (code2 != UNKNOWN) - emit_label (label2); - return; - } - - default: - gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC); - goto simple; - } -} - -/* Figure out whether to use unordered fp comparisons. */ - -static bool -ix86_unordered_fp_compare (enum rtx_code code) -{ - if (!TARGET_IEEE_FP) - return false; - - switch (code) - { - case LT: - case LE: - case GT: - case GE: - case LTGT: - return false; - - case EQ: - case NE: - - case UNORDERED: - case ORDERED: - case UNLT: - case UNLE: - case UNGT: - case UNGE: - case UNEQ: - return true; - - default: - gcc_unreachable (); - } -} - -/* Return a comparison we can do and that it is equivalent to - swap_condition (code) apart possibly from orderedness. - But, never change orderedness if TARGET_IEEE_FP, returning - UNKNOWN in that case if necessary. */ - -static enum rtx_code -ix86_fp_swap_condition (enum rtx_code code) -{ - switch (code) - { - case GT: /* GTU - CF=0 & ZF=0 */ - return TARGET_IEEE_FP ? UNKNOWN : UNLT; - case GE: /* GEU - CF=0 */ - return TARGET_IEEE_FP ? UNKNOWN : UNLE; - case UNLT: /* LTU - CF=1 */ - return TARGET_IEEE_FP ? UNKNOWN : GT; - case UNLE: /* LEU - CF=1 | ZF=1 */ - return TARGET_IEEE_FP ? UNKNOWN : GE; - default: - return swap_condition (code); - } -} - -/* Return cost of comparison CODE using the best strategy for performance. - All following functions do use number of instructions as a cost metrics. - In future this should be tweaked to compute bytes for optimize_size and - take into account performance of various instructions on various CPUs. 
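The two/three-branch scheme spelled out in the comment above has this shape in C for a signed less-than (helper name illustrative; note the low halves compare unsigned):

    #include <stdint.h>

    int
    lt_doubleword (int32_t hi0, uint32_t lo0, int32_t hi1, uint32_t lo1)
    {
      if (hi0 < hi1) return 1;
      if (hi0 > hi1) return 0;
      return lo0 < lo1;
    }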
*/ - -static int -ix86_fp_comparison_cost (enum rtx_code code) -{ - int arith_cost; - - /* The cost of code using bit-twiddling on %ah. */ - switch (code) - { - case UNLE: - case UNLT: - case LTGT: - case GT: - case GE: - case UNORDERED: - case ORDERED: - case UNEQ: - arith_cost = 4; - break; - case LT: - case NE: - case EQ: - case UNGE: - arith_cost = TARGET_IEEE_FP ? 5 : 4; - break; - case LE: - case UNGT: - arith_cost = TARGET_IEEE_FP ? 6 : 4; - break; - default: - gcc_unreachable (); - } - - switch (ix86_fp_comparison_strategy (code)) - { - case IX86_FPCMP_COMI: - return arith_cost > 4 ? 3 : 2; - case IX86_FPCMP_SAHF: - return arith_cost > 4 ? 4 : 3; - default: - return arith_cost; - } -} - -/* Swap, force into registers, or otherwise massage the two operands - to a fp comparison. The operands are updated in place; the new - comparison code is returned. */ - -static enum rtx_code -ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) -{ - bool unordered_compare = ix86_unordered_fp_compare (code); - rtx op0 = *pop0, op1 = *pop1; - machine_mode op_mode = GET_MODE (op0); - bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode); - - /* All of the unordered compare instructions only work on registers. - The same is true of the fcomi compare instructions. The XFmode - compare instructions require registers except when comparing - against zero or when converting operand 1 from fixed point to - floating point. */ - - if (!is_sse - && (unordered_compare - || (op_mode == XFmode - && ! (standard_80387_constant_p (op0) == 1 - || standard_80387_constant_p (op1) == 1) - && GET_CODE (op1) != FLOAT) - || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI)) - { - op0 = force_reg (op_mode, op0); - op1 = force_reg (op_mode, op1); - } - else - { - /* %%% We only allow op1 in memory; op0 must be st(0). So swap - things around if they appear profitable, otherwise force op0 - into a register. */ - - if (standard_80387_constant_p (op0) == 0 - || (MEM_P (op0) - && ! (standard_80387_constant_p (op1) == 0 - || MEM_P (op1)))) - { - enum rtx_code new_code = ix86_fp_swap_condition (code); - if (new_code != UNKNOWN) - { - std::swap (op0, op1); - code = new_code; - } - } - - if (!REG_P (op0)) - op0 = force_reg (op_mode, op0); - - if (CONSTANT_P (op1)) - { - int tmp = standard_80387_constant_p (op1); - if (tmp == 0) - op1 = validize_mem (force_const_mem (op_mode, op1)); - else if (tmp == 1) - { - if (TARGET_CMOVE) - op1 = force_reg (op_mode, op1); - } - else - op1 = force_reg (op_mode, op1); - } - } - - /* Try to rearrange the comparison to make it cheaper. */ - if (ix86_fp_comparison_cost (code) - > ix86_fp_comparison_cost (swap_condition (code)) - && (REG_P (op1) || can_create_pseudo_p ())) - { - std::swap (op0, op1); - code = swap_condition (code); - if (!REG_P (op0)) - op0 = force_reg (op_mode, op0); - } - - *pop0 = op0; - *pop1 = op1; - return code; -} - -/* Generate insn patterns to do a floating point compare of OPERANDS. */ - -static rtx -ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1) -{ - bool unordered_compare = ix86_unordered_fp_compare (code); - machine_mode cmp_mode; - rtx tmp, scratch; - - code = ix86_prepare_fp_compare_args (code, &op0, &op1); - - tmp = gen_rtx_COMPARE (CCFPmode, op0, op1); - if (unordered_compare) - tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP); - - /* Do fcomi/sahf based test when profitable. 
*/ - switch (ix86_fp_comparison_strategy (code)) - { - case IX86_FPCMP_COMI: - cmp_mode = CCFPmode; - emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp)); - break; - - case IX86_FPCMP_SAHF: - cmp_mode = CCFPmode; - tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); - scratch = gen_reg_rtx (HImode); - emit_insn (gen_rtx_SET (scratch, tmp)); - emit_insn (gen_x86_sahf_1 (scratch)); - break; - - case IX86_FPCMP_ARITH: - cmp_mode = CCNOmode; - tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); - scratch = gen_reg_rtx (HImode); - emit_insn (gen_rtx_SET (scratch, tmp)); - - /* In the unordered case, we have to check C2 for NaN's, which - doesn't happen to work out to anything nice combination-wise. - So do some bit twiddling on the value we've got in AH to come - up with an appropriate set of condition codes. */ - - switch (code) - { - case GT: - case UNGT: - if (code == GT || !TARGET_IEEE_FP) - { - emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45))); - code = EQ; - } - else - { - emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); - emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); - emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44))); - cmp_mode = CCmode; - code = GEU; - } - break; - case LT: - case UNLT: - if (code == LT && TARGET_IEEE_FP) - { - emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); - emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx)); - cmp_mode = CCmode; - code = EQ; - } - else - { - emit_insn (gen_testqi_ext_1_ccno (scratch, const1_rtx)); - code = NE; - } - break; - case GE: - case UNGE: - if (code == GE || !TARGET_IEEE_FP) - { - emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x05))); - code = EQ; - } - else - { - emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); - emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, const1_rtx)); - code = NE; - } - break; - case LE: - case UNLE: - if (code == LE && TARGET_IEEE_FP) - { - emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); - emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); - emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); - cmp_mode = CCmode; - code = LTU; - } - else - { - emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45))); - code = NE; - } - break; - case EQ: - case UNEQ: - if (code == EQ && TARGET_IEEE_FP) - { - emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); - emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); - cmp_mode = CCmode; - code = EQ; - } - else - { - emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40))); - code = NE; - } - break; - case NE: - case LTGT: - if (code == NE && TARGET_IEEE_FP) - { - emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); - emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, - GEN_INT (0x40))); - code = NE; - } - else - { - emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40))); - code = EQ; - } - break; - - case UNORDERED: - emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04))); - code = NE; - break; - case ORDERED: - emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04))); - code = EQ; - break; - - default: - gcc_unreachable (); - } - break; - - default: - gcc_unreachable(); - } - - /* Return the test that should be put into the flags user, i.e. - the bcc, scc, or cmov instruction. */ - return gen_rtx_fmt_ee (code, VOIDmode, - gen_rtx_REG (cmp_mode, FLAGS_REG), - const0_rtx); -} - -/* Generate insn patterns to do an integer compare of OPERANDS. 
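A key to the AH masks in the IX86_FPCMP_ARITH case above (this is the standard x87 status-word layout after fnstsw, not anything specific to this code):

    /* AH receives status-word bits 8..15, so:
         0x01 = C0, 0x04 = C2, 0x40 = C3, 0x45 = C3 | C2 | C0.
       fucom sets (C3,C2,C0) to (0,0,0) for >, (0,0,1) for <,
       (1,0,0) for ==, and (1,1,1) for unordered, which is why e.g.
       GT tests 0x45 for zero and EQ tests 0x40.  */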
*/ - -static rtx -ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1) -{ - machine_mode cmpmode; - rtx tmp, flags; - - cmpmode = SELECT_CC_MODE (code, op0, op1); - flags = gen_rtx_REG (cmpmode, FLAGS_REG); - - /* This is very simple, but making the interface the same as in the - FP case makes the rest of the code easier. */ - tmp = gen_rtx_COMPARE (cmpmode, op0, op1); - emit_insn (gen_rtx_SET (flags, tmp)); - - /* Return the test that should be put into the flags user, i.e. - the bcc, scc, or cmov instruction. */ - return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx); -} - -static rtx -ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1) -{ - rtx ret; - - if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC) - ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); - - else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0))) - { - gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0))); - ret = ix86_expand_fp_compare (code, op0, op1); - } - else - ret = ix86_expand_int_compare (code, op0, op1); - - return ret; -} - -void -ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1) -{ - rtx ret; - - gcc_assert (GET_MODE (dest) == QImode); - - ret = ix86_expand_compare (code, op0, op1); - PUT_MODE (ret, QImode); - emit_insn (gen_rtx_SET (dest, ret)); -} - -/* Expand comparison setting or clearing carry flag. Return true when - successful and set pop for the operation. */ -static bool -ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) -{ - machine_mode mode - = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); - - /* Do not handle double-mode compares that go through special path. */ - if (mode == (TARGET_64BIT ? TImode : DImode)) - return false; - - if (SCALAR_FLOAT_MODE_P (mode)) - { - rtx compare_op; - rtx_insn *compare_seq; - - gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); - - /* Shortcut: following common codes never translate - into carry flag compares. */ - if (code == EQ || code == NE || code == UNEQ || code == LTGT - || code == ORDERED || code == UNORDERED) - return false; - - /* These comparisons require zero flag; swap operands so they won't. */ - if ((code == GT || code == UNLE || code == LE || code == UNGT) - && !TARGET_IEEE_FP) - { - std::swap (op0, op1); - code = swap_condition (code); - } - - /* Try to expand the comparison and verify that we end up with - carry flag based comparison. This fails to be true only when - we decide to expand comparison using arithmetic that is not - too common scenario. */ - start_sequence (); - compare_op = ix86_expand_fp_compare (code, op0, op1); - compare_seq = get_insns (); - end_sequence (); - - if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode) - code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); - else - code = GET_CODE (compare_op); - - if (code != LTU && code != GEU) - return false; - - emit_insn (compare_seq); - *pop = compare_op; - return true; - } - - if (!INTEGRAL_MODE_P (mode)) - return false; - - switch (code) - { - case LTU: - case GEU: - break; - - /* Convert a==0 into (unsigned)a<1. */ - case EQ: - case NE: - if (op1 != const0_rtx) - return false; - op1 = const1_rtx; - code = (code == EQ ? LTU : GEU); - break; - - /* Convert a>b into b=b-1. */ - case GTU: - case LEU: - if (CONST_INT_P (op1)) - { - op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0)); - /* Bail out on overflow. We still can swap operands but that - would force loading of the constant into register. 
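For reference, the rewrites this function performs so that the whole result lands in the carry flag (and can then feed adc/sbb) include the following, summarized from the cases above and below:

    /*  x == 0   ->  (unsigned) x <  1
        x >  c   ->  (unsigned) x >= c + 1       (constant c, barring overflow)
        x >= 0   ->  (unsigned) x <  0x80000000  (SImode)
        x >  -1  ->  likewise, via the sign bit                                  */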
*/ - if (op1 == const0_rtx - || !x86_64_immediate_operand (op1, GET_MODE (op1))) - return false; - code = (code == GTU ? GEU : LTU); - } - else - { - std::swap (op0, op1); - code = (code == GTU ? LTU : GEU); - } - break; - - /* Convert a>=0 into (unsigned)a<0x80000000. */ - case LT: - case GE: - if (mode == DImode || op1 != const0_rtx) - return false; - op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); - code = (code == LT ? GEU : LTU); - break; - case LE: - case GT: - if (mode == DImode || op1 != constm1_rtx) - return false; - op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); - code = (code == LE ? GEU : LTU); - break; - - default: - return false; - } - /* Swapping operands may cause constant to appear as first operand. */ - if (!nonimmediate_operand (op0, VOIDmode)) - { - if (!can_create_pseudo_p ()) - return false; - op0 = force_reg (mode, op0); - } - *pop = ix86_expand_compare (code, op0, op1); - gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU); - return true; -} - -/* Expand conditional increment or decrement using adb/sbb instructions. - The default case using setcc followed by the conditional move can be - done by generic code. */ -bool -ix86_expand_int_addcc (rtx operands[]) -{ - enum rtx_code code = GET_CODE (operands[1]); - rtx flags; - rtx (*insn) (machine_mode, rtx, rtx, rtx, rtx, rtx); - rtx compare_op; - rtx val = const0_rtx; - bool fpcmp = false; - machine_mode mode; - rtx op0 = XEXP (operands[1], 0); - rtx op1 = XEXP (operands[1], 1); - - if (operands[3] != const1_rtx - && operands[3] != constm1_rtx) - return false; - if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op)) - return false; - code = GET_CODE (compare_op); - - flags = XEXP (compare_op, 0); - - if (GET_MODE (flags) == CCFPmode) - { - fpcmp = true; - code = ix86_fp_compare_code_to_integer (code); - } - - if (code != LTU) - { - val = constm1_rtx; - if (fpcmp) - PUT_CODE (compare_op, - reverse_condition_maybe_unordered - (GET_CODE (compare_op))); - else - PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); - } - - mode = GET_MODE (operands[0]); - - /* Construct either adc or sbb insn. */ - if ((code == LTU) == (operands[3] == constm1_rtx)) - insn = gen_sub3_carry; - else - insn = gen_add3_carry; - - emit_insn (insn (mode, operands[0], operands[2], val, flags, compare_op)); - - return true; -} - -bool -ix86_expand_int_movcc (rtx operands[]) -{ - enum rtx_code code = GET_CODE (operands[1]), compare_code; - rtx_insn *compare_seq; - rtx compare_op; - machine_mode mode = GET_MODE (operands[0]); - bool sign_bit_compare_p = false; - rtx op0 = XEXP (operands[1], 0); - rtx op1 = XEXP (operands[1], 1); - - if (GET_MODE (op0) == TImode - || (GET_MODE (op0) == DImode - && !TARGET_64BIT)) - return false; - - start_sequence (); - compare_op = ix86_expand_compare (code, op0, op1); - compare_seq = get_insns (); - end_sequence (); - - compare_code = GET_CODE (compare_op); - - if ((op1 == const0_rtx && (code == GE || code == LT)) - || (op1 == constm1_rtx && (code == GT || code == LE))) - sign_bit_compare_p = true; - - /* Don't attempt mode expansion here -- if we had to expand 5 or 6 - HImode insns, we'd be swallowed in word prefix ops. */ - - if ((mode != HImode || TARGET_FAST_PREFIX) - && (mode != (TARGET_64BIT ? 
TImode : DImode)) - && CONST_INT_P (operands[2]) - && CONST_INT_P (operands[3])) - { - rtx out = operands[0]; - HOST_WIDE_INT ct = INTVAL (operands[2]); - HOST_WIDE_INT cf = INTVAL (operands[3]); - HOST_WIDE_INT diff; - - diff = ct - cf; - /* Sign bit compares are better done using shifts than we do by using - sbb. */ - if (sign_bit_compare_p - || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op)) - { - /* Detect overlap between destination and compare sources. */ - rtx tmp = out; - - if (!sign_bit_compare_p) - { - rtx flags; - bool fpcmp = false; - - compare_code = GET_CODE (compare_op); - - flags = XEXP (compare_op, 0); - - if (GET_MODE (flags) == CCFPmode) - { - fpcmp = true; - compare_code - = ix86_fp_compare_code_to_integer (compare_code); - } - - /* To simplify rest of code, restrict to the GEU case. */ - if (compare_code == LTU) - { - std::swap (ct, cf); - compare_code = reverse_condition (compare_code); - code = reverse_condition (code); - } - else - { - if (fpcmp) - PUT_CODE (compare_op, - reverse_condition_maybe_unordered - (GET_CODE (compare_op))); - else - PUT_CODE (compare_op, - reverse_condition (GET_CODE (compare_op))); - } - diff = ct - cf; - - if (reg_overlap_mentioned_p (out, op0) - || reg_overlap_mentioned_p (out, op1)) - tmp = gen_reg_rtx (mode); - - if (mode == DImode) - emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op)); - else - emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), - flags, compare_op)); - } - else - { - if (code == GT || code == GE) - code = reverse_condition (code); - else - { - std::swap (ct, cf); - diff = ct - cf; - } - tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1); - } - - if (diff == 1) - { - /* - * cmpl op0,op1 - * sbbl dest,dest - * [addl dest, ct] - * - * Size 5 - 8. - */ - if (ct) - tmp = expand_simple_binop (mode, PLUS, - tmp, GEN_INT (ct), - copy_rtx (tmp), 1, OPTAB_DIRECT); - } - else if (cf == -1) - { - /* - * cmpl op0,op1 - * sbbl dest,dest - * orl $ct, dest - * - * Size 8. - */ - tmp = expand_simple_binop (mode, IOR, - tmp, GEN_INT (ct), - copy_rtx (tmp), 1, OPTAB_DIRECT); - } - else if (diff == -1 && ct) - { - /* - * cmpl op0,op1 - * sbbl dest,dest - * notl dest - * [addl dest, cf] - * - * Size 8 - 11. - */ - tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); - if (cf) - tmp = expand_simple_binop (mode, PLUS, - copy_rtx (tmp), GEN_INT (cf), - copy_rtx (tmp), 1, OPTAB_DIRECT); - } - else - { - /* - * cmpl op0,op1 - * sbbl dest,dest - * [notl dest] - * andl cf - ct, dest - * [addl dest, ct] - * - * Size 8 - 11. - */ - - if (cf == 0) - { - cf = ct; - ct = 0; - tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); - } - - tmp = expand_simple_binop (mode, AND, - copy_rtx (tmp), - gen_int_mode (cf - ct, mode), - copy_rtx (tmp), 1, OPTAB_DIRECT); - if (ct) - tmp = expand_simple_binop (mode, PLUS, - copy_rtx (tmp), GEN_INT (ct), - copy_rtx (tmp), 1, OPTAB_DIRECT); - } - - if (!rtx_equal_p (tmp, out)) - emit_move_insn (copy_rtx (out), copy_rtx (tmp)); - - return true; - } - - if (diff < 0) - { - machine_mode cmp_mode = GET_MODE (op0); - enum rtx_code new_code; - - if (SCALAR_FLOAT_MODE_P (cmp_mode)) - { - gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); - - /* We may be reversing a non-trapping - comparison to a trapping comparison. 
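The sbb sequences documented above amount to a branchless select between two constants; the same shape in C (helper name illustrative):

    #include <stdint.h>

    uint32_t
    select_const (uint32_t a, uint32_t b, uint32_t ct, uint32_t cf)
    {
      uint32_t mask = - (uint32_t) (a < b);   /* 0 or all-ones, like sbb %eax,%eax */
      return cf + (mask & (ct - cf));         /* ct when a < b, else cf */
    }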
*/ - if (HONOR_NANS (cmp_mode) && flag_trapping_math - && code != EQ && code != NE - && code != ORDERED && code != UNORDERED) - new_code = UNKNOWN; - else - new_code = reverse_condition_maybe_unordered (code); - } - else - new_code = ix86_reverse_condition (code, cmp_mode); - if (new_code != UNKNOWN) - { - std::swap (ct, cf); - diff = -diff; - code = new_code; - } - } - - compare_code = UNKNOWN; - if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT - && CONST_INT_P (op1)) - { - if (op1 == const0_rtx - && (code == LT || code == GE)) - compare_code = code; - else if (op1 == constm1_rtx) - { - if (code == LE) - compare_code = LT; - else if (code == GT) - compare_code = GE; - } - } - - /* Optimize dest = (op0 < 0) ? -1 : cf. */ - if (compare_code != UNKNOWN - && GET_MODE (op0) == GET_MODE (out) - && (cf == -1 || ct == -1)) - { - /* If lea code below could be used, only optimize - if it results in a 2 insn sequence. */ - - if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8 - || diff == 3 || diff == 5 || diff == 9) - || (compare_code == LT && ct == -1) - || (compare_code == GE && cf == -1)) - { - /* - * notl op1 (if necessary) - * sarl $31, op1 - * orl cf, op1 - */ - if (ct != -1) - { - cf = ct; - ct = -1; - code = reverse_condition (code); - } - - out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1); - - out = expand_simple_binop (mode, IOR, - out, GEN_INT (cf), - out, 1, OPTAB_DIRECT); - if (out != operands[0]) - emit_move_insn (operands[0], out); - - return true; - } - } - - - if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 - || diff == 3 || diff == 5 || diff == 9) - && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL) - && (mode != DImode - || x86_64_immediate_operand (GEN_INT (cf), VOIDmode))) - { - /* - * xorl dest,dest - * cmpl op1,op2 - * setcc dest - * lea cf(dest*(ct-cf)),dest - * - * Size 14. - * - * This also catches the degenerate setcc-only case. - */ - - rtx tmp; - int nops; - - out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1); - - nops = 0; - /* On x86_64 the lea instruction operates on Pmode, so we need - to get arithmetics done in proper mode to match. */ - if (diff == 1) - tmp = copy_rtx (out); - else - { - rtx out1; - out1 = copy_rtx (out); - tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); - nops++; - if (diff & 1) - { - tmp = gen_rtx_PLUS (mode, tmp, out1); - nops++; - } - } - if (cf != 0) - { - tmp = plus_constant (mode, tmp, cf); - nops++; - } - if (!rtx_equal_p (tmp, out)) - { - if (nops == 1) - out = force_operand (tmp, copy_rtx (out)); - else - emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp))); - } - if (!rtx_equal_p (out, operands[0])) - emit_move_insn (operands[0], copy_rtx (out)); - - return true; - } - - /* - * General case: Jumpful: - * xorl dest,dest cmpl op1, op2 - * cmpl op1, op2 movl ct, dest - * setcc dest jcc 1f - * decl dest movl cf, dest - * andl (cf-ct),dest 1: - * addl ct,dest - * - * Size 20. Size 14. - * - * This is reasonably steep, but branch mispredict costs are - * high on modern cpus, so consider failing only if optimizing - * for space. - */ - - if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) - && BRANCH_COST (optimize_insn_for_speed_p (), - false) >= 2) - { - if (cf == 0) - { - machine_mode cmp_mode = GET_MODE (op0); - enum rtx_code new_code; - - if (SCALAR_FLOAT_MODE_P (cmp_mode)) - { - gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); - - /* We may be reversing a non-trapping - comparison to a trapping comparison. 
*/ - if (HONOR_NANS (cmp_mode) && flag_trapping_math - && code != EQ && code != NE - && code != ORDERED && code != UNORDERED) - new_code = UNKNOWN; - else - new_code = reverse_condition_maybe_unordered (code); - - } - else - { - new_code = ix86_reverse_condition (code, cmp_mode); - if (compare_code != UNKNOWN && new_code != UNKNOWN) - compare_code = reverse_condition (compare_code); - } - - if (new_code != UNKNOWN) - { - cf = ct; - ct = 0; - code = new_code; - } - } - - if (compare_code != UNKNOWN) - { - /* notl op1 (if needed) - sarl $31, op1 - andl (cf-ct), op1 - addl ct, op1 - - For x < 0 (resp. x <= -1) there will be no notl, - so if possible swap the constants to get rid of the - complement. - True/false will be -1/0 while code below (store flag - followed by decrement) is 0/-1, so the constants need - to be exchanged once more. */ - - if (compare_code == GE || !cf) - { - code = reverse_condition (code); - compare_code = LT; - } - else - std::swap (ct, cf); - - out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1); - } - else - { - out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1); - - out = expand_simple_binop (mode, PLUS, copy_rtx (out), - constm1_rtx, - copy_rtx (out), 1, OPTAB_DIRECT); - } - - out = expand_simple_binop (mode, AND, copy_rtx (out), - gen_int_mode (cf - ct, mode), - copy_rtx (out), 1, OPTAB_DIRECT); - if (ct) - out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), - copy_rtx (out), 1, OPTAB_DIRECT); - if (!rtx_equal_p (out, operands[0])) - emit_move_insn (operands[0], copy_rtx (out)); - - return true; - } - } - - if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) - { - /* Try a few things more with specific constants and a variable. */ - - optab op; - rtx var, orig_out, out, tmp; - - if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2) - return false; - - /* If one of the two operands is an interesting constant, load a - constant with the above and mask it in with a logical operation. */ - - if (CONST_INT_P (operands[2])) - { - var = operands[3]; - if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) - operands[3] = constm1_rtx, op = and_optab; - else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) - operands[3] = const0_rtx, op = ior_optab; - else - return false; - } - else if (CONST_INT_P (operands[3])) - { - var = operands[2]; - if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) - operands[2] = constm1_rtx, op = and_optab; - else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx) - operands[2] = const0_rtx, op = ior_optab; - else - return false; - } - else - return false; - - orig_out = operands[0]; - tmp = gen_reg_rtx (mode); - operands[0] = tmp; - - /* Recurse to get the constant loaded. */ - if (!ix86_expand_int_movcc (operands)) - return false; - - /* Mask in the interesting variable. */ - out = expand_binop (mode, op, var, tmp, orig_out, 0, - OPTAB_WIDEN); - if (!rtx_equal_p (out, orig_out)) - emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); - - return true; - } - - /* - * For comparison with above, - * - * movl cf,dest - * movl ct,tmp - * cmpl op1,op2 - * cmovcc tmp,dest - * - * Size 15. - */ - - if (! nonimmediate_operand (operands[2], mode)) - operands[2] = force_reg (mode, operands[2]); - if (! nonimmediate_operand (operands[3], mode)) - operands[3] = force_reg (mode, operands[3]); - - if (! register_operand (operands[2], VOIDmode) - && (mode == QImode - || ! 
register_operand (operands[3], VOIDmode))) - operands[2] = force_reg (mode, operands[2]); - - if (mode == QImode - && ! register_operand (operands[3], VOIDmode)) - operands[3] = force_reg (mode, operands[3]); - - emit_insn (compare_seq); - emit_insn (gen_rtx_SET (operands[0], - gen_rtx_IF_THEN_ELSE (mode, - compare_op, operands[2], - operands[3]))); - return true; -} - -/* Detect conditional moves that exactly match min/max operational - semantics. Note that this is IEEE safe, as long as we don't - interchange the operands. - - Returns FALSE if this conditional move doesn't match a MIN/MAX, - and TRUE if the operation is successful and instructions are emitted. */ - -static bool -ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, - rtx cmp_op1, rtx if_true, rtx if_false) -{ - machine_mode mode; - bool is_min; - rtx tmp; - - if (code == LT) - ; - else if (code == UNGE) - std::swap (if_true, if_false); - else - return false; - - if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false)) - is_min = true; - else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false)) - is_min = false; - else - return false; - - mode = GET_MODE (dest); - - /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, - but MODE may be a vector mode and thus not appropriate. */ - if (!flag_finite_math_only || flag_signed_zeros) - { - int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX; - rtvec v; - - if_true = force_reg (mode, if_true); - v = gen_rtvec (2, if_true, if_false); - tmp = gen_rtx_UNSPEC (mode, v, u); - } - else - { - code = is_min ? SMIN : SMAX; - if (MEM_P (if_true) && MEM_P (if_false)) - if_true = force_reg (mode, if_true); - tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false); - } - - emit_insn (gen_rtx_SET (dest, tmp)); - return true; -} - -/* Return true if MODE is valid for vector compare to mask register, - Same result for conditionl vector move with mask register. */ -static bool -ix86_valid_mask_cmp_mode (machine_mode mode) -{ - /* XOP has its own vector conditional movement. */ - if (TARGET_XOP && !TARGET_AVX512F) - return false; - - /* AVX512F is needed for mask operation. */ - if (!(TARGET_AVX512F && VECTOR_MODE_P (mode))) - return false; - - /* AVX512BW is needed for vector QI/HImode, - AVX512VL is needed for 128/256-bit vector. */ - machine_mode inner_mode = GET_MODE_INNER (mode); - int vector_size = GET_MODE_SIZE (mode); - if ((inner_mode == QImode || inner_mode == HImode) && !TARGET_AVX512BW) - return false; - - return vector_size == 64 || TARGET_AVX512VL; -} - -/* Expand an SSE comparison. Return the register with the result. */ - -static rtx -ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, - rtx op_true, rtx op_false) -{ - machine_mode mode = GET_MODE (dest); - machine_mode cmp_ops_mode = GET_MODE (cmp_op0); - - /* In general case result of comparison can differ from operands' type. */ - machine_mode cmp_mode; - - /* In AVX512F the result of comparison is an integer mask. */ - bool maskcmp = false; - rtx x; - - if (ix86_valid_mask_cmp_mode (cmp_ops_mode)) - { - unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode); - maskcmp = true; - cmp_mode = nbits > 8 ? int_mode_for_size (nbits, 0).require () : E_QImode; - } - else - cmp_mode = cmp_ops_mode; - - cmp_op0 = force_reg (cmp_ops_mode, cmp_op0); - - int (*op1_predicate)(rtx, machine_mode) - = VECTOR_MODE_P (cmp_ops_mode) ? 
vector_operand : nonimmediate_operand; - - if (!op1_predicate (cmp_op1, cmp_ops_mode)) - cmp_op1 = force_reg (cmp_ops_mode, cmp_op1); - - if (optimize - || (maskcmp && cmp_mode != mode) - || (op_true && reg_overlap_mentioned_p (dest, op_true)) - || (op_false && reg_overlap_mentioned_p (dest, op_false))) - dest = gen_reg_rtx (maskcmp ? cmp_mode : mode); - - x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1); - - if (cmp_mode != mode && !maskcmp) - { - x = force_reg (cmp_ops_mode, x); - convert_move (dest, x, false); - } - else - emit_insn (gen_rtx_SET (dest, x)); - - return dest; -} - -/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical - operations. This is used for both scalar and vector conditional moves. */ - -void -ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) -{ - machine_mode mode = GET_MODE (dest); - machine_mode cmpmode = GET_MODE (cmp); - - /* In AVX512F the result of comparison is an integer mask. */ - bool maskcmp = mode != cmpmode && ix86_valid_mask_cmp_mode (mode); - - rtx t2, t3, x; - - /* If we have an integer mask and FP value then we need - to cast mask to FP mode. */ - if (mode != cmpmode && VECTOR_MODE_P (cmpmode)) - { - cmp = force_reg (cmpmode, cmp); - cmp = gen_rtx_SUBREG (mode, cmp, 0); - } - - if (maskcmp) - { - /* Using vector move with mask register. */ - cmp = force_reg (cmpmode, cmp); - /* Optimize for mask zero. */ - op_true = (op_true != CONST0_RTX (mode) - ? force_reg (mode, op_true) : op_true); - op_false = (op_false != CONST0_RTX (mode) - ? force_reg (mode, op_false) : op_false); - if (op_true == CONST0_RTX (mode)) - { - rtx (*gen_not) (rtx, rtx); - switch (cmpmode) - { - case E_QImode: gen_not = gen_knotqi; break; - case E_HImode: gen_not = gen_knothi; break; - case E_SImode: gen_not = gen_knotsi; break; - case E_DImode: gen_not = gen_knotdi; break; - default: gcc_unreachable (); - } - rtx n = gen_reg_rtx (cmpmode); - emit_insn (gen_not (n, cmp)); - cmp = n; - /* Reverse op_true op_false. 
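
The knot-and-swap step here relies on the identity select (k, t, f) == select (~k, f, t), and the non-mask paths of this function build the same selection out of plain logic operations. A standalone sketch of that bitwise select with SSE2 intrinsics (assuming cmp holds all-ones or all-zero in every element, as the compare expanders guarantee):

#include <emmintrin.h>

/* dest = (cmp & t) | (~cmp & f).  _mm_andnot_si128 (a, b) computes ~a & b,
   so these three operations are the AND/ANDNOT/IOR sequence the expander
   falls back to when no blend instruction is available.  */
static __m128i
sse2_select (__m128i cmp, __m128i t, __m128i f)
{
  return _mm_or_si128 (_mm_and_si128 (cmp, t),
                       _mm_andnot_si128 (cmp, f));
}

Inverting cmp and exchanging t and f leaves the result unchanged, which is why a zero op_true can be traded for a zero op_false plus a knot of the mask.
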
*/ - std::swap (op_true, op_false); - } - - rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp); - emit_insn (gen_rtx_SET (dest, vec_merge)); - return; - } - else if (vector_all_ones_operand (op_true, mode) - && op_false == CONST0_RTX (mode)) - { - emit_insn (gen_rtx_SET (dest, cmp)); - return; - } - else if (op_false == CONST0_RTX (mode)) - { - op_true = force_reg (mode, op_true); - x = gen_rtx_AND (mode, cmp, op_true); - emit_insn (gen_rtx_SET (dest, x)); - return; - } - else if (op_true == CONST0_RTX (mode)) - { - op_false = force_reg (mode, op_false); - x = gen_rtx_NOT (mode, cmp); - x = gen_rtx_AND (mode, x, op_false); - emit_insn (gen_rtx_SET (dest, x)); - return; - } - else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)) - { - op_false = force_reg (mode, op_false); - x = gen_rtx_IOR (mode, cmp, op_false); - emit_insn (gen_rtx_SET (dest, x)); - return; - } - else if (TARGET_XOP) - { - op_true = force_reg (mode, op_true); - - if (!nonimmediate_operand (op_false, mode)) - op_false = force_reg (mode, op_false); - - emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp, - op_true, - op_false))); - return; - } - - rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; - rtx d = dest; - - if (!vector_operand (op_true, mode)) - op_true = force_reg (mode, op_true); - - op_false = force_reg (mode, op_false); - - switch (mode) - { - case E_V4SFmode: - if (TARGET_SSE4_1) - gen = gen_sse4_1_blendvps; - break; - case E_V2DFmode: - if (TARGET_SSE4_1) - gen = gen_sse4_1_blendvpd; - break; - case E_SFmode: - if (TARGET_SSE4_1) - { - gen = gen_sse4_1_blendvss; - op_true = force_reg (mode, op_true); - } - break; - case E_DFmode: - if (TARGET_SSE4_1) - { - gen = gen_sse4_1_blendvsd; - op_true = force_reg (mode, op_true); - } - break; - case E_V16QImode: - case E_V8HImode: - case E_V4SImode: - case E_V2DImode: - if (TARGET_SSE4_1) - { - gen = gen_sse4_1_pblendvb; - if (mode != V16QImode) - d = gen_reg_rtx (V16QImode); - op_false = gen_lowpart (V16QImode, op_false); - op_true = gen_lowpart (V16QImode, op_true); - cmp = gen_lowpart (V16QImode, cmp); - } - break; - case E_V8SFmode: - if (TARGET_AVX) - gen = gen_avx_blendvps256; - break; - case E_V4DFmode: - if (TARGET_AVX) - gen = gen_avx_blendvpd256; - break; - case E_V32QImode: - case E_V16HImode: - case E_V8SImode: - case E_V4DImode: - if (TARGET_AVX2) - { - gen = gen_avx2_pblendvb; - if (mode != V32QImode) - d = gen_reg_rtx (V32QImode); - op_false = gen_lowpart (V32QImode, op_false); - op_true = gen_lowpart (V32QImode, op_true); - cmp = gen_lowpart (V32QImode, cmp); - } - break; - - case E_V64QImode: - gen = gen_avx512bw_blendmv64qi; - break; - case E_V32HImode: - gen = gen_avx512bw_blendmv32hi; - break; - case E_V16SImode: - gen = gen_avx512f_blendmv16si; - break; - case E_V8DImode: - gen = gen_avx512f_blendmv8di; - break; - case E_V8DFmode: - gen = gen_avx512f_blendmv8df; - break; - case E_V16SFmode: - gen = gen_avx512f_blendmv16sf; - break; - - default: - break; - } - - if (gen != NULL) - { - emit_insn (gen (d, op_false, op_true, cmp)); - if (d != dest) - emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d)); - } - else - { - op_true = force_reg (mode, op_true); - - t2 = gen_reg_rtx (mode); - if (optimize) - t3 = gen_reg_rtx (mode); - else - t3 = dest; - - x = gen_rtx_AND (mode, op_true, cmp); - emit_insn (gen_rtx_SET (t2, x)); - - x = gen_rtx_NOT (mode, cmp); - x = gen_rtx_AND (mode, x, op_false); - emit_insn (gen_rtx_SET (t3, x)); - - x = gen_rtx_IOR (mode, t3, t2); - emit_insn (gen_rtx_SET (dest, x)); - } -} - -/* Swap, force 
into registers, or otherwise massage the two operands - to an sse comparison with a mask result. Thus we differ a bit from - ix86_prepare_fp_compare_args which expects to produce a flags result. - - The DEST operand exists to help determine whether to commute commutative - operators. The POP0/POP1 operands are updated in place. The new - comparison code is returned, or UNKNOWN if not implementable. */ - -static enum rtx_code -ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, - rtx *pop0, rtx *pop1) -{ - switch (code) - { - case LTGT: - case UNEQ: - /* AVX supports all the needed comparisons. */ - if (TARGET_AVX) - break; - /* We have no LTGT as an operator. We could implement it with - NE & ORDERED, but this requires an extra temporary. It's - not clear that it's worth it. */ - return UNKNOWN; - - case LT: - case LE: - case UNGT: - case UNGE: - /* These are supported directly. */ - break; - - case EQ: - case NE: - case UNORDERED: - case ORDERED: - /* AVX has 3 operand comparisons, no need to swap anything. */ - if (TARGET_AVX) - break; - /* For commutative operators, try to canonicalize the destination - operand to be first in the comparison - this helps reload to - avoid extra moves. */ - if (!dest || !rtx_equal_p (dest, *pop1)) - break; - /* FALLTHRU */ - - case GE: - case GT: - case UNLE: - case UNLT: - /* These are not supported directly before AVX, and furthermore - ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the - comparison operands to transform into something that is - supported. */ - std::swap (*pop0, *pop1); - code = swap_condition (code); - break; - - default: - gcc_unreachable (); - } - - return code; -} - -/* Expand a floating-point conditional move. Return true if successful. */ - -bool -ix86_expand_fp_movcc (rtx operands[]) -{ - machine_mode mode = GET_MODE (operands[0]); - enum rtx_code code = GET_CODE (operands[1]); - rtx tmp, compare_op; - rtx op0 = XEXP (operands[1], 0); - rtx op1 = XEXP (operands[1], 1); - - if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) - { - machine_mode cmode; - - /* Since we've no cmove for sse registers, don't force bad register - allocation just to gain access to it. Deny movcc when the - comparison mode doesn't match the move mode. */ - cmode = GET_MODE (op0); - if (cmode == VOIDmode) - cmode = GET_MODE (op1); - if (cmode != mode) - return false; - - code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1); - if (code == UNKNOWN) - return false; - - if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1, - operands[2], operands[3])) - return true; - - tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1, - operands[2], operands[3]); - ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]); - return true; - } - - if (GET_MODE (op0) == TImode - || (GET_MODE (op0) == DImode - && !TARGET_64BIT)) - return false; - - /* The floating point conditional move instructions don't directly - support conditions resulting from a signed integer comparison. */ - - compare_op = ix86_expand_compare (code, op0, op1); - if (!fcmov_comparison_operator (compare_op, VOIDmode)) - { - tmp = gen_reg_rtx (QImode); - ix86_expand_setcc (tmp, code, op0, op1); - - compare_op = ix86_expand_compare (NE, tmp, const0_rtx); - } - - emit_insn (gen_rtx_SET (operands[0], - gen_rtx_IF_THEN_ELSE (mode, compare_op, - operands[2], operands[3]))); - - return true; -} - -/* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. 
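
A quick illustration of the operand swap performed above: before AVX, CMPPS/CMPSD can encode LT and LE (and their unordered counterparts) but not GT or GE, so those comparisons are rewritten by exchanging the operands. In intrinsic terms, a > b is computed as b < a (a sketch of the idea, not the expander itself):

#include <xmmintrin.h>

/* Lanes where a > b; emitted as CMPLTPS with the operands exchanged,
   which is exactly the swap_condition transformation above.  */
static __m128
cmp_gt_via_lt (__m128 a, __m128 b)
{
  return _mm_cmplt_ps (b, a);  /* same mask as _mm_cmpgt_ps (a, b) */
}
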
*/ - -static int -ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code) -{ - switch (code) - { - case EQ: - return 0; - case LT: - case LTU: - return 1; - case LE: - case LEU: - return 2; - case NE: - return 4; - case GE: - case GEU: - return 5; - case GT: - case GTU: - return 6; - default: - gcc_unreachable (); - } -} - -/* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */ - -static int -ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code) -{ - switch (code) - { - case EQ: - return 0x00; - case NE: - return 0x04; - case GT: - return 0x0e; - case LE: - return 0x02; - case GE: - return 0x0d; - case LT: - return 0x01; - case UNLE: - return 0x0a; - case UNLT: - return 0x09; - case UNGE: - return 0x05; - case UNGT: - return 0x06; - case UNEQ: - return 0x18; - case LTGT: - return 0x0c; - case ORDERED: - return 0x07; - case UNORDERED: - return 0x03; - default: - gcc_unreachable (); - } -} - -/* Return immediate value to be used in UNSPEC_PCMP - for comparison CODE in MODE. */ - -static int -ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode) -{ - if (FLOAT_MODE_P (mode)) - return ix86_fp_cmp_code_to_pcmp_immediate (code); - return ix86_int_cmp_code_to_pcmp_immediate (code); -} - -/* Expand AVX-512 vector comparison. */ - -bool -ix86_expand_mask_vec_cmp (rtx operands[]) -{ - machine_mode mask_mode = GET_MODE (operands[0]); - machine_mode cmp_mode = GET_MODE (operands[2]); - enum rtx_code code = GET_CODE (operands[1]); - rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode)); - int unspec_code; - rtx unspec; - - switch (code) - { - case LEU: - case GTU: - case GEU: - case LTU: - unspec_code = UNSPEC_UNSIGNED_PCMP; - break; - - default: - unspec_code = UNSPEC_PCMP; - } - - unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2], - operands[3], imm), - unspec_code); - emit_insn (gen_rtx_SET (operands[0], unspec)); - - return true; -} - -/* Expand fp vector comparison. */ - -bool -ix86_expand_fp_vec_cmp (rtx operands[]) -{ - enum rtx_code code = GET_CODE (operands[1]); - rtx cmp; - - code = ix86_prepare_sse_fp_compare_args (operands[0], code, - &operands[2], &operands[3]); - if (code == UNKNOWN) - { - rtx temp; - switch (GET_CODE (operands[1])) - { - case LTGT: - temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2], - operands[3], NULL, NULL); - cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2], - operands[3], NULL, NULL); - code = AND; - break; - case UNEQ: - temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2], - operands[3], NULL, NULL); - cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2], - operands[3], NULL, NULL); - code = IOR; - break; - default: - gcc_unreachable (); - } - cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, - OPTAB_DIRECT); - } - else - cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3], - operands[1], operands[2]); - - if (operands[0] != cmp) - emit_move_insn (operands[0], cmp); - - return true; -} - -static rtx -ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, - rtx op_true, rtx op_false, bool *negate) -{ - machine_mode data_mode = GET_MODE (dest); - machine_mode mode = GET_MODE (cop0); - rtx x; - - *negate = false; - - /* XOP supports all of the comparisons on all 128-bit vector int types. */ - if (TARGET_XOP - && (mode == V16QImode || mode == V8HImode - || mode == V4SImode || mode == V2DImode)) - ; - /* AVX512F supports all of the comparsions - on all 128/256/512-bit vector int types. 
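
For reference, the integer table in ix86_int_cmp_code_to_pcmp_immediate above is the VPCMP/VPCMPU predicate encoding that the intrinsics expose as _MM_CMPINT_EQ, _MM_CMPINT_LT, and so on; signed and unsigned compares share the immediate and differ only in the opcode. A minimal scalar mirror of that table:

/* VPCMP predicate immediates: the same element-wise predicate is applied
   to every lane and the results are collected into a mask register.  */
enum pcmp_imm { PCMP_EQ = 0, PCMP_LT = 1, PCMP_LE = 2,
                PCMP_NE = 4, PCMP_GE = 5, PCMP_GT = 6 };

/* Scalar model of one lane of vpcmpd with immediate IMM.  */
static bool
pcmp_lane (int a, int b, enum pcmp_imm imm)
{
  switch (imm)
    {
    case PCMP_EQ: return a == b;
    case PCMP_LT: return a < b;
    case PCMP_LE: return a <= b;
    case PCMP_NE: return a != b;
    case PCMP_GE: return a >= b;
    case PCMP_GT: return a > b;
    }
  return false;
}
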
*/ - else if (ix86_valid_mask_cmp_mode (mode)) - ; - else - { - /* Canonicalize the comparison to EQ, GT, GTU. */ - switch (code) - { - case EQ: - case GT: - case GTU: - break; - - case NE: - case LE: - case LEU: - code = reverse_condition (code); - *negate = true; - break; - - case GE: - case GEU: - code = reverse_condition (code); - *negate = true; - /* FALLTHRU */ - - case LT: - case LTU: - std::swap (cop0, cop1); - code = swap_condition (code); - break; - - default: - gcc_unreachable (); - } - - /* Only SSE4.1/SSE4.2 supports V2DImode. */ - if (mode == V2DImode) - { - switch (code) - { - case EQ: - /* SSE4.1 supports EQ. */ - if (!TARGET_SSE4_1) - return NULL; - break; - - case GT: - case GTU: - /* SSE4.2 supports GT/GTU. */ - if (!TARGET_SSE4_2) - return NULL; - break; - - default: - gcc_unreachable (); - } - } - - rtx optrue = op_true ? op_true : CONSTM1_RTX (data_mode); - rtx opfalse = op_false ? op_false : CONST0_RTX (data_mode); - if (*negate) - std::swap (optrue, opfalse); - - /* Transform x > y ? 0 : -1 (i.e. x <= y ? -1 : 0 or x <= y) when - not using integer masks into min (x, y) == x ? -1 : 0 (i.e. - min (x, y) == x). While we add one instruction (the minimum), - we remove the need for two instructions in the negation, as the - result is done this way. - When using masks, do it for SI/DImode element types, as it is shorter - than the two subtractions. */ - if ((code != EQ - && GET_MODE_SIZE (mode) != 64 - && vector_all_ones_operand (opfalse, data_mode) - && optrue == CONST0_RTX (data_mode)) - || (code == GTU - && GET_MODE_SIZE (GET_MODE_INNER (mode)) >= 4 - /* Don't do it if not using integer masks and we'd end up with - the right values in the registers though. */ - && (GET_MODE_SIZE (mode) == 64 - || !vector_all_ones_operand (optrue, data_mode) - || opfalse != CONST0_RTX (data_mode)))) - { - rtx (*gen) (rtx, rtx, rtx) = NULL; - - switch (mode) - { - case E_V16SImode: - gen = (code == GTU) ? gen_uminv16si3 : gen_sminv16si3; - break; - case E_V8DImode: - gen = (code == GTU) ? gen_uminv8di3 : gen_sminv8di3; - cop0 = force_reg (mode, cop0); - cop1 = force_reg (mode, cop1); - break; - case E_V32QImode: - if (TARGET_AVX2) - gen = (code == GTU) ? gen_uminv32qi3 : gen_sminv32qi3; - break; - case E_V16HImode: - if (TARGET_AVX2) - gen = (code == GTU) ? gen_uminv16hi3 : gen_sminv16hi3; - break; - case E_V8SImode: - if (TARGET_AVX2) - gen = (code == GTU) ? gen_uminv8si3 : gen_sminv8si3; - break; - case E_V4DImode: - if (TARGET_AVX512VL) - { - gen = (code == GTU) ? gen_uminv4di3 : gen_sminv4di3; - cop0 = force_reg (mode, cop0); - cop1 = force_reg (mode, cop1); - } - break; - case E_V16QImode: - if (code == GTU && TARGET_SSE2) - gen = gen_uminv16qi3; - else if (code == GT && TARGET_SSE4_1) - gen = gen_sminv16qi3; - break; - case E_V8HImode: - if (code == GTU && TARGET_SSE4_1) - gen = gen_uminv8hi3; - else if (code == GT && TARGET_SSE2) - gen = gen_sminv8hi3; - break; - case E_V4SImode: - if (TARGET_SSE4_1) - gen = (code == GTU) ? gen_uminv4si3 : gen_sminv4si3; - break; - case E_V2DImode: - if (TARGET_AVX512VL) - { - gen = (code == GTU) ? 
gen_uminv2di3 : gen_sminv2di3; - cop0 = force_reg (mode, cop0); - cop1 = force_reg (mode, cop1); - } - break; - default: - break; - } - - if (gen) - { - rtx tem = gen_reg_rtx (mode); - if (!vector_operand (cop0, mode)) - cop0 = force_reg (mode, cop0); - if (!vector_operand (cop1, mode)) - cop1 = force_reg (mode, cop1); - *negate = !*negate; - emit_insn (gen (tem, cop0, cop1)); - cop1 = tem; - code = EQ; - } - } - - /* Unsigned parallel compare is not supported by the hardware. - Play some tricks to turn this into a signed comparison - against 0. */ - if (code == GTU) - { - cop0 = force_reg (mode, cop0); - - switch (mode) - { - case E_V16SImode: - case E_V8DImode: - case E_V8SImode: - case E_V4DImode: - case E_V4SImode: - case E_V2DImode: - { - rtx t1, t2, mask; - - /* Subtract (-(INT MAX) - 1) from both operands to make - them signed. */ - mask = ix86_build_signbit_mask (mode, true, false); - t1 = gen_reg_rtx (mode); - emit_insn (gen_sub3_insn (t1, cop0, mask)); - - t2 = gen_reg_rtx (mode); - emit_insn (gen_sub3_insn (t2, cop1, mask)); - - cop0 = t1; - cop1 = t2; - code = GT; - } - break; - - case E_V64QImode: - case E_V32HImode: - case E_V32QImode: - case E_V16HImode: - case E_V16QImode: - case E_V8HImode: - /* Perform a parallel unsigned saturating subtraction. */ - x = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET - (x, gen_rtx_US_MINUS (mode, cop0, cop1))); - cop0 = x; - cop1 = CONST0_RTX (mode); - code = EQ; - *negate = !*negate; - break; - - default: - gcc_unreachable (); - } - } - } - - if (*negate) - std::swap (op_true, op_false); - - /* Allow the comparison to be done in one mode, but the movcc to - happen in another mode. */ - if (data_mode == mode) - { - x = ix86_expand_sse_cmp (dest, code, cop0, cop1, - op_true, op_false); - } - else - { - gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode)); - x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1, - op_true, op_false); - if (GET_MODE (x) == mode) - x = gen_lowpart (data_mode, x); - } - - return x; -} - -/* Expand integer vector comparison. */ - -bool -ix86_expand_int_vec_cmp (rtx operands[]) -{ - rtx_code code = GET_CODE (operands[1]); - bool negate = false; - rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2], - operands[3], NULL, NULL, &negate); - - if (!cmp) - return false; - - if (negate) - cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp, - CONST0_RTX (GET_MODE (cmp)), - NULL, NULL, &negate); - - gcc_assert (!negate); - - if (operands[0] != cmp) - emit_move_insn (operands[0], cmp); - - return true; -} - -/* Expand a floating-point vector conditional move; a vcond operation - rather than a movcc operation. 
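
The two GTU workarounds above have simple scalar counterparts: biasing both operands by the sign bit turns an unsigned order into a signed one, and for the narrow element types an unsigned-saturating subtraction is nonzero exactly when the first operand is larger. A compilable check of the biasing identity (assuming 32-bit wraparound arithmetic):

#include <assert.h>
#include <stdint.h>

/* Unsigned a > b, computed the way the expander does it for dword and
   qword elements: flip the sign bit of both operands (the same thing as
   subtracting INT_MIN) and use the signed comparison.  */
static bool
gtu_via_signed (uint32_t a, uint32_t b)
{
  return (int32_t) (a ^ 0x80000000u) > (int32_t) (b ^ 0x80000000u);
}

int
main ()
{
  uint32_t v[] = { 0u, 1u, 0x7fffffffu, 0x80000000u, 0xffffffffu };
  for (int i = 0; i < 5; i++)
    for (int j = 0; j < 5; j++)
      assert (gtu_via_signed (v[i], v[j]) == (v[i] > v[j]));
  return 0;
}
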
*/ - -bool -ix86_expand_fp_vcond (rtx operands[]) -{ - enum rtx_code code = GET_CODE (operands[3]); - rtx cmp; - - code = ix86_prepare_sse_fp_compare_args (operands[0], code, - &operands[4], &operands[5]); - if (code == UNKNOWN) - { - rtx temp; - switch (GET_CODE (operands[3])) - { - case LTGT: - temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4], - operands[5], operands[0], operands[0]); - cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4], - operands[5], operands[1], operands[2]); - code = AND; - break; - case UNEQ: - temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4], - operands[5], operands[0], operands[0]); - cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4], - operands[5], operands[1], operands[2]); - code = IOR; - break; - default: - gcc_unreachable (); - } - cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, - OPTAB_DIRECT); - ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); - return true; - } - - if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], - operands[5], operands[1], operands[2])) - return true; - - cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], - operands[1], operands[2]); - ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); - return true; -} - -/* Expand a signed/unsigned integral vector conditional move. */ - -bool -ix86_expand_int_vcond (rtx operands[]) -{ - machine_mode data_mode = GET_MODE (operands[0]); - machine_mode mode = GET_MODE (operands[4]); - enum rtx_code code = GET_CODE (operands[3]); - bool negate = false; - rtx x, cop0, cop1; - - cop0 = operands[4]; - cop1 = operands[5]; - - /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31 - and x < 0 ? 1 : 0 into (unsigned) x >> 31. */ - if ((code == LT || code == GE) - && data_mode == mode - && cop1 == CONST0_RTX (mode) - && operands[1 + (code == LT)] == CONST0_RTX (data_mode) - && GET_MODE_UNIT_SIZE (data_mode) > 1 - && GET_MODE_UNIT_SIZE (data_mode) <= 8 - && (GET_MODE_SIZE (data_mode) == 16 - || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32))) - { - rtx negop = operands[2 - (code == LT)]; - int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1; - if (negop == CONST1_RTX (data_mode)) - { - rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift), - operands[0], 1, OPTAB_DIRECT); - if (res != operands[0]) - emit_move_insn (operands[0], res); - return true; - } - else if (GET_MODE_INNER (data_mode) != DImode - && vector_all_ones_operand (negop, data_mode)) - { - rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift), - operands[0], 0, OPTAB_DIRECT); - if (res != operands[0]) - emit_move_insn (operands[0], res); - return true; - } - } - - if (!nonimmediate_operand (cop1, mode)) - cop1 = force_reg (mode, cop1); - if (!general_operand (operands[1], data_mode)) - operands[1] = force_reg (data_mode, operands[1]); - if (!general_operand (operands[2], data_mode)) - operands[2] = force_reg (data_mode, operands[2]); - - x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1, - operands[1], operands[2], &negate); - - if (!x) - return false; - - ix86_expand_sse_movcc (operands[0], x, operands[1+negate], - operands[2-negate]); - return true; -} - -static bool -ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1, - struct expand_vec_perm_d *d) -{ - /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const - expander, so args are either in d, or in op0, op1 etc. */ - machine_mode mode = GET_MODE (d ? 
d->op0 : op0); - machine_mode maskmode = mode; - rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; - - switch (mode) - { - case E_V8HImode: - if (TARGET_AVX512VL && TARGET_AVX512BW) - gen = gen_avx512vl_vpermt2varv8hi3; - break; - case E_V16HImode: - if (TARGET_AVX512VL && TARGET_AVX512BW) - gen = gen_avx512vl_vpermt2varv16hi3; - break; - case E_V64QImode: - if (TARGET_AVX512VBMI) - gen = gen_avx512bw_vpermt2varv64qi3; - break; - case E_V32HImode: - if (TARGET_AVX512BW) - gen = gen_avx512bw_vpermt2varv32hi3; - break; - case E_V4SImode: - if (TARGET_AVX512VL) - gen = gen_avx512vl_vpermt2varv4si3; - break; - case E_V8SImode: - if (TARGET_AVX512VL) - gen = gen_avx512vl_vpermt2varv8si3; - break; - case E_V16SImode: - if (TARGET_AVX512F) - gen = gen_avx512f_vpermt2varv16si3; - break; - case E_V4SFmode: - if (TARGET_AVX512VL) - { - gen = gen_avx512vl_vpermt2varv4sf3; - maskmode = V4SImode; - } - break; - case E_V8SFmode: - if (TARGET_AVX512VL) - { - gen = gen_avx512vl_vpermt2varv8sf3; - maskmode = V8SImode; - } - break; - case E_V16SFmode: - if (TARGET_AVX512F) - { - gen = gen_avx512f_vpermt2varv16sf3; - maskmode = V16SImode; - } - break; - case E_V2DImode: - if (TARGET_AVX512VL) - gen = gen_avx512vl_vpermt2varv2di3; - break; - case E_V4DImode: - if (TARGET_AVX512VL) - gen = gen_avx512vl_vpermt2varv4di3; - break; - case E_V8DImode: - if (TARGET_AVX512F) - gen = gen_avx512f_vpermt2varv8di3; - break; - case E_V2DFmode: - if (TARGET_AVX512VL) - { - gen = gen_avx512vl_vpermt2varv2df3; - maskmode = V2DImode; - } - break; - case E_V4DFmode: - if (TARGET_AVX512VL) - { - gen = gen_avx512vl_vpermt2varv4df3; - maskmode = V4DImode; - } - break; - case E_V8DFmode: - if (TARGET_AVX512F) - { - gen = gen_avx512f_vpermt2varv8df3; - maskmode = V8DImode; - } - break; - default: - break; - } - - if (gen == NULL) - return false; - - /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const - expander, so args are either in d, or in op0, op1 etc. */ - if (d) - { - rtx vec[64]; - target = d->target; - op0 = d->op0; - op1 = d->op1; - for (int i = 0; i < d->nelt; ++i) - vec[i] = GEN_INT (d->perm[i]); - mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec)); - } - - emit_insn (gen (target, force_reg (maskmode, mask), op0, op1)); - return true; -} - -/* Expand a variable vector permutation. */ - -void -ix86_expand_vec_perm (rtx operands[]) -{ - rtx target = operands[0]; - rtx op0 = operands[1]; - rtx op1 = operands[2]; - rtx mask = operands[3]; - rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32]; - machine_mode mode = GET_MODE (op0); - machine_mode maskmode = GET_MODE (mask); - int w, e, i; - bool one_operand_shuffle = rtx_equal_p (op0, op1); - - /* Number of elements in the vector. */ - w = GET_MODE_NUNITS (mode); - e = GET_MODE_UNIT_SIZE (mode); - gcc_assert (w <= 64); - - if (TARGET_AVX512F && one_operand_shuffle) - { - rtx (*gen) (rtx, rtx, rtx) = NULL; - switch (mode) - { - case E_V16SImode: - gen =gen_avx512f_permvarv16si; - break; - case E_V16SFmode: - gen = gen_avx512f_permvarv16sf; - break; - case E_V8DImode: - gen = gen_avx512f_permvarv8di; - break; - case E_V8DFmode: - gen = gen_avx512f_permvarv8df; - break; - default: - break; - } - if (gen != NULL) - { - emit_insn (gen (target, op0, mask)); - return; - } - } - - if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL)) - return; - - if (TARGET_AVX2) - { - if (mode == V4DImode || mode == V4DFmode || mode == V16HImode) - { - /* Unfortunately, the VPERMQ and VPERMPD instructions only support - an constant shuffle operand. 
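
Every vpermt2var pattern selected above performs the same two-table permute; a rough scalar model of it, under the usual VPERMT2* semantics (the index is reduced modulo twice the element count and selects from the concatenation of the two inputs; NELT is a power of two):

/* Scalar model of a two-source permute such as VPERMT2D: element I of
   the result comes from A when the masked index is below NELT and from
   B otherwise.  */
static void
permute2 (const int *a, const int *b, const unsigned *idx,
          int *out, unsigned nelt)
{
  for (unsigned i = 0; i < nelt; i++)
    {
      unsigned j = idx[i] & (2 * nelt - 1);
      out[i] = j < nelt ? a[j] : b[j - nelt];
    }
}
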
With a tiny bit of effort we can - use VPERMD instead. A re-interpretation stall for V4DFmode is - unfortunate but there's no avoiding it. - Similarly for V16HImode we don't have instructions for variable - shuffling, while for V32QImode we can use after preparing suitable - masks vpshufb; vpshufb; vpermq; vpor. */ - - if (mode == V16HImode) - { - maskmode = mode = V32QImode; - w = 32; - e = 1; - } - else - { - maskmode = mode = V8SImode; - w = 8; - e = 4; - } - t1 = gen_reg_rtx (maskmode); - - /* Replicate the low bits of the V4DImode mask into V8SImode: - mask = { A B C D } - t1 = { A A B B C C D D }. */ - for (i = 0; i < w / 2; ++i) - vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2); - vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); - vt = force_reg (maskmode, vt); - mask = gen_lowpart (maskmode, mask); - if (maskmode == V8SImode) - emit_insn (gen_avx2_permvarv8si (t1, mask, vt)); - else - emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt)); - - /* Multiply the shuffle indicies by two. */ - t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1, - OPTAB_DIRECT); - - /* Add one to the odd shuffle indicies: - t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */ - for (i = 0; i < w / 2; ++i) - { - vec[i * 2] = const0_rtx; - vec[i * 2 + 1] = const1_rtx; - } - vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); - vt = validize_mem (force_const_mem (maskmode, vt)); - t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1, - OPTAB_DIRECT); - - /* Continue as if V8SImode (resp. V32QImode) was used initially. */ - operands[3] = mask = t1; - target = gen_reg_rtx (mode); - op0 = gen_lowpart (mode, op0); - op1 = gen_lowpart (mode, op1); - } - - switch (mode) - { - case E_V8SImode: - /* The VPERMD and VPERMPS instructions already properly ignore - the high bits of the shuffle elements. No need for us to - perform an AND ourselves. */ - if (one_operand_shuffle) - { - emit_insn (gen_avx2_permvarv8si (target, op0, mask)); - if (target != operands[0]) - emit_move_insn (operands[0], - gen_lowpart (GET_MODE (operands[0]), target)); - } - else - { - t1 = gen_reg_rtx (V8SImode); - t2 = gen_reg_rtx (V8SImode); - emit_insn (gen_avx2_permvarv8si (t1, op0, mask)); - emit_insn (gen_avx2_permvarv8si (t2, op1, mask)); - goto merge_two; - } - return; - - case E_V8SFmode: - mask = gen_lowpart (V8SImode, mask); - if (one_operand_shuffle) - emit_insn (gen_avx2_permvarv8sf (target, op0, mask)); - else - { - t1 = gen_reg_rtx (V8SFmode); - t2 = gen_reg_rtx (V8SFmode); - emit_insn (gen_avx2_permvarv8sf (t1, op0, mask)); - emit_insn (gen_avx2_permvarv8sf (t2, op1, mask)); - goto merge_two; - } - return; - - case E_V4SImode: - /* By combining the two 128-bit input vectors into one 256-bit - input vector, we can use VPERMD and VPERMPS for the full - two-operand shuffle. 
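
The replicate/double/add-one dance above just rewrites a qword permutation as the equivalent dword permutation: index A for a 64-bit element becomes the index pair 2A, 2A+1 for its two 32-bit halves. The same computation in plain C:

/* Build a VPERMD-style dword control vector from qword indices -- the
   value the three vector steps above (replicate, double, add 0/1)
   produce in a register.  */
static void
widen_qword_perm (const unsigned *qword_idx, unsigned *dword_idx,
                  unsigned nelt)
{
  for (unsigned i = 0; i < nelt; i++)
    {
      dword_idx[2 * i] = 2 * qword_idx[i];
      dword_idx[2 * i + 1] = 2 * qword_idx[i] + 1;
    }
}
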
*/ - t1 = gen_reg_rtx (V8SImode); - t2 = gen_reg_rtx (V8SImode); - emit_insn (gen_avx_vec_concatv8si (t1, op0, op1)); - emit_insn (gen_avx_vec_concatv8si (t2, mask, mask)); - emit_insn (gen_avx2_permvarv8si (t1, t1, t2)); - emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx)); - return; - - case E_V4SFmode: - t1 = gen_reg_rtx (V8SFmode); - t2 = gen_reg_rtx (V8SImode); - mask = gen_lowpart (V4SImode, mask); - emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1)); - emit_insn (gen_avx_vec_concatv8si (t2, mask, mask)); - emit_insn (gen_avx2_permvarv8sf (t1, t1, t2)); - emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx)); - return; - - case E_V32QImode: - t1 = gen_reg_rtx (V32QImode); - t2 = gen_reg_rtx (V32QImode); - t3 = gen_reg_rtx (V32QImode); - vt2 = GEN_INT (-128); - vt = gen_const_vec_duplicate (V32QImode, vt2); - vt = force_reg (V32QImode, vt); - for (i = 0; i < 32; i++) - vec[i] = i < 16 ? vt2 : const0_rtx; - vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec)); - vt2 = force_reg (V32QImode, vt2); - /* From mask create two adjusted masks, which contain the same - bits as mask in the low 7 bits of each vector element. - The first mask will have the most significant bit clear - if it requests element from the same 128-bit lane - and MSB set if it requests element from the other 128-bit lane. - The second mask will have the opposite values of the MSB, - and additionally will have its 128-bit lanes swapped. - E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have - t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and - t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ... - stands for other 12 bytes. */ - /* The bit whether element is from the same lane or the other - lane is bit 4, so shift it up by 3 to the MSB position. */ - t5 = gen_reg_rtx (V4DImode); - emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask), - GEN_INT (3))); - /* Clear MSB bits from the mask just in case it had them set. */ - emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask)); - /* After this t1 will have MSB set for elements from other lane. */ - emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2)); - /* Clear bits other than MSB. */ - emit_insn (gen_andv32qi3 (t1, t1, vt)); - /* Or in the lower bits from mask into t3. */ - emit_insn (gen_iorv32qi3 (t3, t1, t2)); - /* And invert MSB bits in t1, so MSB is set for elements from the same - lane. */ - emit_insn (gen_xorv32qi3 (t1, t1, vt)); - /* Swap 128-bit lanes in t3. */ - t6 = gen_reg_rtx (V4DImode); - emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3), - const2_rtx, GEN_INT (3), - const0_rtx, const1_rtx)); - /* And or in the lower bits from mask into t1. */ - emit_insn (gen_iorv32qi3 (t1, t1, t2)); - if (one_operand_shuffle) - { - /* Each of these shuffles will put 0s in places where - element from the other 128-bit lane is needed, otherwise - will shuffle in the requested value. */ - emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, - gen_lowpart (V32QImode, t6))); - emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1)); - /* For t3 the 128-bit lanes are swapped again. */ - t7 = gen_reg_rtx (V4DImode); - emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3), - const2_rtx, GEN_INT (3), - const0_rtx, const1_rtx)); - /* And oring both together leads to the result. 
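
The V32QImode sequence above leans on two properties of VPSHUFB: it shuffles only within each 128-bit lane, and a control byte with its most significant bit set produces zero, so the two shuffles pick up exactly the bytes that are reachable in-lane and the final IOR merges them. A scalar model of the instruction's behaviour (a sketch of the semantics, not of the expander):

#include <stdint.h>

/* One 256-bit VPSHUFB: each result byte is taken from the same 16-byte
   lane of SRC, or forced to zero when bit 7 of the control byte is set.  */
static void
pshufb256 (const uint8_t *src, const uint8_t *ctl, uint8_t *dst)
{
  for (int lane = 0; lane < 32; lane += 16)
    for (int i = 0; i < 16; i++)
      dst[lane + i] = (ctl[lane + i] & 0x80)
                      ? 0 : src[lane + (ctl[lane + i] & 0x0f)];
}
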
*/ - emit_insn (gen_iorv32qi3 (target, t1, - gen_lowpart (V32QImode, t7))); - if (target != operands[0]) - emit_move_insn (operands[0], - gen_lowpart (GET_MODE (operands[0]), target)); - return; - } - - t4 = gen_reg_rtx (V32QImode); - /* Similarly to the above one_operand_shuffle code, - just for repeated twice for each operand. merge_two: - code will merge the two results together. */ - emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, - gen_lowpart (V32QImode, t6))); - emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, - gen_lowpart (V32QImode, t6))); - emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1)); - emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1)); - t7 = gen_reg_rtx (V4DImode); - emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4), - const2_rtx, GEN_INT (3), - const0_rtx, const1_rtx)); - t8 = gen_reg_rtx (V4DImode); - emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3), - const2_rtx, GEN_INT (3), - const0_rtx, const1_rtx)); - emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7))); - emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8))); - t1 = t4; - t2 = t3; - goto merge_two; - - default: - gcc_assert (GET_MODE_SIZE (mode) <= 16); - break; - } - } - - if (TARGET_XOP) - { - /* The XOP VPPERM insn supports three inputs. By ignoring the - one_operand_shuffle special case, we avoid creating another - set of constant vectors in memory. */ - one_operand_shuffle = false; - - /* mask = mask & {2*w-1, ...} */ - vt = GEN_INT (2*w - 1); - } - else - { - /* mask = mask & {w-1, ...} */ - vt = GEN_INT (w - 1); - } - - vt = gen_const_vec_duplicate (maskmode, vt); - mask = expand_simple_binop (maskmode, AND, mask, vt, - NULL_RTX, 0, OPTAB_DIRECT); - - /* For non-QImode operations, convert the word permutation control - into a byte permutation control. */ - if (mode != V16QImode) - { - mask = expand_simple_binop (maskmode, ASHIFT, mask, - GEN_INT (exact_log2 (e)), - NULL_RTX, 0, OPTAB_DIRECT); - - /* Convert mask to vector of chars. */ - mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask)); - - /* Replicate each of the input bytes into byte positions: - (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8} - (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12} - (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */ - for (i = 0; i < 16; ++i) - vec[i] = GEN_INT (i/e * e); - vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec)); - vt = validize_mem (force_const_mem (V16QImode, vt)); - if (TARGET_XOP) - emit_insn (gen_xop_pperm (mask, mask, mask, vt)); - else - emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt)); - - /* Convert it into the byte positions by doing - mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */ - for (i = 0; i < 16; ++i) - vec[i] = GEN_INT (i % e); - vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec)); - vt = validize_mem (force_const_mem (V16QImode, vt)); - emit_insn (gen_addv16qi3 (mask, mask, vt)); - } - - /* The actual shuffle operations all operate on V16QImode. 
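
The shift/pshufb/add steps above convert an element-level permutation into the byte-level control that PSHUFB (or XOP's VPPERM) consumes; written out, the control for byte i is simply elem_ctl[i / e] * e + i % e for element size e. In plain C:

/* Expand an element permutation with element size E bytes into a byte
   permutation: each selected element contributes E consecutive bytes.  */
static void
element_ctl_to_byte_ctl (const unsigned *elem_ctl, unsigned char *byte_ctl,
                         unsigned nbytes, unsigned e)
{
  for (unsigned i = 0; i < nbytes; i++)
    byte_ctl[i] = (unsigned char) (elem_ctl[i / e] * e + i % e);
}
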
*/ - op0 = gen_lowpart (V16QImode, op0); - op1 = gen_lowpart (V16QImode, op1); - - if (TARGET_XOP) - { - if (GET_MODE (target) != V16QImode) - target = gen_reg_rtx (V16QImode); - emit_insn (gen_xop_pperm (target, op0, op1, mask)); - if (target != operands[0]) - emit_move_insn (operands[0], - gen_lowpart (GET_MODE (operands[0]), target)); - } - else if (one_operand_shuffle) - { - if (GET_MODE (target) != V16QImode) - target = gen_reg_rtx (V16QImode); - emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask)); - if (target != operands[0]) - emit_move_insn (operands[0], - gen_lowpart (GET_MODE (operands[0]), target)); - } - else - { - rtx xops[6]; - bool ok; - - /* Shuffle the two input vectors independently. */ - t1 = gen_reg_rtx (V16QImode); - t2 = gen_reg_rtx (V16QImode); - emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask)); - emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask)); - - merge_two: - /* Then merge them together. The key is whether any given control - element contained a bit set that indicates the second word. */ - mask = operands[3]; - vt = GEN_INT (w); - if (maskmode == V2DImode && !TARGET_SSE4_1) - { - /* Without SSE4.1, we don't have V2DImode EQ. Perform one - more shuffle to convert the V2DI input mask into a V4SI - input mask. At which point the masking that expand_int_vcond - will work as desired. */ - rtx t3 = gen_reg_rtx (V4SImode); - emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask), - const0_rtx, const0_rtx, - const2_rtx, const2_rtx)); - mask = t3; - maskmode = V4SImode; - e = w = 4; - } - - vt = gen_const_vec_duplicate (maskmode, vt); - vt = force_reg (maskmode, vt); - mask = expand_simple_binop (maskmode, AND, mask, vt, - NULL_RTX, 0, OPTAB_DIRECT); - - if (GET_MODE (target) != mode) - target = gen_reg_rtx (mode); - xops[0] = target; - xops[1] = gen_lowpart (mode, t2); - xops[2] = gen_lowpart (mode, t1); - xops[3] = gen_rtx_EQ (maskmode, mask, vt); - xops[4] = mask; - xops[5] = vt; - ok = ix86_expand_int_vcond (xops); - gcc_assert (ok); - if (target != operands[0]) - emit_move_insn (operands[0], - gen_lowpart (GET_MODE (operands[0]), target)); - } -} - -/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is - true if we should do zero extension, else sign extension. HIGH_P is - true if we want the N/2 high elements, else the low elements. */ - -void -ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p) -{ - machine_mode imode = GET_MODE (src); - rtx tmp; - - if (TARGET_SSE4_1) - { - rtx (*unpack)(rtx, rtx); - rtx (*extract)(rtx, rtx) = NULL; - machine_mode halfmode = BLKmode; - - switch (imode) - { - case E_V64QImode: - if (unsigned_p) - unpack = gen_avx512bw_zero_extendv32qiv32hi2; - else - unpack = gen_avx512bw_sign_extendv32qiv32hi2; - halfmode = V32QImode; - extract - = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi; - break; - case E_V32QImode: - if (unsigned_p) - unpack = gen_avx2_zero_extendv16qiv16hi2; - else - unpack = gen_avx2_sign_extendv16qiv16hi2; - halfmode = V16QImode; - extract - = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi; - break; - case E_V32HImode: - if (unsigned_p) - unpack = gen_avx512f_zero_extendv16hiv16si2; - else - unpack = gen_avx512f_sign_extendv16hiv16si2; - halfmode = V16HImode; - extract - = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi; - break; - case E_V16HImode: - if (unsigned_p) - unpack = gen_avx2_zero_extendv8hiv8si2; - else - unpack = gen_avx2_sign_extendv8hiv8si2; - halfmode = V8HImode; - extract - = high_p ? 
gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi; - break; - case E_V16SImode: - if (unsigned_p) - unpack = gen_avx512f_zero_extendv8siv8di2; - else - unpack = gen_avx512f_sign_extendv8siv8di2; - halfmode = V8SImode; - extract - = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si; - break; - case E_V8SImode: - if (unsigned_p) - unpack = gen_avx2_zero_extendv4siv4di2; - else - unpack = gen_avx2_sign_extendv4siv4di2; - halfmode = V4SImode; - extract - = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si; - break; - case E_V16QImode: - if (unsigned_p) - unpack = gen_sse4_1_zero_extendv8qiv8hi2; - else - unpack = gen_sse4_1_sign_extendv8qiv8hi2; - break; - case E_V8HImode: - if (unsigned_p) - unpack = gen_sse4_1_zero_extendv4hiv4si2; - else - unpack = gen_sse4_1_sign_extendv4hiv4si2; - break; - case E_V4SImode: - if (unsigned_p) - unpack = gen_sse4_1_zero_extendv2siv2di2; - else - unpack = gen_sse4_1_sign_extendv2siv2di2; - break; - default: - gcc_unreachable (); - } - - if (GET_MODE_SIZE (imode) >= 32) - { - tmp = gen_reg_rtx (halfmode); - emit_insn (extract (tmp, src)); - } - else if (high_p) - { - /* Shift higher 8 bytes to lower 8 bytes. */ - tmp = gen_reg_rtx (V1TImode); - emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src), - GEN_INT (64))); - tmp = gen_lowpart (imode, tmp); - } - else - tmp = src; - - emit_insn (unpack (dest, tmp)); - } - else - { - rtx (*unpack)(rtx, rtx, rtx); - - switch (imode) - { - case E_V16QImode: - if (high_p) - unpack = gen_vec_interleave_highv16qi; - else - unpack = gen_vec_interleave_lowv16qi; - break; - case E_V8HImode: - if (high_p) - unpack = gen_vec_interleave_highv8hi; - else - unpack = gen_vec_interleave_lowv8hi; - break; - case E_V4SImode: - if (high_p) - unpack = gen_vec_interleave_highv4si; - else - unpack = gen_vec_interleave_lowv4si; - break; - default: - gcc_unreachable (); - } - - if (unsigned_p) - tmp = force_reg (imode, CONST0_RTX (imode)); - else - tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode), - src, pc_rtx, pc_rtx); - - rtx tmp2 = gen_reg_rtx (imode); - emit_insn (unpack (tmp2, src, tmp)); - emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2)); - } -} - -/* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode, - but works for floating pointer parameters and nonoffsetable memories. - For pushes, it returns just stack offsets; the values will be saved - in the right order. Maximally three parts are generated. */ - -static int -ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode) -{ - int size; - - if (!TARGET_64BIT) - size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4; - else - size = (GET_MODE_SIZE (mode) + 4) / 8; - - gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand))); - gcc_assert (size >= 2 && size <= 4); - - /* Optimize constant pool reference to immediates. This is used by fp - moves, that force all constants to memory to allow combining. */ - if (MEM_P (operand) && MEM_READONLY_P (operand)) - operand = avoid_constant_pool_reference (operand); - - if (MEM_P (operand) && !offsettable_memref_p (operand)) - { - /* The only non-offsetable memories we handle are pushes. 
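
The pre-SSE4.1 branch of ix86_expand_sse_unpack above widens by interleaving the source with either zero or a compare-generated sign mask; with intrinsics the same technique looks roughly like this (a sketch of the technique, not the expander):

#include <emmintrin.h>

/* Sign-extend the low four 16-bit lanes of X to 32 bits with SSE2 only:
   the compare builds the sign mask, the interleave pairs each element
   with its sign bytes.  */
static __m128i
sext_lo_epi16 (__m128i x)
{
  __m128i sign = _mm_cmpgt_epi16 (_mm_setzero_si128 (), x);
  return _mm_unpacklo_epi16 (x, sign);
}

/* The unsigned variant interleaves with zero instead.  */
static __m128i
zext_lo_epi16 (__m128i x)
{
  return _mm_unpacklo_epi16 (x, _mm_setzero_si128 ());
}
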
*/ - int ok = push_operand (operand, VOIDmode); - - gcc_assert (ok); - - operand = copy_rtx (operand); - PUT_MODE (operand, word_mode); - parts[0] = parts[1] = parts[2] = parts[3] = operand; - return size; - } - - if (GET_CODE (operand) == CONST_VECTOR) - { - scalar_int_mode imode = int_mode_for_mode (mode).require (); - /* Caution: if we looked through a constant pool memory above, - the operand may actually have a different mode now. That's - ok, since we want to pun this all the way back to an integer. */ - operand = simplify_subreg (imode, operand, GET_MODE (operand), 0); - gcc_assert (operand != NULL); - mode = imode; - } - - if (!TARGET_64BIT) - { - if (mode == DImode) - split_double_mode (mode, &operand, 1, &parts[0], &parts[1]); - else - { - int i; - - if (REG_P (operand)) - { - gcc_assert (reload_completed); - for (i = 0; i < size; i++) - parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i); - } - else if (offsettable_memref_p (operand)) - { - operand = adjust_address (operand, SImode, 0); - parts[0] = operand; - for (i = 1; i < size; i++) - parts[i] = adjust_address (operand, SImode, 4 * i); - } - else if (CONST_DOUBLE_P (operand)) - { - const REAL_VALUE_TYPE *r; - long l[4]; - - r = CONST_DOUBLE_REAL_VALUE (operand); - switch (mode) - { - case E_TFmode: - real_to_target (l, r, mode); - parts[3] = gen_int_mode (l[3], SImode); - parts[2] = gen_int_mode (l[2], SImode); - break; - case E_XFmode: - /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since - long double may not be 80-bit. */ - real_to_target (l, r, mode); - parts[2] = gen_int_mode (l[2], SImode); - break; - case E_DFmode: - REAL_VALUE_TO_TARGET_DOUBLE (*r, l); - break; - default: - gcc_unreachable (); - } - parts[1] = gen_int_mode (l[1], SImode); - parts[0] = gen_int_mode (l[0], SImode); - } - else - gcc_unreachable (); - } - } - else - { - if (mode == TImode) - split_double_mode (mode, &operand, 1, &parts[0], &parts[1]); - if (mode == XFmode || mode == TFmode) - { - machine_mode upper_mode = mode==XFmode ? SImode : DImode; - if (REG_P (operand)) - { - gcc_assert (reload_completed); - parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0); - parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1); - } - else if (offsettable_memref_p (operand)) - { - operand = adjust_address (operand, DImode, 0); - parts[0] = operand; - parts[1] = adjust_address (operand, upper_mode, 8); - } - else if (CONST_DOUBLE_P (operand)) - { - long l[4]; - - real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode); - - /* real_to_target puts 32-bit pieces in each long. */ - parts[0] = gen_int_mode ((l[0] & HOST_WIDE_INT_C (0xffffffff)) - | ((l[1] & HOST_WIDE_INT_C (0xffffffff)) - << 32), DImode); - - if (upper_mode == SImode) - parts[1] = gen_int_mode (l[2], SImode); - else - parts[1] - = gen_int_mode ((l[2] & HOST_WIDE_INT_C (0xffffffff)) - | ((l[3] & HOST_WIDE_INT_C (0xffffffff)) - << 32), DImode); - } - else - gcc_unreachable (); - } - } - - return size; -} - -/* Emit insns to perform a move or push of DI, DF, XF, and TF values. - Return false when normal moves are needed; true when all required - insns have been emitted. Operands 2-4 contain the input values - int the correct order; operands 5-7 contain the output values. */ - -void -ix86_split_long_move (rtx operands[]) -{ - rtx part[2][4]; - int nparts, i, j; - int push = 0; - int collisions = 0; - machine_mode mode = GET_MODE (operands[0]); - bool collisionparts[4]; - - /* The DFmode expanders may ask us to move double. - For 64bit target this is single move. 
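
At its core ix86_split_to_parts is doing word-size decomposition: on a 32-bit target a DImode value becomes its low and high SImode words (constants included, via real_to_target for the floating-point modes). The arithmetic, in plain C:

#include <stdint.h>

/* Split a 64-bit value into the two SImode parts the 32-bit target
   operates on: parts[0] is the low word, parts[1] the high word.  */
static void
split_di (uint64_t v, uint32_t parts[2])
{
  parts[0] = (uint32_t) v;
  parts[1] = (uint32_t) (v >> 32);
}
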
By hiding the fact - here we simplify i386.md splitters. */ - if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8) - { - /* Optimize constant pool reference to immediates. This is used by - fp moves, that force all constants to memory to allow combining. */ - - if (MEM_P (operands[1]) - && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF - && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))) - operands[1] = get_pool_constant (XEXP (operands[1], 0)); - if (push_operand (operands[0], VOIDmode)) - { - operands[0] = copy_rtx (operands[0]); - PUT_MODE (operands[0], word_mode); - } - else - operands[0] = gen_lowpart (DImode, operands[0]); - operands[1] = gen_lowpart (DImode, operands[1]); - emit_move_insn (operands[0], operands[1]); - return; - } - - /* The only non-offsettable memory we handle is push. */ - if (push_operand (operands[0], VOIDmode)) - push = 1; - else - gcc_assert (!MEM_P (operands[0]) - || offsettable_memref_p (operands[0])); - - nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0])); - ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0])); - - /* When emitting push, take care for source operands on the stack. */ - if (push && MEM_P (operands[1]) - && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1])) - { - rtx src_base = XEXP (part[1][nparts - 1], 0); - - /* Compensate for the stack decrement by 4. */ - if (!TARGET_64BIT && nparts == 3 - && mode == XFmode && TARGET_128BIT_LONG_DOUBLE) - src_base = plus_constant (Pmode, src_base, 4); - - /* src_base refers to the stack pointer and is - automatically decreased by emitted push. */ - for (i = 0; i < nparts; i++) - part[1][i] = change_address (part[1][i], - GET_MODE (part[1][i]), src_base); - } - - /* We need to do copy in the right order in case an address register - of the source overlaps the destination. */ - if (REG_P (part[0][0]) && MEM_P (part[1][0])) - { - rtx tmp; - - for (i = 0; i < nparts; i++) - { - collisionparts[i] - = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0)); - if (collisionparts[i]) - collisions++; - } - - /* Collision in the middle part can be handled by reordering. */ - if (collisions == 1 && nparts == 3 && collisionparts [1]) - { - std::swap (part[0][1], part[0][2]); - std::swap (part[1][1], part[1][2]); - } - else if (collisions == 1 - && nparts == 4 - && (collisionparts [1] || collisionparts [2])) - { - if (collisionparts [1]) - { - std::swap (part[0][1], part[0][2]); - std::swap (part[1][1], part[1][2]); - } - else - { - std::swap (part[0][2], part[0][3]); - std::swap (part[1][2], part[1][3]); - } - } - - /* If there are more collisions, we can't handle it by reordering. - Do an lea to the last part and use only one colliding move. */ - else if (collisions > 1) - { - rtx base, addr; - - collisions = 1; - - base = part[0][nparts - 1]; - - /* Handle the case when the last part isn't valid for lea. - Happens in 64-bit mode storing the 12-byte XFmode. */ - if (GET_MODE (base) != Pmode) - base = gen_rtx_REG (Pmode, REGNO (base)); - - addr = XEXP (part[1][0], 0); - if (TARGET_TLS_DIRECT_SEG_REFS) - { - struct ix86_address parts; - int ok = ix86_decompose_address (addr, &parts); - gcc_assert (ok); - /* It is not valid to use %gs: or %fs: in lea. 
*/ - gcc_assert (parts.seg == ADDR_SPACE_GENERIC); - } - emit_insn (gen_rtx_SET (base, addr)); - part[1][0] = replace_equiv_address (part[1][0], base); - for (i = 1; i < nparts; i++) - { - tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i); - part[1][i] = replace_equiv_address (part[1][i], tmp); - } - } - } - - if (push) - { - if (!TARGET_64BIT) - { - if (nparts == 3) - { - if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode) - emit_insn (gen_add2_insn (stack_pointer_rtx, GEN_INT (-4))); - emit_move_insn (part[0][2], part[1][2]); - } - else if (nparts == 4) - { - emit_move_insn (part[0][3], part[1][3]); - emit_move_insn (part[0][2], part[1][2]); - } - } - else - { - /* In 64bit mode we don't have 32bit push available. In case this is - register, it is OK - we will just use larger counterpart. We also - retype memory - these comes from attempt to avoid REX prefix on - moving of second half of TFmode value. */ - if (GET_MODE (part[1][1]) == SImode) - { - switch (GET_CODE (part[1][1])) - { - case MEM: - part[1][1] = adjust_address (part[1][1], DImode, 0); - break; - - case REG: - part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1])); - break; - - default: - gcc_unreachable (); - } - - if (GET_MODE (part[1][0]) == SImode) - part[1][0] = part[1][1]; - } - } - emit_move_insn (part[0][1], part[1][1]); - emit_move_insn (part[0][0], part[1][0]); - return; - } - - /* Choose correct order to not overwrite the source before it is copied. */ - if ((REG_P (part[0][0]) - && REG_P (part[1][1]) - && (REGNO (part[0][0]) == REGNO (part[1][1]) - || (nparts == 3 - && REGNO (part[0][0]) == REGNO (part[1][2])) - || (nparts == 4 - && REGNO (part[0][0]) == REGNO (part[1][3])))) - || (collisions > 0 - && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))) - { - for (i = 0, j = nparts - 1; i < nparts; i++, j--) - { - operands[2 + i] = part[0][j]; - operands[6 + i] = part[1][j]; - } - } - else - { - for (i = 0; i < nparts; i++) - { - operands[2 + i] = part[0][i]; - operands[6 + i] = part[1][i]; - } - } - - /* If optimizing for size, attempt to locally unCSE nonzero constants. */ - if (optimize_insn_for_size_p ()) - { - for (j = 0; j < nparts - 1; j++) - if (CONST_INT_P (operands[6 + j]) - && operands[6 + j] != const0_rtx - && REG_P (operands[2 + j])) - for (i = j; i < nparts - 1; i++) - if (CONST_INT_P (operands[7 + i]) - && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j])) - operands[7 + i] = operands[2 + j]; - } - - for (i = 0; i < nparts; i++) - emit_move_insn (operands[2 + i], operands[6 + i]); - - return; -} - -/* Helper function of ix86_split_ashl used to generate an SImode/DImode - left shift by a constant, either using a single shift or - a sequence of add instructions. */ - -static void -ix86_expand_ashl_const (rtx operand, int count, machine_mode mode) -{ - if (count == 1 - || (count * ix86_cost->add <= ix86_cost->shift_const - && !optimize_insn_for_size_p ())) - { - while (count-- > 0) - emit_insn (gen_add2_insn (operand, operand)); - } - else - { - rtx (*insn)(rtx, rtx, rtx); - - insn = mode == DImode ? 
gen_ashlsi3 : gen_ashldi3; - emit_insn (insn (operand, operand, GEN_INT (count))); - } -} - -void -ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode) -{ - rtx (*gen_ashl3)(rtx, rtx, rtx); - rtx (*gen_shld)(rtx, rtx, rtx); - int half_width = GET_MODE_BITSIZE (mode) >> 1; - machine_mode half_mode; - - rtx low[2], high[2]; - int count; - - if (CONST_INT_P (operands[2])) - { - split_double_mode (mode, operands, 2, low, high); - count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); - - if (count >= half_width) - { - emit_move_insn (high[0], low[1]); - emit_move_insn (low[0], const0_rtx); - - if (count > half_width) - ix86_expand_ashl_const (high[0], count - half_width, mode); - } - else - { - gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld; - - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); - - emit_insn (gen_shld (high[0], low[0], GEN_INT (count))); - ix86_expand_ashl_const (low[0], count, mode); - } - return; - } - - split_double_mode (mode, operands, 1, low, high); - half_mode = mode == DImode ? SImode : DImode; - - gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3; - - if (operands[1] == const1_rtx) - { - /* Assuming we've chosen a QImode capable registers, then 1 << N - can be done with two 32/64-bit shifts, no branches, no cmoves. */ - if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0])) - { - rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG); - - ix86_expand_clear (low[0]); - ix86_expand_clear (high[0]); - emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width))); - - d = gen_lowpart (QImode, low[0]); - d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); - s = gen_rtx_EQ (QImode, flags, const0_rtx); - emit_insn (gen_rtx_SET (d, s)); - - d = gen_lowpart (QImode, high[0]); - d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); - s = gen_rtx_NE (QImode, flags, const0_rtx); - emit_insn (gen_rtx_SET (d, s)); - } - - /* Otherwise, we can get the same results by manually performing - a bit extract operation on bit 5/6, and then performing the two - shifts. The two methods of getting 0/1 into low/high are exactly - the same size. Avoiding the shift in the bit extract case helps - pentium4 a bit; no one else seems to care much either way. */ - else - { - rtx (*gen_lshr3)(rtx, rtx, rtx); - rtx (*gen_and3)(rtx, rtx, rtx); - rtx (*gen_xor3)(rtx, rtx, rtx); - HOST_WIDE_INT bits; - rtx x; - - if (mode == DImode) - { - gen_lshr3 = gen_lshrsi3; - gen_and3 = gen_andsi3; - gen_xor3 = gen_xorsi3; - bits = 5; - } - else - { - gen_lshr3 = gen_lshrdi3; - gen_and3 = gen_anddi3; - gen_xor3 = gen_xordi3; - bits = 6; - } - - if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ()) - x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]); - else - x = gen_lowpart (half_mode, operands[2]); - emit_insn (gen_rtx_SET (high[0], x)); - - emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits))); - emit_insn (gen_and3 (high[0], high[0], const1_rtx)); - emit_move_insn (low[0], high[0]); - emit_insn (gen_xor3 (low[0], low[0], const1_rtx)); - } - - emit_insn (gen_ashl3 (low[0], low[0], operands[2])); - emit_insn (gen_ashl3 (high[0], high[0], operands[2])); - return; - } - - if (operands[1] == constm1_rtx) - { - /* For -1 << N, we can avoid the shld instruction, because we - know that we're shifting 0...31/63 ones into a -1. */ - emit_move_insn (low[0], constm1_rtx); - if (optimize_insn_for_size_p ()) - emit_move_insn (high[0], low[0]); - else - emit_move_insn (high[0], constm1_rtx); - } - else - { - gen_shld = mode == DImode ? 
gen_x86_shld : gen_x86_64_shld; - - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); - - split_double_mode (mode, operands, 1, low, high); - emit_insn (gen_shld (high[0], low[0], operands[2])); - } - - emit_insn (gen_ashl3 (low[0], low[0], operands[2])); - - if (TARGET_CMOVE && scratch) - { - ix86_expand_clear (scratch); - emit_insn (gen_x86_shift_adj_1 - (half_mode, high[0], low[0], operands[2], scratch)); - } - else - emit_insn (gen_x86_shift_adj_2 (half_mode, high[0], low[0], operands[2])); -} - -void -ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode) -{ - rtx (*gen_ashr3)(rtx, rtx, rtx) - = mode == DImode ? gen_ashrsi3 : gen_ashrdi3; - rtx (*gen_shrd)(rtx, rtx, rtx); - int half_width = GET_MODE_BITSIZE (mode) >> 1; - - rtx low[2], high[2]; - int count; - - if (CONST_INT_P (operands[2])) - { - split_double_mode (mode, operands, 2, low, high); - count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); - - if (count == GET_MODE_BITSIZE (mode) - 1) - { - emit_move_insn (high[0], high[1]); - emit_insn (gen_ashr3 (high[0], high[0], - GEN_INT (half_width - 1))); - emit_move_insn (low[0], high[0]); - - } - else if (count >= half_width) - { - emit_move_insn (low[0], high[1]); - emit_move_insn (high[0], low[0]); - emit_insn (gen_ashr3 (high[0], high[0], - GEN_INT (half_width - 1))); - - if (count > half_width) - emit_insn (gen_ashr3 (low[0], low[0], - GEN_INT (count - half_width))); - } - else - { - gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; - - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); - - emit_insn (gen_shrd (low[0], high[0], GEN_INT (count))); - emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count))); - } - } - else - { - machine_mode half_mode; - - gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; - - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); - - split_double_mode (mode, operands, 1, low, high); - half_mode = mode == DImode ? SImode : DImode; - - emit_insn (gen_shrd (low[0], high[0], operands[2])); - emit_insn (gen_ashr3 (high[0], high[0], operands[2])); - - if (TARGET_CMOVE && scratch) - { - emit_move_insn (scratch, high[0]); - emit_insn (gen_ashr3 (scratch, scratch, - GEN_INT (half_width - 1))); - emit_insn (gen_x86_shift_adj_1 - (half_mode, low[0], high[0], operands[2], scratch)); - } - else - emit_insn (gen_x86_shift_adj_3 - (half_mode, low[0], high[0], operands[2])); - } -} - -void -ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode) -{ - rtx (*gen_lshr3)(rtx, rtx, rtx) - = mode == DImode ? gen_lshrsi3 : gen_lshrdi3; - rtx (*gen_shrd)(rtx, rtx, rtx); - int half_width = GET_MODE_BITSIZE (mode) >> 1; - - rtx low[2], high[2]; - int count; - - if (CONST_INT_P (operands[2])) - { - split_double_mode (mode, operands, 2, low, high); - count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); - - if (count >= half_width) - { - emit_move_insn (low[0], high[1]); - ix86_expand_clear (high[0]); - - if (count > half_width) - emit_insn (gen_lshr3 (low[0], low[0], - GEN_INT (count - half_width))); - } - else - { - gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; - - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); - - emit_insn (gen_shrd (low[0], high[0], GEN_INT (count))); - emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count))); - } - } - else - { - machine_mode half_mode; - - gen_shrd = mode == DImode ? 
gen_x86_shrd : gen_x86_64_shrd; - - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); - - split_double_mode (mode, operands, 1, low, high); - half_mode = mode == DImode ? SImode : DImode; - - emit_insn (gen_shrd (low[0], high[0], operands[2])); - emit_insn (gen_lshr3 (high[0], high[0], operands[2])); - - if (TARGET_CMOVE && scratch) - { - ix86_expand_clear (scratch); - emit_insn (gen_x86_shift_adj_1 - (half_mode, low[0], high[0], operands[2], scratch)); - } - else - emit_insn (gen_x86_shift_adj_2 - (half_mode, low[0], high[0], operands[2])); - } -} - -/* Return mode for the memcpy/memset loop counter. Prefer SImode over - DImode for constant loop counts. */ - -static machine_mode -counter_mode (rtx count_exp) -{ - if (GET_MODE (count_exp) != VOIDmode) - return GET_MODE (count_exp); - if (!CONST_INT_P (count_exp)) - return Pmode; - if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff)) - return DImode; - return SImode; -} - -/* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR - to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT - specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set - memory by VALUE (supposed to be in MODE). - - The size is rounded down to whole number of chunk size moved at once. - SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */ - - -static void -expand_set_or_cpymem_via_loop (rtx destmem, rtx srcmem, - rtx destptr, rtx srcptr, rtx value, - rtx count, machine_mode mode, int unroll, - int expected_size, bool issetmem) -{ - rtx_code_label *out_label, *top_label; - rtx iter, tmp; - machine_mode iter_mode = counter_mode (count); - int piece_size_n = GET_MODE_SIZE (mode) * unroll; - rtx piece_size = GEN_INT (piece_size_n); - rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1)); - rtx size; - int i; - - top_label = gen_label_rtx (); - out_label = gen_label_rtx (); - iter = gen_reg_rtx (iter_mode); - - size = expand_simple_binop (iter_mode, AND, count, piece_size_mask, - NULL, 1, OPTAB_DIRECT); - /* Those two should combine. */ - if (piece_size == const1_rtx) - { - emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode, - true, out_label); - predict_jump (REG_BR_PROB_BASE * 10 / 100); - } - emit_move_insn (iter, const0_rtx); - - emit_label (top_label); - - tmp = convert_modes (Pmode, iter_mode, iter, true); - - /* This assert could be relaxed - in this case we'll need to compute - smallest power of two, containing in PIECE_SIZE_N and pass it to - offset_address. */ - gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0); - destmem = offset_address (destmem, tmp, piece_size_n); - destmem = adjust_address (destmem, mode, 0); - - if (!issetmem) - { - srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n); - srcmem = adjust_address (srcmem, mode, 0); - - /* When unrolling for chips that reorder memory reads and writes, - we can save registers by using single temporary. - Also using 4 temporaries is overkill in 32bit mode. 
*/ - if (!TARGET_64BIT && 0) - { - for (i = 0; i < unroll; i++) - { - if (i) - { - destmem = adjust_address (copy_rtx (destmem), mode, - GET_MODE_SIZE (mode)); - srcmem = adjust_address (copy_rtx (srcmem), mode, - GET_MODE_SIZE (mode)); - } - emit_move_insn (destmem, srcmem); - } - } - else - { - rtx tmpreg[4]; - gcc_assert (unroll <= 4); - for (i = 0; i < unroll; i++) - { - tmpreg[i] = gen_reg_rtx (mode); - if (i) - srcmem = adjust_address (copy_rtx (srcmem), mode, - GET_MODE_SIZE (mode)); - emit_move_insn (tmpreg[i], srcmem); - } - for (i = 0; i < unroll; i++) - { - if (i) - destmem = adjust_address (copy_rtx (destmem), mode, - GET_MODE_SIZE (mode)); - emit_move_insn (destmem, tmpreg[i]); - } - } - } - else - for (i = 0; i < unroll; i++) - { - if (i) - destmem = adjust_address (copy_rtx (destmem), mode, - GET_MODE_SIZE (mode)); - emit_move_insn (destmem, value); - } - - tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter, - true, OPTAB_LIB_WIDEN); - if (tmp != iter) - emit_move_insn (iter, tmp); - - emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode, - true, top_label); - if (expected_size != -1) - { - expected_size /= GET_MODE_SIZE (mode) * unroll; - if (expected_size == 0) - predict_jump (0); - else if (expected_size > REG_BR_PROB_BASE) - predict_jump (REG_BR_PROB_BASE - 1); - else - predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) - / expected_size); - } - else - predict_jump (REG_BR_PROB_BASE * 80 / 100); - iter = ix86_zero_extend_to_Pmode (iter); - tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr, - true, OPTAB_LIB_WIDEN); - if (tmp != destptr) - emit_move_insn (destptr, tmp); - if (!issetmem) - { - tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr, - true, OPTAB_LIB_WIDEN); - if (tmp != srcptr) - emit_move_insn (srcptr, tmp); - } - emit_label (out_label); -} - -/* Divide COUNTREG by SCALE. */ -static rtx -scale_counter (rtx countreg, int scale) -{ - rtx sc; - - if (scale == 1) - return countreg; - if (CONST_INT_P (countreg)) - return GEN_INT (INTVAL (countreg) / scale); - gcc_assert (REG_P (countreg)); - - sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg, - GEN_INT (exact_log2 (scale)), - NULL, 1, OPTAB_DIRECT); - return sc; -} - -/* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument. - When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored. - When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored. - For setmem case, VALUE is a promoted to a wider size ORIG_VALUE. - ORIG_VALUE is the original value passed to memset to fill the memory with. - Other arguments have same meaning as for previous function. */ - -static void -expand_set_or_cpymem_via_rep (rtx destmem, rtx srcmem, - rtx destptr, rtx srcptr, rtx value, rtx orig_value, - rtx count, - machine_mode mode, bool issetmem) -{ - rtx destexp; - rtx srcexp; - rtx countreg; - HOST_WIDE_INT rounded_count; - - /* If possible, it is shorter to use rep movs. - TODO: Maybe it is better to move this logic to decide_alg. 
*/ - if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3) - && (!issetmem || orig_value == const0_rtx)) - mode = SImode; - - if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode) - destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0); - - countreg = ix86_zero_extend_to_Pmode (scale_counter (count, - GET_MODE_SIZE (mode))); - if (mode != QImode) - { - destexp = gen_rtx_ASHIFT (Pmode, countreg, - GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); - destexp = gen_rtx_PLUS (Pmode, destexp, destptr); - } - else - destexp = gen_rtx_PLUS (Pmode, destptr, countreg); - if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count)) - { - rounded_count - = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode)); - destmem = shallow_copy_rtx (destmem); - set_mem_size (destmem, rounded_count); - } - else if (MEM_SIZE_KNOWN_P (destmem)) - clear_mem_size (destmem); - - if (issetmem) - { - value = force_reg (mode, gen_lowpart (mode, value)); - emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp)); - } - else - { - if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode) - srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0); - if (mode != QImode) - { - srcexp = gen_rtx_ASHIFT (Pmode, countreg, - GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); - srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr); - } - else - srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg); - if (CONST_INT_P (count)) - { - rounded_count - = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode)); - srcmem = shallow_copy_rtx (srcmem); - set_mem_size (srcmem, rounded_count); - } - else - { - if (MEM_SIZE_KNOWN_P (srcmem)) - clear_mem_size (srcmem); - } - emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg, - destexp, srcexp)); - } -} - -/* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to - DESTMEM. - SRC is passed by pointer to be updated on return. - Return value is updated DST. */ -static rtx -emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr, - HOST_WIDE_INT size_to_move) -{ - rtx dst = destmem, src = *srcmem, tempreg; - enum insn_code code; - machine_mode move_mode; - int piece_size, i; - - /* Find the widest mode in which we could perform moves. - Start with the biggest power of 2 less than SIZE_TO_MOVE and half - it until move of such size is supported. */ - piece_size = 1 << floor_log2 (size_to_move); - while (!int_mode_for_size (piece_size * BITS_PER_UNIT, 0).exists (&move_mode) - || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing) - { - gcc_assert (piece_size > 1); - piece_size >>= 1; - } - - /* Find the corresponding vector mode with the same size as MOVE_MODE. - MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */ - if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode)) - { - int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode); - if (!mode_for_vector (word_mode, nunits).exists (&move_mode) - || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing) - { - move_mode = word_mode; - piece_size = GET_MODE_SIZE (move_mode); - code = optab_handler (mov_optab, move_mode); - } - } - gcc_assert (code != CODE_FOR_nothing); - - dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0); - src = adjust_automodify_address_nv (src, move_mode, srcptr, 0); - - /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. 
*/ - gcc_assert (size_to_move % piece_size == 0); - - for (i = 0; i < size_to_move; i += piece_size) - { - /* We move from memory to memory, so we'll need to do it via - a temporary register. */ - tempreg = gen_reg_rtx (move_mode); - emit_insn (GEN_FCN (code) (tempreg, src)); - emit_insn (GEN_FCN (code) (dst, tempreg)); - - emit_move_insn (destptr, - plus_constant (Pmode, copy_rtx (destptr), piece_size)); - emit_move_insn (srcptr, - plus_constant (Pmode, copy_rtx (srcptr), piece_size)); - - dst = adjust_automodify_address_nv (dst, move_mode, destptr, - piece_size); - src = adjust_automodify_address_nv (src, move_mode, srcptr, - piece_size); - } - - /* Update DST and SRC rtx. */ - *srcmem = src; - return dst; -} - -/* Helper function for the string operations below. Dest VARIABLE whether - it is aligned to VALUE bytes. If true, jump to the label. */ - -static rtx_code_label * -ix86_expand_aligntest (rtx variable, int value, bool epilogue) -{ - rtx_code_label *label = gen_label_rtx (); - rtx tmpcount = gen_reg_rtx (GET_MODE (variable)); - if (GET_MODE (variable) == DImode) - emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value))); - else - emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value))); - emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable), - 1, label); - if (epilogue) - predict_jump (REG_BR_PROB_BASE * 50 / 100); - else - predict_jump (REG_BR_PROB_BASE * 90 / 100); - return label; -} - - -/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */ - -static void -expand_cpymem_epilogue (rtx destmem, rtx srcmem, - rtx destptr, rtx srcptr, rtx count, int max_size) -{ - rtx src, dest; - if (CONST_INT_P (count)) - { - HOST_WIDE_INT countval = INTVAL (count); - HOST_WIDE_INT epilogue_size = countval % max_size; - int i; - - /* For now MAX_SIZE should be a power of 2. This assert could be - relaxed, but it'll require a bit more complicated epilogue - expanding. */ - gcc_assert ((max_size & (max_size - 1)) == 0); - for (i = max_size; i >= 1; i >>= 1) - { - if (epilogue_size & i) - destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i); - } - return; - } - if (max_size > 8) - { - count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1), - count, 1, OPTAB_DIRECT); - expand_set_or_cpymem_via_loop (destmem, srcmem, destptr, srcptr, NULL, - count, QImode, 1, 4, false); - return; - } - - /* When there are stringops, we can cheaply increase dest and src pointers. - Otherwise we save code size by maintaining offset (zero is readily - available from preceding rep operation) and using x86 addressing modes. 
- */ - if (TARGET_SINGLE_STRINGOP) - { - if (max_size > 4) - { - rtx_code_label *label = ix86_expand_aligntest (count, 4, true); - src = change_address (srcmem, SImode, srcptr); - dest = change_address (destmem, SImode, destptr); - emit_insn (gen_strmov (destptr, dest, srcptr, src)); - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (max_size > 2) - { - rtx_code_label *label = ix86_expand_aligntest (count, 2, true); - src = change_address (srcmem, HImode, srcptr); - dest = change_address (destmem, HImode, destptr); - emit_insn (gen_strmov (destptr, dest, srcptr, src)); - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (max_size > 1) - { - rtx_code_label *label = ix86_expand_aligntest (count, 1, true); - src = change_address (srcmem, QImode, srcptr); - dest = change_address (destmem, QImode, destptr); - emit_insn (gen_strmov (destptr, dest, srcptr, src)); - emit_label (label); - LABEL_NUSES (label) = 1; - } - } - else - { - rtx offset = force_reg (Pmode, const0_rtx); - rtx tmp; - - if (max_size > 4) - { - rtx_code_label *label = ix86_expand_aligntest (count, 4, true); - src = change_address (srcmem, SImode, srcptr); - dest = change_address (destmem, SImode, destptr); - emit_move_insn (dest, src); - tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL, - true, OPTAB_LIB_WIDEN); - if (tmp != offset) - emit_move_insn (offset, tmp); - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (max_size > 2) - { - rtx_code_label *label = ix86_expand_aligntest (count, 2, true); - tmp = gen_rtx_PLUS (Pmode, srcptr, offset); - src = change_address (srcmem, HImode, tmp); - tmp = gen_rtx_PLUS (Pmode, destptr, offset); - dest = change_address (destmem, HImode, tmp); - emit_move_insn (dest, src); - tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp, - true, OPTAB_LIB_WIDEN); - if (tmp != offset) - emit_move_insn (offset, tmp); - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (max_size > 1) - { - rtx_code_label *label = ix86_expand_aligntest (count, 1, true); - tmp = gen_rtx_PLUS (Pmode, srcptr, offset); - src = change_address (srcmem, QImode, tmp); - tmp = gen_rtx_PLUS (Pmode, destptr, offset); - dest = change_address (destmem, QImode, tmp); - emit_move_insn (dest, src); - emit_label (label); - LABEL_NUSES (label) = 1; - } - } -} - -/* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM - with value PROMOTED_VAL. - SRC is passed by pointer to be updated on return. - Return value is updated DST. */ -static rtx -emit_memset (rtx destmem, rtx destptr, rtx promoted_val, - HOST_WIDE_INT size_to_move) -{ - rtx dst = destmem; - enum insn_code code; - machine_mode move_mode; - int piece_size, i; - - /* Find the widest mode in which we could perform moves. - Start with the biggest power of 2 less than SIZE_TO_MOVE and half - it until move of such size is supported. */ - move_mode = GET_MODE (promoted_val); - if (move_mode == VOIDmode) - move_mode = QImode; - if (size_to_move < GET_MODE_SIZE (move_mode)) - { - unsigned int move_bits = size_to_move * BITS_PER_UNIT; - move_mode = int_mode_for_size (move_bits, 0).require (); - promoted_val = gen_lowpart (move_mode, promoted_val); - } - piece_size = GET_MODE_SIZE (move_mode); - code = optab_handler (mov_optab, move_mode); - gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX); - - dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0); - - /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. 
*/ - gcc_assert (size_to_move % piece_size == 0); - - for (i = 0; i < size_to_move; i += piece_size) - { - if (piece_size <= GET_MODE_SIZE (word_mode)) - { - emit_insn (gen_strset (destptr, dst, promoted_val)); - dst = adjust_automodify_address_nv (dst, move_mode, destptr, - piece_size); - continue; - } - - emit_insn (GEN_FCN (code) (dst, promoted_val)); - - emit_move_insn (destptr, - plus_constant (Pmode, copy_rtx (destptr), piece_size)); - - dst = adjust_automodify_address_nv (dst, move_mode, destptr, - piece_size); - } - - /* Update DST rtx. */ - return dst; -} -/* Output code to set at most count & (max_size - 1) bytes starting by DEST. */ -static void -expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value, - rtx count, int max_size) -{ - count = expand_simple_binop (counter_mode (count), AND, count, - GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT); - expand_set_or_cpymem_via_loop (destmem, NULL, destptr, NULL, - gen_lowpart (QImode, value), count, QImode, - 1, max_size / 2, true); -} - -/* Output code to set at most count & (max_size - 1) bytes starting by DEST. */ -static void -expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value, - rtx count, int max_size) -{ - rtx dest; - - if (CONST_INT_P (count)) - { - HOST_WIDE_INT countval = INTVAL (count); - HOST_WIDE_INT epilogue_size = countval % max_size; - int i; - - /* For now MAX_SIZE should be a power of 2. This assert could be - relaxed, but it'll require a bit more complicated epilogue - expanding. */ - gcc_assert ((max_size & (max_size - 1)) == 0); - for (i = max_size; i >= 1; i >>= 1) - { - if (epilogue_size & i) - { - if (vec_value && i > GET_MODE_SIZE (GET_MODE (value))) - destmem = emit_memset (destmem, destptr, vec_value, i); - else - destmem = emit_memset (destmem, destptr, value, i); - } - } - return; - } - if (max_size > 32) - { - expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size); - return; - } - if (max_size > 16) - { - rtx_code_label *label = ix86_expand_aligntest (count, 16, true); - if (TARGET_64BIT) - { - dest = change_address (destmem, DImode, destptr); - emit_insn (gen_strset (destptr, dest, value)); - dest = adjust_automodify_address_nv (dest, DImode, destptr, 8); - emit_insn (gen_strset (destptr, dest, value)); - } - else - { - dest = change_address (destmem, SImode, destptr); - emit_insn (gen_strset (destptr, dest, value)); - dest = adjust_automodify_address_nv (dest, SImode, destptr, 4); - emit_insn (gen_strset (destptr, dest, value)); - dest = adjust_automodify_address_nv (dest, SImode, destptr, 8); - emit_insn (gen_strset (destptr, dest, value)); - dest = adjust_automodify_address_nv (dest, SImode, destptr, 12); - emit_insn (gen_strset (destptr, dest, value)); - } - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (max_size > 8) - { - rtx_code_label *label = ix86_expand_aligntest (count, 8, true); - if (TARGET_64BIT) - { - dest = change_address (destmem, DImode, destptr); - emit_insn (gen_strset (destptr, dest, value)); - } - else - { - dest = change_address (destmem, SImode, destptr); - emit_insn (gen_strset (destptr, dest, value)); - dest = adjust_automodify_address_nv (dest, SImode, destptr, 4); - emit_insn (gen_strset (destptr, dest, value)); - } - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (max_size > 4) - { - rtx_code_label *label = ix86_expand_aligntest (count, 4, true); - dest = change_address (destmem, SImode, destptr); - emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value))); - emit_label (label); - 
LABEL_NUSES (label) = 1; - } - if (max_size > 2) - { - rtx_code_label *label = ix86_expand_aligntest (count, 2, true); - dest = change_address (destmem, HImode, destptr); - emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value))); - emit_label (label); - LABEL_NUSES (label) = 1; - } - if (max_size > 1) - { - rtx_code_label *label = ix86_expand_aligntest (count, 1, true); - dest = change_address (destmem, QImode, destptr); - emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value))); - emit_label (label); - LABEL_NUSES (label) = 1; - } -} - -/* Adjust COUNTER by the VALUE. */ -static void -ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value) -{ - emit_insn (gen_add2_insn (countreg, GEN_INT (-value))); -} - -/* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to - DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN. - Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are - ignored. - Return value is updated DESTMEM. */ - -static rtx -expand_set_or_cpymem_prologue (rtx destmem, rtx srcmem, - rtx destptr, rtx srcptr, rtx value, - rtx vec_value, rtx count, int align, - int desired_alignment, bool issetmem) -{ - int i; - for (i = 1; i < desired_alignment; i <<= 1) - { - if (align <= i) - { - rtx_code_label *label = ix86_expand_aligntest (destptr, i, false); - if (issetmem) - { - if (vec_value && i > GET_MODE_SIZE (GET_MODE (value))) - destmem = emit_memset (destmem, destptr, vec_value, i); - else - destmem = emit_memset (destmem, destptr, value, i); - } - else - destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i); - ix86_adjust_counter (count, i); - emit_label (label); - LABEL_NUSES (label) = 1; - set_mem_align (destmem, i * 2 * BITS_PER_UNIT); - } - } - return destmem; -} - -/* Test if COUNT&SIZE is nonzero and if so, expand movme - or setmem sequence that is valid for SIZE..2*SIZE-1 bytes - and jump to DONE_LABEL. */ -static void -expand_small_cpymem_or_setmem (rtx destmem, rtx srcmem, - rtx destptr, rtx srcptr, - rtx value, rtx vec_value, - rtx count, int size, - rtx done_label, bool issetmem) -{ - rtx_code_label *label = ix86_expand_aligntest (count, size, false); - machine_mode mode = int_mode_for_size (size * BITS_PER_UNIT, 1).else_blk (); - rtx modesize; - int n; - - /* If we do not have vector value to copy, we must reduce size. */ - if (issetmem) - { - if (!vec_value) - { - if (GET_MODE (value) == VOIDmode && size > 8) - mode = Pmode; - else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value))) - mode = GET_MODE (value); - } - else - mode = GET_MODE (vec_value), value = vec_value; - } - else - { - /* Choose appropriate vector mode. */ - if (size >= 32) - mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode; - else if (size >= 16) - mode = TARGET_SSE ? 
V16QImode : DImode; - srcmem = change_address (srcmem, mode, srcptr); - } - destmem = change_address (destmem, mode, destptr); - modesize = GEN_INT (GET_MODE_SIZE (mode)); - gcc_assert (GET_MODE_SIZE (mode) <= size); - for (n = 0; n * GET_MODE_SIZE (mode) < size; n++) - { - if (issetmem) - emit_move_insn (destmem, gen_lowpart (mode, value)); - else - { - emit_move_insn (destmem, srcmem); - srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); - } - destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); - } - - destmem = offset_address (destmem, count, 1); - destmem = offset_address (destmem, GEN_INT (-2 * size), - GET_MODE_SIZE (mode)); - if (!issetmem) - { - srcmem = offset_address (srcmem, count, 1); - srcmem = offset_address (srcmem, GEN_INT (-2 * size), - GET_MODE_SIZE (mode)); - } - for (n = 0; n * GET_MODE_SIZE (mode) < size; n++) - { - if (issetmem) - emit_move_insn (destmem, gen_lowpart (mode, value)); - else - { - emit_move_insn (destmem, srcmem); - srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); - } - destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); - } - emit_jump_insn (gen_jump (done_label)); - emit_barrier (); - - emit_label (label); - LABEL_NUSES (label) = 1; -} - -/* Handle small memcpy (up to SIZE that is supposed to be small power of 2. - and get ready for the main memcpy loop by copying iniital DESIRED_ALIGN-ALIGN - bytes and last SIZE bytes adjusitng DESTPTR/SRCPTR/COUNT in a way we can - proceed with an loop copying SIZE bytes at once. Do moves in MODE. - DONE_LABEL is a label after the whole copying sequence. The label is created - on demand if *DONE_LABEL is NULL. - MIN_SIZE is minimal size of block copied. This value gets adjusted for new - bounds after the initial copies. - - DESTMEM/SRCMEM are memory expressions pointing to the copies block, - DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether - we will dispatch to a library call for large blocks. - - In pseudocode we do: - - if (COUNT < SIZE) - { - Assume that SIZE is 4. Bigger sizes are handled analogously - if (COUNT & 4) - { - copy 4 bytes from SRCPTR to DESTPTR - copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4 - goto done_label - } - if (!COUNT) - goto done_label; - copy 1 byte from SRCPTR to DESTPTR - if (COUNT & 2) - { - copy 2 bytes from SRCPTR to DESTPTR - copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2 - } - } - else - { - copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR - copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE - - OLD_DESPTR = DESTPTR; - Align DESTPTR up to DESIRED_ALIGN - SRCPTR += DESTPTR - OLD_DESTPTR - COUNT -= DEST_PTR - OLD_DESTPTR - if (DYNAMIC_CHECK) - Round COUNT down to multiple of SIZE - << optional caller supplied zero size guard is here >> - << optional caller supplied dynamic check is here >> - << caller supplied main copy loop is here >> - } - done_label: - */ -static void -expand_set_or_cpymem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem, - rtx *destptr, rtx *srcptr, - machine_mode mode, - rtx value, rtx vec_value, - rtx *count, - rtx_code_label **done_label, - int size, - int desired_align, - int align, - unsigned HOST_WIDE_INT *min_size, - bool dynamic_check, - bool issetmem) -{ - rtx_code_label *loop_label = NULL, *label; - int n; - rtx modesize; - int prolog_size = 0; - rtx mode_value; - - /* Chose proper value to copy. 
*/ - if (issetmem && VECTOR_MODE_P (mode)) - mode_value = vec_value; - else - mode_value = value; - gcc_assert (GET_MODE_SIZE (mode) <= size); - - /* See if block is big or small, handle small blocks. */ - if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size) - { - int size2 = size; - loop_label = gen_label_rtx (); - - if (!*done_label) - *done_label = gen_label_rtx (); - - emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count), - 1, loop_label); - size2 >>= 1; - - /* Handle sizes > 3. */ - for (;size2 > 2; size2 >>= 1) - expand_small_cpymem_or_setmem (destmem, srcmem, - *destptr, *srcptr, - value, vec_value, - *count, - size2, *done_label, issetmem); - /* Nothing to copy? Jump to DONE_LABEL if so */ - emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count), - 1, *done_label); - - /* Do a byte copy. */ - destmem = change_address (destmem, QImode, *destptr); - if (issetmem) - emit_move_insn (destmem, gen_lowpart (QImode, value)); - else - { - srcmem = change_address (srcmem, QImode, *srcptr); - emit_move_insn (destmem, srcmem); - } - - /* Handle sizes 2 and 3. */ - label = ix86_expand_aligntest (*count, 2, false); - destmem = change_address (destmem, HImode, *destptr); - destmem = offset_address (destmem, *count, 1); - destmem = offset_address (destmem, GEN_INT (-2), 2); - if (issetmem) - emit_move_insn (destmem, gen_lowpart (HImode, value)); - else - { - srcmem = change_address (srcmem, HImode, *srcptr); - srcmem = offset_address (srcmem, *count, 1); - srcmem = offset_address (srcmem, GEN_INT (-2), 2); - emit_move_insn (destmem, srcmem); - } - - emit_label (label); - LABEL_NUSES (label) = 1; - emit_jump_insn (gen_jump (*done_label)); - emit_barrier (); - } - else - gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size - || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size); - - /* Start memcpy for COUNT >= SIZE. */ - if (loop_label) - { - emit_label (loop_label); - LABEL_NUSES (loop_label) = 1; - } - - /* Copy first desired_align bytes. */ - if (!issetmem) - srcmem = change_address (srcmem, mode, *srcptr); - destmem = change_address (destmem, mode, *destptr); - modesize = GEN_INT (GET_MODE_SIZE (mode)); - for (n = 0; prolog_size < desired_align - align; n++) - { - if (issetmem) - emit_move_insn (destmem, mode_value); - else - { - emit_move_insn (destmem, srcmem); - srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); - } - destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); - prolog_size += GET_MODE_SIZE (mode); - } - - - /* Copy last SIZE bytes. */ - destmem = offset_address (destmem, *count, 1); - destmem = offset_address (destmem, - GEN_INT (-size - prolog_size), - 1); - if (issetmem) - emit_move_insn (destmem, mode_value); - else - { - srcmem = offset_address (srcmem, *count, 1); - srcmem = offset_address (srcmem, - GEN_INT (-size - prolog_size), - 1); - emit_move_insn (destmem, srcmem); - } - for (n = 1; n * GET_MODE_SIZE (mode) < size; n++) - { - destmem = offset_address (destmem, modesize, 1); - if (issetmem) - emit_move_insn (destmem, mode_value); - else - { - srcmem = offset_address (srcmem, modesize, 1); - emit_move_insn (destmem, srcmem); - } - } - - /* Align destination. */ - if (desired_align > 1 && desired_align > align) - { - rtx saveddest = *destptr; - - gcc_assert (desired_align <= size); - /* Align destptr up, place it to new register. 
*/ - *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr, - GEN_INT (prolog_size), - NULL_RTX, 1, OPTAB_DIRECT); - if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest)) - REG_POINTER (*destptr) = 1; - *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr, - GEN_INT (-desired_align), - *destptr, 1, OPTAB_DIRECT); - /* See how many bytes we skipped. */ - saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest, - *destptr, - saveddest, 1, OPTAB_DIRECT); - /* Adjust srcptr and count. */ - if (!issetmem) - *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, - saveddest, *srcptr, 1, OPTAB_DIRECT); - *count = expand_simple_binop (GET_MODE (*count), PLUS, *count, - saveddest, *count, 1, OPTAB_DIRECT); - /* We copied at most size + prolog_size. */ - if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size)) - *min_size - = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size); - else - *min_size = 0; - - /* Our loops always round down the block size, but for dispatch to - library we need precise value. */ - if (dynamic_check) - *count = expand_simple_binop (GET_MODE (*count), AND, *count, - GEN_INT (-size), *count, 1, OPTAB_DIRECT); - } - else - { - gcc_assert (prolog_size == 0); - /* Decrease count, so we won't end up copying last word twice. */ - if (!CONST_INT_P (*count)) - *count = expand_simple_binop (GET_MODE (*count), PLUS, *count, - constm1_rtx, *count, 1, OPTAB_DIRECT); - else - *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1, - (unsigned HOST_WIDE_INT)size)); - if (*min_size) - *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size); - } -} - - -/* This function is like the previous one, except here we know how many bytes - need to be copied. That allows us to update alignment not only of DST, which - is returned, but also of SRC, which is passed as a pointer for that - reason. 
*/ -static rtx -expand_set_or_cpymem_constant_prologue (rtx dst, rtx *srcp, rtx destreg, - rtx srcreg, rtx value, rtx vec_value, - int desired_align, int align_bytes, - bool issetmem) -{ - rtx src = NULL; - rtx orig_dst = dst; - rtx orig_src = NULL; - int piece_size = 1; - int copied_bytes = 0; - - if (!issetmem) - { - gcc_assert (srcp != NULL); - src = *srcp; - orig_src = src; - } - - for (piece_size = 1; - piece_size <= desired_align && copied_bytes < align_bytes; - piece_size <<= 1) - { - if (align_bytes & piece_size) - { - if (issetmem) - { - if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value))) - dst = emit_memset (dst, destreg, vec_value, piece_size); - else - dst = emit_memset (dst, destreg, value, piece_size); - } - else - dst = emit_memmov (dst, &src, destreg, srcreg, piece_size); - copied_bytes += piece_size; - } - } - if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT) - set_mem_align (dst, desired_align * BITS_PER_UNIT); - if (MEM_SIZE_KNOWN_P (orig_dst)) - set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes); - - if (!issetmem) - { - int src_align_bytes = get_mem_align_offset (src, desired_align - * BITS_PER_UNIT); - if (src_align_bytes >= 0) - src_align_bytes = desired_align - src_align_bytes; - if (src_align_bytes >= 0) - { - unsigned int src_align; - for (src_align = desired_align; src_align >= 2; src_align >>= 1) - { - if ((src_align_bytes & (src_align - 1)) - == (align_bytes & (src_align - 1))) - break; - } - if (src_align > (unsigned int) desired_align) - src_align = desired_align; - if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT) - set_mem_align (src, src_align * BITS_PER_UNIT); - } - if (MEM_SIZE_KNOWN_P (orig_src)) - set_mem_size (src, MEM_SIZE (orig_src) - align_bytes); - *srcp = src; - } - - return dst; -} - -/* Return true if ALG can be used in current context. - Assume we expand memset if MEMSET is true. */ -static bool -alg_usable_p (enum stringop_alg alg, bool memset, bool have_as) -{ - if (alg == no_stringop) - return false; - if (alg == vector_loop) - return TARGET_SSE || TARGET_AVX; - /* Algorithms using the rep prefix want at least edi and ecx; - additionally, memset wants eax and memcpy wants esi. Don't - consider such algorithms if the user has appropriated those - registers for their own purposes, or if we have a non-default - address space, since some string insns cannot override the segment. */ - if (alg == rep_prefix_1_byte - || alg == rep_prefix_4_byte - || alg == rep_prefix_8_byte) - { - if (have_as) - return false; - if (fixed_regs[CX_REG] - || fixed_regs[DI_REG] - || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG])) - return false; - } - return true; -} - -/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */ -static enum stringop_alg -decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, - unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size, - bool memset, bool zero_memset, bool have_as, - int *dynamic_check, bool *noalign, bool recur) -{ - const struct stringop_algs *algs; - bool optimize_for_speed; - int max = 0; - const struct processor_costs *cost; - int i; - bool any_alg_usable_p = false; - - *noalign = false; - *dynamic_check = -1; - - /* Even if the string operation call is cold, we still might spend a lot - of time processing large blocks. 
*/ - if (optimize_function_for_size_p (cfun) - || (optimize_insn_for_size_p () - && (max_size < 256 - || (expected_size != -1 && expected_size < 256)))) - optimize_for_speed = false; - else - optimize_for_speed = true; - - cost = optimize_for_speed ? ix86_cost : &ix86_size_cost; - if (memset) - algs = &cost->memset[TARGET_64BIT != 0]; - else - algs = &cost->memcpy[TARGET_64BIT != 0]; - - /* See maximal size for user defined algorithm. */ - for (i = 0; i < MAX_STRINGOP_ALGS; i++) - { - enum stringop_alg candidate = algs->size[i].alg; - bool usable = alg_usable_p (candidate, memset, have_as); - any_alg_usable_p |= usable; - - if (candidate != libcall && candidate && usable) - max = algs->size[i].max; - } - - /* If expected size is not known but max size is small enough - so inline version is a win, set expected size into - the range. */ - if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1) - && expected_size == -1) - expected_size = min_size / 2 + max_size / 2; - - /* If user specified the algorithm, honor it if possible. */ - if (ix86_stringop_alg != no_stringop - && alg_usable_p (ix86_stringop_alg, memset, have_as)) - return ix86_stringop_alg; - /* rep; movq or rep; movl is the smallest variant. */ - else if (!optimize_for_speed) - { - *noalign = true; - if (!count || (count & 3) || (memset && !zero_memset)) - return alg_usable_p (rep_prefix_1_byte, memset, have_as) - ? rep_prefix_1_byte : loop_1_byte; - else - return alg_usable_p (rep_prefix_4_byte, memset, have_as) - ? rep_prefix_4_byte : loop; - } - /* Very tiny blocks are best handled via the loop, REP is expensive to - setup. */ - else if (expected_size != -1 && expected_size < 4) - return loop_1_byte; - else if (expected_size != -1) - { - enum stringop_alg alg = libcall; - bool alg_noalign = false; - for (i = 0; i < MAX_STRINGOP_ALGS; i++) - { - /* We get here if the algorithms that were not libcall-based - were rep-prefix based and we are unable to use rep prefixes - based on global register usage. Break out of the loop and - use the heuristic below. */ - if (algs->size[i].max == 0) - break; - if (algs->size[i].max >= expected_size || algs->size[i].max == -1) - { - enum stringop_alg candidate = algs->size[i].alg; - - if (candidate != libcall - && alg_usable_p (candidate, memset, have_as)) - { - alg = candidate; - alg_noalign = algs->size[i].noalign; - } - /* Honor TARGET_INLINE_ALL_STRINGOPS by picking - last non-libcall inline algorithm. */ - if (TARGET_INLINE_ALL_STRINGOPS) - { - /* When the current size is best to be copied by a libcall, - but we are still forced to inline, run the heuristic below - that will pick code for medium sized blocks. */ - if (alg != libcall) - { - *noalign = alg_noalign; - return alg; - } - else if (!any_alg_usable_p) - break; - } - else if (alg_usable_p (candidate, memset, have_as)) - { - *noalign = algs->size[i].noalign; - return candidate; - } - } - } - } - /* When asked to inline the call anyway, try to pick meaningful choice. - We look for maximal size of block that is faster to copy by hand and - take blocks of at most of that size guessing that average size will - be roughly half of the block. - - If this turns out to be bad, we might simply specify the preferred - choice in ix86_costs. */ - if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY) - && (algs->unknown_size == libcall - || !alg_usable_p (algs->unknown_size, memset, have_as))) - { - enum stringop_alg alg; - HOST_WIDE_INT new_expected_size = (max > 0 ? 
max : 4096) / 2; - - /* If there aren't any usable algorithms or if recursing already, - then recursing on smaller sizes or same size isn't going to - find anything. Just return the simple byte-at-a-time copy loop. */ - if (!any_alg_usable_p || recur) - { - /* Pick something reasonable. */ - if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur) - *dynamic_check = 128; - return loop_1_byte; - } - alg = decide_alg (count, new_expected_size, min_size, max_size, memset, - zero_memset, have_as, dynamic_check, noalign, true); - gcc_assert (*dynamic_check == -1); - if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) - *dynamic_check = max; - else - gcc_assert (alg != libcall); - return alg; - } - return (alg_usable_p (algs->unknown_size, memset, have_as) - ? algs->unknown_size : libcall); -} - -/* Decide on alignment. We know that the operand is already aligned to ALIGN - (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */ -static int -decide_alignment (int align, - enum stringop_alg alg, - int expected_size, - machine_mode move_mode) -{ - int desired_align = 0; - - gcc_assert (alg != no_stringop); - - if (alg == libcall) - return 0; - if (move_mode == VOIDmode) - return 0; - - desired_align = GET_MODE_SIZE (move_mode); - /* PentiumPro has special logic triggering for 8 byte aligned blocks. - copying whole cacheline at once. */ - if (TARGET_PENTIUMPRO - && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte)) - desired_align = 8; - - if (optimize_size) - desired_align = 1; - if (desired_align < align) - desired_align = align; - if (expected_size != -1 && expected_size < 4) - desired_align = align; - - return desired_align; -} - - -/* Helper function for memcpy. For QImode value 0xXY produce - 0xXYXYXYXY of wide specified by MODE. This is essentially - a * 0x10101010, but we can do slightly better than - synth_mult by unwinding the sequence by hand on CPUs with - slow multiply. */ -static rtx -promote_duplicated_reg (machine_mode mode, rtx val) -{ - machine_mode valmode = GET_MODE (val); - rtx tmp; - int nops = mode == DImode ? 3 : 2; - - gcc_assert (mode == SImode || mode == DImode || val == const0_rtx); - if (val == const0_rtx) - return copy_to_mode_reg (mode, CONST0_RTX (mode)); - if (CONST_INT_P (val)) - { - HOST_WIDE_INT v = INTVAL (val) & 255; - - v |= v << 8; - v |= v << 16; - if (mode == DImode) - v |= (v << 16) << 16; - return copy_to_mode_reg (mode, gen_int_mode (v, mode)); - } - - if (valmode == VOIDmode) - valmode = QImode; - if (valmode != QImode) - val = gen_lowpart (QImode, val); - if (mode == QImode) - return val; - if (!TARGET_PARTIAL_REG_STALL) - nops--; - if (ix86_cost->mult_init[mode == DImode ? 3 : 2] - + ix86_cost->mult_bit * (mode == DImode ? 
8 : 4) - <= (ix86_cost->shift_const + ix86_cost->add) * nops - + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0))) - { - rtx reg = convert_modes (mode, QImode, val, true); - tmp = promote_duplicated_reg (mode, const1_rtx); - return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1, - OPTAB_DIRECT); - } - else - { - rtx reg = convert_modes (mode, QImode, val, true); - - if (!TARGET_PARTIAL_REG_STALL) - if (mode == SImode) - emit_insn (gen_insvsi_1 (reg, reg)); - else - emit_insn (gen_insvdi_1 (reg, reg)); - else - { - tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8), - NULL, 1, OPTAB_DIRECT); - reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, - OPTAB_DIRECT); - } - tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16), - NULL, 1, OPTAB_DIRECT); - reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); - if (mode == SImode) - return reg; - tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32), - NULL, 1, OPTAB_DIRECT); - reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); - return reg; - } -} - -/* Duplicate value VAL using promote_duplicated_reg into maximal size that will - be needed by main loop copying SIZE_NEEDED chunks and prologue getting - alignment from ALIGN to DESIRED_ALIGN. */ -static rtx -promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, - int align) -{ - rtx promoted_val; - - if (TARGET_64BIT - && (size_needed > 4 || (desired_align > align && desired_align > 4))) - promoted_val = promote_duplicated_reg (DImode, val); - else if (size_needed > 2 || (desired_align > align && desired_align > 2)) - promoted_val = promote_duplicated_reg (SImode, val); - else if (size_needed > 1 || (desired_align > align && desired_align > 1)) - promoted_val = promote_duplicated_reg (HImode, val); - else - promoted_val = val; - - return promoted_val; -} - -/* Copy the address to a Pmode register. This is used for x32 to - truncate DImode TLS address to a SImode register. */ - -static rtx -ix86_copy_addr_to_reg (rtx addr) -{ - rtx reg; - if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode) - { - reg = copy_addr_to_reg (addr); - REG_POINTER (reg) = 1; - return reg; - } - else - { - gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode); - reg = copy_to_mode_reg (DImode, addr); - REG_POINTER (reg) = 1; - return gen_rtx_SUBREG (SImode, reg, 0); - } -} - -/* Expand string move (memcpy) ot store (memset) operation. Use i386 string - operations when profitable. The code depends upon architecture, block size - and alignment, but always has one of the following overall structures: - - Aligned move sequence: - - 1) Prologue guard: Conditional that jumps up to epilogues for small - blocks that can be handled by epilogue alone. This is faster - but also needed for correctness, since prologue assume the block - is larger than the desired alignment. - - Optional dynamic check for size and libcall for large - blocks is emitted here too, with -minline-stringops-dynamically. - - 2) Prologue: copy first few bytes in order to get destination - aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less - than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be - copied. We emit either a jump tree on power of two sized - blocks, or a byte loop. - - 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks - with specified algorithm. - - 4) Epilogue: code copying tail of the block that is too small to be - handled by main body (or up to size guarded by prologue guard). 
- - Misaligned move sequence - - 1) missaligned move prologue/epilogue containing: - a) Prologue handling small memory blocks and jumping to done_label - (skipped if blocks are known to be large enough) - b) Signle move copying first DESIRED_ALIGN-ALIGN bytes if alignment is - needed by single possibly misaligned move - (skipped if alignment is not needed) - c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves - - 2) Zero size guard dispatching to done_label, if needed - - 3) dispatch to library call, if needed, - - 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks - with specified algorithm. */ -bool -ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, - rtx align_exp, rtx expected_align_exp, - rtx expected_size_exp, rtx min_size_exp, - rtx max_size_exp, rtx probable_max_size_exp, - bool issetmem) -{ - rtx destreg; - rtx srcreg = NULL; - rtx_code_label *label = NULL; - rtx tmp; - rtx_code_label *jump_around_label = NULL; - HOST_WIDE_INT align = 1; - unsigned HOST_WIDE_INT count = 0; - HOST_WIDE_INT expected_size = -1; - int size_needed = 0, epilogue_size_needed; - int desired_align = 0, align_bytes = 0; - enum stringop_alg alg; - rtx promoted_val = NULL; - rtx vec_promoted_val = NULL; - bool force_loopy_epilogue = false; - int dynamic_check; - bool need_zero_guard = false; - bool noalign; - machine_mode move_mode = VOIDmode; - machine_mode wider_mode; - int unroll_factor = 1; - /* TODO: Once value ranges are available, fill in proper data. */ - unsigned HOST_WIDE_INT min_size = 0; - unsigned HOST_WIDE_INT max_size = -1; - unsigned HOST_WIDE_INT probable_max_size = -1; - bool misaligned_prologue_used = false; - bool have_as; - - if (CONST_INT_P (align_exp)) - align = INTVAL (align_exp); - /* i386 can do misaligned access on reasonably increased cost. */ - if (CONST_INT_P (expected_align_exp) - && INTVAL (expected_align_exp) > align) - align = INTVAL (expected_align_exp); - /* ALIGN is the minimum of destination and source alignment, but we care here - just about destination alignment. */ - else if (!issetmem - && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT) - align = MEM_ALIGN (dst) / BITS_PER_UNIT; - - if (CONST_INT_P (count_exp)) - { - min_size = max_size = probable_max_size = count = expected_size - = INTVAL (count_exp); - /* When COUNT is 0, there is nothing to do. */ - if (!count) - return true; - } - else - { - if (min_size_exp) - min_size = INTVAL (min_size_exp); - if (max_size_exp) - max_size = INTVAL (max_size_exp); - if (probable_max_size_exp) - probable_max_size = INTVAL (probable_max_size_exp); - if (CONST_INT_P (expected_size_exp)) - expected_size = INTVAL (expected_size_exp); - } - - /* Make sure we don't need to care about overflow later on. */ - if (count > (HOST_WIDE_INT_1U << 30)) - return false; - - have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst)); - if (!issetmem) - have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)); - - /* Step 0: Decide on preferred algorithm, desired alignment and - size of chunks to be copied by main loop. 
*/ - alg = decide_alg (count, expected_size, min_size, probable_max_size, - issetmem, - issetmem && val_exp == const0_rtx, have_as, - &dynamic_check, &noalign, false); - - if (dump_file) - fprintf (dump_file, "Selected stringop expansion strategy: %s\n", - stringop_alg_names[alg]); - - if (alg == libcall) - return false; - gcc_assert (alg != no_stringop); - - /* For now vector-version of memset is generated only for memory zeroing, as - creating of promoted vector value is very cheap in this case. */ - if (issetmem && alg == vector_loop && val_exp != const0_rtx) - alg = unrolled_loop; - - if (!count) - count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp); - destreg = ix86_copy_addr_to_reg (XEXP (dst, 0)); - if (!issetmem) - srcreg = ix86_copy_addr_to_reg (XEXP (src, 0)); - - unroll_factor = 1; - move_mode = word_mode; - switch (alg) - { - case libcall: - case no_stringop: - case last_alg: - gcc_unreachable (); - case loop_1_byte: - need_zero_guard = true; - move_mode = QImode; - break; - case loop: - need_zero_guard = true; - break; - case unrolled_loop: - need_zero_guard = true; - unroll_factor = (TARGET_64BIT ? 4 : 2); - break; - case vector_loop: - need_zero_guard = true; - unroll_factor = 4; - /* Find the widest supported mode. */ - move_mode = word_mode; - while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode) - && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing) - move_mode = wider_mode; - - if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 128) - move_mode = TImode; - - /* Find the corresponding vector mode with the same size as MOVE_MODE. - MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */ - if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode)) - { - int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode); - if (!mode_for_vector (word_mode, nunits).exists (&move_mode) - || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing) - move_mode = word_mode; - } - gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing); - break; - case rep_prefix_8_byte: - move_mode = DImode; - break; - case rep_prefix_4_byte: - move_mode = SImode; - break; - case rep_prefix_1_byte: - move_mode = QImode; - break; - } - size_needed = GET_MODE_SIZE (move_mode) * unroll_factor; - epilogue_size_needed = size_needed; - - /* If we are going to call any library calls conditionally, make sure any - pending stack adjustment happen before the first conditional branch, - otherwise they will be emitted before the library call only and won't - happen from the other branches. */ - if (dynamic_check != -1) - do_pending_stack_adjust (); - - desired_align = decide_alignment (align, alg, expected_size, move_mode); - if (!TARGET_ALIGN_STRINGOPS || noalign) - align = desired_align; - - /* Step 1: Prologue guard. */ - - /* Alignment code needs count to be in register. */ - if (CONST_INT_P (count_exp) && desired_align > align) - { - if (INTVAL (count_exp) > desired_align - && INTVAL (count_exp) > size_needed) - { - align_bytes - = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT); - if (align_bytes <= 0) - align_bytes = 0; - else - align_bytes = desired_align - align_bytes; - } - if (align_bytes == 0) - count_exp = force_reg (counter_mode (count_exp), count_exp); - } - gcc_assert (desired_align >= 1 && align >= 1); - - /* Misaligned move sequences handle both prologue and epilogue at once. - Default code generation results in a smaller code for large alignments - and also avoids redundant job when sizes are known precisely. 
*/ - misaligned_prologue_used - = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES - && MAX (desired_align, epilogue_size_needed) <= 32 - && desired_align <= epilogue_size_needed - && ((desired_align > align && !align_bytes) - || (!count && epilogue_size_needed > 1))); - - /* Do the cheap promotion to allow better CSE across the - main loop and epilogue (ie one load of the big constant in the - front of all code. - For now the misaligned move sequences do not have fast path - without broadcasting. */ - if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used))) - { - if (alg == vector_loop) - { - gcc_assert (val_exp == const0_rtx); - vec_promoted_val = promote_duplicated_reg (move_mode, val_exp); - promoted_val = promote_duplicated_reg_to_size (val_exp, - GET_MODE_SIZE (word_mode), - desired_align, align); - } - else - { - promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, - desired_align, align); - } - } - /* Misaligned move sequences handles both prologues and epilogues at once. - Default code generation results in smaller code for large alignments and - also avoids redundant job when sizes are known precisely. */ - if (misaligned_prologue_used) - { - /* Misaligned move prologue handled small blocks by itself. */ - expand_set_or_cpymem_prologue_epilogue_by_misaligned_moves - (dst, src, &destreg, &srcreg, - move_mode, promoted_val, vec_promoted_val, - &count_exp, - &jump_around_label, - desired_align < align - ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed, - desired_align, align, &min_size, dynamic_check, issetmem); - if (!issetmem) - src = change_address (src, BLKmode, srcreg); - dst = change_address (dst, BLKmode, destreg); - set_mem_align (dst, desired_align * BITS_PER_UNIT); - epilogue_size_needed = 0; - if (need_zero_guard - && min_size < (unsigned HOST_WIDE_INT) size_needed) - { - /* It is possible that we copied enough so the main loop will not - execute. */ - gcc_assert (size_needed > 1); - if (jump_around_label == NULL_RTX) - jump_around_label = gen_label_rtx (); - emit_cmp_and_jump_insns (count_exp, - GEN_INT (size_needed), - LTU, 0, counter_mode (count_exp), 1, jump_around_label); - if (expected_size == -1 - || expected_size < (desired_align - align) / 2 + size_needed) - predict_jump (REG_BR_PROB_BASE * 20 / 100); - else - predict_jump (REG_BR_PROB_BASE * 60 / 100); - } - } - /* Ensure that alignment prologue won't copy past end of block. */ - else if (size_needed > 1 || (desired_align > 1 && desired_align > align)) - { - epilogue_size_needed = MAX (size_needed - 1, desired_align - align); - /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes. - Make sure it is power of 2. */ - epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1); - - /* To improve performance of small blocks, we jump around the VAL - promoting mode. This mean that if the promoted VAL is not constant, - we might not use it in the epilogue and have to use byte - loop variant. */ - if (issetmem && epilogue_size_needed > 2 && !promoted_val) - force_loopy_epilogue = true; - if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed) - || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed) - { - /* If main algorithm works on QImode, no epilogue is needed. - For small sizes just don't align anything. 
*/ - if (size_needed == 1) - desired_align = align; - else - goto epilogue; - } - else if (!count - && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed) - { - label = gen_label_rtx (); - emit_cmp_and_jump_insns (count_exp, - GEN_INT (epilogue_size_needed), - LTU, 0, counter_mode (count_exp), 1, label); - if (expected_size == -1 || expected_size < epilogue_size_needed) - predict_jump (REG_BR_PROB_BASE * 60 / 100); - else - predict_jump (REG_BR_PROB_BASE * 20 / 100); - } - } - - /* Emit code to decide on runtime whether library call or inline should be - used. */ - if (dynamic_check != -1) - { - if (!issetmem && CONST_INT_P (count_exp)) - { - if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check) - { - emit_block_copy_via_libcall (dst, src, count_exp); - count_exp = const0_rtx; - goto epilogue; - } - } - else - { - rtx_code_label *hot_label = gen_label_rtx (); - if (jump_around_label == NULL_RTX) - jump_around_label = gen_label_rtx (); - emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1), - LEU, 0, counter_mode (count_exp), - 1, hot_label); - predict_jump (REG_BR_PROB_BASE * 90 / 100); - if (issetmem) - set_storage_via_libcall (dst, count_exp, val_exp); - else - emit_block_copy_via_libcall (dst, src, count_exp); - emit_jump (jump_around_label); - emit_label (hot_label); - } - } - - /* Step 2: Alignment prologue. */ - /* Do the expensive promotion once we branched off the small blocks. */ - if (issetmem && !promoted_val) - promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, - desired_align, align); - - if (desired_align > align && !misaligned_prologue_used) - { - if (align_bytes == 0) - { - /* Except for the first move in prologue, we no longer know - constant offset in aliasing info. It don't seems to worth - the pain to maintain it for the first move, so throw away - the info early. */ - dst = change_address (dst, BLKmode, destreg); - if (!issetmem) - src = change_address (src, BLKmode, srcreg); - dst = expand_set_or_cpymem_prologue (dst, src, destreg, srcreg, - promoted_val, vec_promoted_val, - count_exp, align, desired_align, - issetmem); - /* At most desired_align - align bytes are copied. */ - if (min_size < (unsigned)(desired_align - align)) - min_size = 0; - else - min_size -= desired_align - align; - } - else - { - /* If we know how many bytes need to be stored before dst is - sufficiently aligned, maintain aliasing info accurately. */ - dst = expand_set_or_cpymem_constant_prologue (dst, &src, destreg, - srcreg, - promoted_val, - vec_promoted_val, - desired_align, - align_bytes, - issetmem); - - count_exp = plus_constant (counter_mode (count_exp), - count_exp, -align_bytes); - count -= align_bytes; - min_size -= align_bytes; - max_size -= align_bytes; - } - if (need_zero_guard - && min_size < (unsigned HOST_WIDE_INT) size_needed - && (count < (unsigned HOST_WIDE_INT) size_needed - || (align_bytes == 0 - && count < ((unsigned HOST_WIDE_INT) size_needed - + desired_align - align)))) - { - /* It is possible that we copied enough so the main loop will not - execute. 
*/ - gcc_assert (size_needed > 1); - if (label == NULL_RTX) - label = gen_label_rtx (); - emit_cmp_and_jump_insns (count_exp, - GEN_INT (size_needed), - LTU, 0, counter_mode (count_exp), 1, label); - if (expected_size == -1 - || expected_size < (desired_align - align) / 2 + size_needed) - predict_jump (REG_BR_PROB_BASE * 20 / 100); - else - predict_jump (REG_BR_PROB_BASE * 60 / 100); - } - } - if (label && size_needed == 1) - { - emit_label (label); - LABEL_NUSES (label) = 1; - label = NULL; - epilogue_size_needed = 1; - if (issetmem) - promoted_val = val_exp; - } - else if (label == NULL_RTX && !misaligned_prologue_used) - epilogue_size_needed = size_needed; - - /* Step 3: Main loop. */ - - switch (alg) - { - case libcall: - case no_stringop: - case last_alg: - gcc_unreachable (); - case loop_1_byte: - case loop: - case unrolled_loop: - expand_set_or_cpymem_via_loop (dst, src, destreg, srcreg, promoted_val, - count_exp, move_mode, unroll_factor, - expected_size, issetmem); - break; - case vector_loop: - expand_set_or_cpymem_via_loop (dst, src, destreg, srcreg, - vec_promoted_val, count_exp, move_mode, - unroll_factor, expected_size, issetmem); - break; - case rep_prefix_8_byte: - case rep_prefix_4_byte: - case rep_prefix_1_byte: - expand_set_or_cpymem_via_rep (dst, src, destreg, srcreg, promoted_val, - val_exp, count_exp, move_mode, issetmem); - break; - } - /* Adjust properly the offset of src and dest memory for aliasing. */ - if (CONST_INT_P (count_exp)) - { - if (!issetmem) - src = adjust_automodify_address_nv (src, BLKmode, srcreg, - (count / size_needed) * size_needed); - dst = adjust_automodify_address_nv (dst, BLKmode, destreg, - (count / size_needed) * size_needed); - } - else - { - if (!issetmem) - src = change_address (src, BLKmode, srcreg); - dst = change_address (dst, BLKmode, destreg); - } - - /* Step 4: Epilogue to copy the remaining bytes. */ - epilogue: - if (label) - { - /* When the main loop is done, COUNT_EXP might hold original count, - while we want to copy only COUNT_EXP & SIZE_NEEDED bytes. - Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED - bytes. Compensate if needed. */ - - if (size_needed < epilogue_size_needed) - { - tmp = expand_simple_binop (counter_mode (count_exp), AND, count_exp, - GEN_INT (size_needed - 1), count_exp, 1, - OPTAB_DIRECT); - if (tmp != count_exp) - emit_move_insn (count_exp, tmp); - } - emit_label (label); - LABEL_NUSES (label) = 1; - } - - if (count_exp != const0_rtx && epilogue_size_needed > 1) - { - if (force_loopy_epilogue) - expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp, - epilogue_size_needed); - else - { - if (issetmem) - expand_setmem_epilogue (dst, destreg, promoted_val, - vec_promoted_val, count_exp, - epilogue_size_needed); - else - expand_cpymem_epilogue (dst, src, destreg, srcreg, count_exp, - epilogue_size_needed); - } - } - if (jump_around_label) - emit_label (jump_around_label); - return true; -} - - -/* Expand the appropriate insns for doing strlen if not just doing - repnz; scasb - - out = result, initialized with the start address - align_rtx = alignment of the address. - scratch = scratch register, initialized with the startaddress when - not aligned, otherwise undefined - - This is just the body. It needs the initializations mentioned above and - some address computing at the end. These things are done in i386.md. 
*/ - -static void -ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) -{ - int align; - rtx tmp; - rtx_code_label *align_2_label = NULL; - rtx_code_label *align_3_label = NULL; - rtx_code_label *align_4_label = gen_label_rtx (); - rtx_code_label *end_0_label = gen_label_rtx (); - rtx mem; - rtx tmpreg = gen_reg_rtx (SImode); - rtx scratch = gen_reg_rtx (SImode); - rtx cmp; - - align = 0; - if (CONST_INT_P (align_rtx)) - align = INTVAL (align_rtx); - - /* Loop to check 1..3 bytes for null to get an aligned pointer. */ - - /* Is there a known alignment and is it less than 4? */ - if (align < 4) - { - rtx scratch1 = gen_reg_rtx (Pmode); - emit_move_insn (scratch1, out); - /* Is there a known alignment and is it not 2? */ - if (align != 2) - { - align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */ - align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */ - - /* Leave just the 3 lower bits. */ - align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3), - NULL_RTX, 0, OPTAB_WIDEN); - - emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, - Pmode, 1, align_4_label); - emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL, - Pmode, 1, align_2_label); - emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL, - Pmode, 1, align_3_label); - } - else - { - /* Since the alignment is 2, we have to check 2 or 0 bytes; - check if is aligned to 4 - byte. */ - - align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx, - NULL_RTX, 0, OPTAB_WIDEN); - - emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, - Pmode, 1, align_4_label); - } - - mem = change_address (src, QImode, out); - - /* Now compare the bytes. */ - - /* Compare the first n unaligned byte on a byte per byte basis. */ - emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, - QImode, 1, end_0_label); - - /* Increment the address. */ - emit_insn (gen_add2_insn (out, const1_rtx)); - - /* Not needed with an alignment of 2 */ - if (align != 2) - { - emit_label (align_2_label); - - emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, - end_0_label); - - emit_insn (gen_add2_insn (out, const1_rtx)); - - emit_label (align_3_label); - } - - emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, - end_0_label); - - emit_insn (gen_add2_insn (out, const1_rtx)); - } - - /* Generate loop to check 4 bytes at a time. It is not a good idea to - align this loop. It gives only huge programs, but does not help to - speed up. */ - emit_label (align_4_label); - - mem = change_address (src, SImode, out); - emit_move_insn (scratch, mem); - emit_insn (gen_add2_insn (out, GEN_INT (4))); - - /* This formula yields a nonzero result iff one of the bytes is zero. - This saves three branches inside loop and many cycles. */ - - emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101))); - emit_insn (gen_one_cmplsi2 (scratch, scratch)); - emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); - emit_insn (gen_andsi3 (tmpreg, tmpreg, - gen_int_mode (0x80808080, SImode))); - emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, - align_4_label); - - if (TARGET_CMOVE) - { - rtx reg = gen_reg_rtx (SImode); - rtx reg2 = gen_reg_rtx (Pmode); - emit_move_insn (reg, tmpreg); - emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16))); - - /* If zero is not in the first two bytes, move two bytes forward. 
*/ - emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); - tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); - tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); - emit_insn (gen_rtx_SET (tmpreg, - gen_rtx_IF_THEN_ELSE (SImode, tmp, - reg, - tmpreg))); - /* Emit lea manually to avoid clobbering of flags. */ - emit_insn (gen_rtx_SET (reg2, plus_constant (Pmode, out, 2))); - - tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); - tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); - emit_insn (gen_rtx_SET (out, - gen_rtx_IF_THEN_ELSE (Pmode, tmp, - reg2, - out))); - } - else - { - rtx_code_label *end_2_label = gen_label_rtx (); - /* Is zero in the first two bytes? */ - - emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); - tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); - tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx); - tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, - gen_rtx_LABEL_REF (VOIDmode, end_2_label), - pc_rtx); - tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); - JUMP_LABEL (tmp) = end_2_label; - - /* Not in the first two. Move two bytes forward. */ - emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16))); - emit_insn (gen_add2_insn (out, const2_rtx)); - - emit_label (end_2_label); - - } - - /* Avoid branch in fixing the byte. */ - tmpreg = gen_lowpart (QImode, tmpreg); - emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg)); - tmp = gen_rtx_REG (CCmode, FLAGS_REG); - cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx); - emit_insn (gen_sub3_carry (Pmode, out, out, GEN_INT (3), tmp, cmp)); - - emit_label (end_0_label); -} - -/* Expand strlen. */ - -bool -ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align) -{ -if (TARGET_UNROLL_STRLEN - && TARGET_INLINE_ALL_STRINGOPS - && eoschar == const0_rtx - && optimize > 1) - { - /* The generic case of strlen expander is long. Avoid it's - expanding unless TARGET_INLINE_ALL_STRINGOPS. */ - rtx addr = force_reg (Pmode, XEXP (src, 0)); - /* Well it seems that some optimizer does not combine a call like - foo(strlen(bar), strlen(bar)); - when the move and the subtraction is done here. It does calculate - the length just once when these instructions are done inside of - output_strlen_unroll(). But I think since &bar[strlen(bar)] is - often used and I use one fewer register for the lifetime of - output_strlen_unroll() this is better. */ - - emit_move_insn (out, addr); - - ix86_expand_strlensi_unroll_1 (out, src, align); - - /* strlensi_unroll_1 returns the address of the zero at the end of - the string, like memchr(), so compute the length by subtracting - the start address. */ - emit_insn (gen_sub2_insn (out, addr)); - return true; - } - else - return false; -} - -/* For given symbol (function) construct code to compute address of it's PLT - entry in large x86-64 PIC model. */ - -static rtx -construct_plt_address (rtx symbol) -{ - rtx tmp, unspec; - - gcc_assert (GET_CODE (symbol) == SYMBOL_REF); - gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF); - gcc_assert (Pmode == DImode); - - tmp = gen_reg_rtx (Pmode); - unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF); - - emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec)); - emit_insn (gen_add2_insn (tmp, pic_offset_table_rtx)); - return tmp; -} - -/* Additional registers that are clobbered by SYSV calls. 
*/ - -static int const x86_64_ms_sysv_extra_clobbered_registers - [NUM_X86_64_MS_CLOBBERED_REGS] = -{ - SI_REG, DI_REG, - XMM6_REG, XMM7_REG, - XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG, - XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG -}; - -rtx_insn * -ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, - rtx callarg2, - rtx pop, bool sibcall) -{ - rtx vec[3]; - rtx use = NULL, call; - unsigned int vec_len = 0; - tree fndecl; - - if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) - { - fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0)); - if (fndecl - && (lookup_attribute ("interrupt", - TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))) - error ("interrupt service routine cannot be called directly"); - } - else - fndecl = NULL_TREE; - - if (pop == const0_rtx) - pop = NULL; - gcc_assert (!TARGET_64BIT || !pop); - - if (TARGET_MACHO && !TARGET_64BIT) - { -#if TARGET_MACHO - if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) - fnaddr = machopic_indirect_call_target (fnaddr); -#endif - } - else - { - /* Static functions and indirect calls don't need the pic register. Also, - check if PLT was explicitly avoided via no-plt or "noplt" attribute, making - it an indirect call. */ - rtx addr = XEXP (fnaddr, 0); - if (flag_pic - && GET_CODE (addr) == SYMBOL_REF - && !SYMBOL_REF_LOCAL_P (addr)) - { - if (flag_plt - && (SYMBOL_REF_DECL (addr) == NULL_TREE - || !lookup_attribute ("noplt", - DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr))))) - { - if (!TARGET_64BIT - || (ix86_cmodel == CM_LARGE_PIC - && DEFAULT_ABI != MS_ABI)) - { - use_reg (&use, gen_rtx_REG (Pmode, - REAL_PIC_OFFSET_TABLE_REGNUM)); - if (ix86_use_pseudo_pic_reg ()) - emit_move_insn (gen_rtx_REG (Pmode, - REAL_PIC_OFFSET_TABLE_REGNUM), - pic_offset_table_rtx); - } - } - else if (!TARGET_PECOFF && !TARGET_MACHO) - { - if (TARGET_64BIT) - { - fnaddr = gen_rtx_UNSPEC (Pmode, - gen_rtvec (1, addr), - UNSPEC_GOTPCREL); - fnaddr = gen_rtx_CONST (Pmode, fnaddr); - } - else - { - fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), - UNSPEC_GOT); - fnaddr = gen_rtx_CONST (Pmode, fnaddr); - fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, - fnaddr); - } - fnaddr = gen_const_mem (Pmode, fnaddr); - /* Pmode may not be the same as word_mode for x32, which - doesn't support indirect branch via 32-bit memory slot. - Since x32 GOT slot is 64 bit with zero upper 32 bits, - indirect branch via x32 GOT slot is OK. */ - if (GET_MODE (fnaddr) != word_mode) - fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr); - fnaddr = gen_rtx_MEM (QImode, fnaddr); - } - } - } - - /* Skip setting up RAX register for -mskip-rax-setup when there are no - parameters passed in vector registers. */ - if (TARGET_64BIT - && (INTVAL (callarg2) > 0 - || (INTVAL (callarg2) == 0 - && (TARGET_SSE || !flag_skip_rax_setup)))) - { - rtx al = gen_rtx_REG (QImode, AX_REG); - emit_move_insn (al, callarg2); - use_reg (&use, al); - } - - if (ix86_cmodel == CM_LARGE_PIC - && !TARGET_PECOFF - && MEM_P (fnaddr) - && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF - && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode)) - fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0))); - /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect - branch via x32 GOT slot is OK. */ - else if (!(TARGET_X32 - && MEM_P (fnaddr) - && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND - && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode)) - && (sibcall - ? 
!sibcall_insn_operand (XEXP (fnaddr, 0), word_mode) - : !call_insn_operand (XEXP (fnaddr, 0), word_mode))) - { - fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1); - fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr)); - } - - call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); - - if (retval) - call = gen_rtx_SET (retval, call); - vec[vec_len++] = call; - - if (pop) - { - pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); - pop = gen_rtx_SET (stack_pointer_rtx, pop); - vec[vec_len++] = pop; - } - - if (cfun->machine->no_caller_saved_registers - && (!fndecl - || (!TREE_THIS_VOLATILE (fndecl) - && !lookup_attribute ("no_caller_saved_registers", - TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))) - { - static const char ix86_call_used_regs[] = CALL_USED_REGISTERS; - bool is_64bit_ms_abi = (TARGET_64BIT - && ix86_function_abi (fndecl) == MS_ABI); - char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi); - - /* If there are no caller-saved registers, add all registers - that are clobbered by the call which returns. */ - for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (!fixed_regs[i] - && (ix86_call_used_regs[i] == 1 - || (ix86_call_used_regs[i] & c_mask)) - && !STACK_REGNO_P (i) - && !MMX_REGNO_P (i)) - clobber_reg (&use, - gen_rtx_REG (GET_MODE (regno_reg_rtx[i]), i)); - } - else if (TARGET_64BIT_MS_ABI - && (!callarg2 || INTVAL (callarg2) != -2)) - { - unsigned i; - - for (i = 0; i < NUM_X86_64_MS_CLOBBERED_REGS; i++) - { - int regno = x86_64_ms_sysv_extra_clobbered_registers[i]; - machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode; - - clobber_reg (&use, gen_rtx_REG (mode, regno)); - } - - /* Set here, but it may get cleared later. */ - if (TARGET_CALL_MS2SYSV_XLOGUES) - { - if (!TARGET_SSE) - ; - - /* Don't break hot-patched functions. */ - else if (ix86_function_ms_hook_prologue (current_function_decl)) - ; - - /* TODO: Cases not yet examined. */ - else if (flag_split_stack) - warn_once_call_ms2sysv_xlogues ("-fsplit-stack"); - - else - { - gcc_assert (!reload_completed); - cfun->machine->call_ms2sysv = true; - } - } - } - - if (vec_len > 1) - call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec)); - rtx_insn *call_insn = emit_call_insn (call); - if (use) - CALL_INSN_FUNCTION_USAGE (call_insn) = use; - - return call_insn; -} - -/* Split simple return with popping POPC bytes from stack to indirect - branch with stack adjustment . */ - -void -ix86_split_simple_return_pop_internal (rtx popc) -{ - struct machine_function *m = cfun->machine; - rtx ecx = gen_rtx_REG (SImode, CX_REG); - rtx_insn *insn; - - /* There is no "pascal" calling convention in any 64bit ABI. */ - gcc_assert (!TARGET_64BIT); - - insn = emit_insn (gen_pop (ecx)); - m->fs.cfa_offset -= UNITS_PER_WORD; - m->fs.sp_offset -= UNITS_PER_WORD; - - rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); - x = gen_rtx_SET (stack_pointer_rtx, x); - add_reg_note (insn, REG_CFA_ADJUST_CFA, x); - add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); - RTX_FRAME_RELATED_P (insn) = 1; - - x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, popc); - x = gen_rtx_SET (stack_pointer_rtx, x); - insn = emit_insn (x); - add_reg_note (insn, REG_CFA_ADJUST_CFA, x); - RTX_FRAME_RELATED_P (insn) = 1; - - /* Now return address is in ECX. */ - emit_jump_insn (gen_simple_return_indirect_internal (ecx)); -} - -/* Errors in the source file can cause expand_expr to return const0_rtx - where we expect a vector. To avoid crashing, use one of the vector - clear instructions. 
*/ - -static rtx -safe_vector_operand (rtx x, machine_mode mode) -{ - if (x == const0_rtx) - x = CONST0_RTX (mode); - return x; -} - -/* Subroutine of ix86_expand_builtin to take care of binop insns. */ - -static rtx -ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - machine_mode tmode = insn_data[icode].operand[0].mode; - machine_mode mode0 = insn_data[icode].operand[1].mode; - machine_mode mode1 = insn_data[icode].operand[2].mode; - - if (VECTOR_MODE_P (mode0)) - op0 = safe_vector_operand (op0, mode0); - if (VECTOR_MODE_P (mode1)) - op1 = safe_vector_operand (op1, mode1); - - if (optimize || !target - || GET_MODE (target) != tmode - || !insn_data[icode].operand[0].predicate (target, tmode)) - target = gen_reg_rtx (tmode); - - if (GET_MODE (op1) == SImode && mode1 == TImode) - { - rtx x = gen_reg_rtx (V4SImode); - emit_insn (gen_sse2_loadd (x, op1)); - op1 = gen_lowpart (TImode, x); - } - - if (!insn_data[icode].operand[1].predicate (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if (!insn_data[icode].operand[2].predicate (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - - pat = GEN_FCN (icode) (target, op0, op1); - if (! pat) - return 0; - - emit_insn (pat); - - return target; -} - -/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */ - -static rtx -ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, - enum ix86_builtin_func_type m_type, - enum rtx_code sub_code) -{ - rtx pat; - int i; - int nargs; - bool comparison_p = false; - bool tf_p = false; - bool last_arg_constant = false; - int num_memory = 0; - struct { - rtx op; - machine_mode mode; - } args[4]; - - machine_mode tmode = insn_data[icode].operand[0].mode; - - switch (m_type) - { - case MULTI_ARG_4_DF2_DI_I: - case MULTI_ARG_4_DF2_DI_I1: - case MULTI_ARG_4_SF2_SI_I: - case MULTI_ARG_4_SF2_SI_I1: - nargs = 4; - last_arg_constant = true; - break; - - case MULTI_ARG_3_SF: - case MULTI_ARG_3_DF: - case MULTI_ARG_3_SF2: - case MULTI_ARG_3_DF2: - case MULTI_ARG_3_DI: - case MULTI_ARG_3_SI: - case MULTI_ARG_3_SI_DI: - case MULTI_ARG_3_HI: - case MULTI_ARG_3_HI_SI: - case MULTI_ARG_3_QI: - case MULTI_ARG_3_DI2: - case MULTI_ARG_3_SI2: - case MULTI_ARG_3_HI2: - case MULTI_ARG_3_QI2: - nargs = 3; - break; - - case MULTI_ARG_2_SF: - case MULTI_ARG_2_DF: - case MULTI_ARG_2_DI: - case MULTI_ARG_2_SI: - case MULTI_ARG_2_HI: - case MULTI_ARG_2_QI: - nargs = 2; - break; - - case MULTI_ARG_2_DI_IMM: - case MULTI_ARG_2_SI_IMM: - case MULTI_ARG_2_HI_IMM: - case MULTI_ARG_2_QI_IMM: - nargs = 2; - last_arg_constant = true; - break; - - case MULTI_ARG_1_SF: - case MULTI_ARG_1_DF: - case MULTI_ARG_1_SF2: - case MULTI_ARG_1_DF2: - case MULTI_ARG_1_DI: - case MULTI_ARG_1_SI: - case MULTI_ARG_1_HI: - case MULTI_ARG_1_QI: - case MULTI_ARG_1_SI_DI: - case MULTI_ARG_1_HI_DI: - case MULTI_ARG_1_HI_SI: - case MULTI_ARG_1_QI_DI: - case MULTI_ARG_1_QI_SI: - case MULTI_ARG_1_QI_HI: - nargs = 1; - break; - - case MULTI_ARG_2_DI_CMP: - case MULTI_ARG_2_SI_CMP: - case MULTI_ARG_2_HI_CMP: - case MULTI_ARG_2_QI_CMP: - nargs = 2; - comparison_p = true; - break; - - case MULTI_ARG_2_SF_TF: - case MULTI_ARG_2_DF_TF: - case MULTI_ARG_2_DI_TF: - case MULTI_ARG_2_SI_TF: - case MULTI_ARG_2_HI_TF: - case MULTI_ARG_2_QI_TF: - nargs = 2; - tf_p = true; - break; - - default: - gcc_unreachable (); - } - - if (optimize || !target - || GET_MODE 
(target) != tmode - || !insn_data[icode].operand[0].predicate (target, tmode)) - target = gen_reg_rtx (tmode); - else if (memory_operand (target, tmode)) - num_memory++; - - gcc_assert (nargs <= 4); - - for (i = 0; i < nargs; i++) - { - tree arg = CALL_EXPR_ARG (exp, i); - rtx op = expand_normal (arg); - int adjust = (comparison_p) ? 1 : 0; - machine_mode mode = insn_data[icode].operand[i+adjust+1].mode; - - if (last_arg_constant && i == nargs - 1) - { - if (!insn_data[icode].operand[i + 1].predicate (op, mode)) - { - enum insn_code new_icode = icode; - switch (icode) - { - case CODE_FOR_xop_vpermil2v2df3: - case CODE_FOR_xop_vpermil2v4sf3: - case CODE_FOR_xop_vpermil2v4df3: - case CODE_FOR_xop_vpermil2v8sf3: - error ("the last argument must be a 2-bit immediate"); - return gen_reg_rtx (tmode); - case CODE_FOR_xop_rotlv2di3: - new_icode = CODE_FOR_rotlv2di3; - goto xop_rotl; - case CODE_FOR_xop_rotlv4si3: - new_icode = CODE_FOR_rotlv4si3; - goto xop_rotl; - case CODE_FOR_xop_rotlv8hi3: - new_icode = CODE_FOR_rotlv8hi3; - goto xop_rotl; - case CODE_FOR_xop_rotlv16qi3: - new_icode = CODE_FOR_rotlv16qi3; - xop_rotl: - if (CONST_INT_P (op)) - { - int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1; - op = GEN_INT (INTVAL (op) & mask); - gcc_checking_assert - (insn_data[icode].operand[i + 1].predicate (op, mode)); - } - else - { - gcc_checking_assert - (nargs == 2 - && insn_data[new_icode].operand[0].mode == tmode - && insn_data[new_icode].operand[1].mode == tmode - && insn_data[new_icode].operand[2].mode == mode - && insn_data[new_icode].operand[0].predicate - == insn_data[icode].operand[0].predicate - && insn_data[new_icode].operand[1].predicate - == insn_data[icode].operand[1].predicate); - icode = new_icode; - goto non_constant; - } - break; - default: - gcc_unreachable (); - } - } - } - else - { - non_constant: - if (VECTOR_MODE_P (mode)) - op = safe_vector_operand (op, mode); - - /* If we aren't optimizing, only allow one memory operand to be - generated. */ - if (memory_operand (op, mode)) - num_memory++; - - gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode); - - if (optimize - || !insn_data[icode].operand[i+adjust+1].predicate (op, mode) - || num_memory > 1) - op = force_reg (mode, op); - } - - args[i].op = op; - args[i].mode = mode; - } - - switch (nargs) - { - case 1: - pat = GEN_FCN (icode) (target, args[0].op); - break; - - case 2: - if (tf_p) - pat = GEN_FCN (icode) (target, args[0].op, args[1].op, - GEN_INT ((int)sub_code)); - else if (! comparison_p) - pat = GEN_FCN (icode) (target, args[0].op, args[1].op); - else - { - rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target), - args[0].op, - args[1].op); - - pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op); - } - break; - - case 3: - pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); - break; - - case 4: - pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op); - break; - - default: - gcc_unreachable (); - } - - if (! pat) - return 0; - - emit_insn (pat); - return target; -} - -/* Subroutine of ix86_expand_args_builtin to take care of scalar unop - insns with vec_merge. 
*/ - -static rtx -ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp, - rtx target) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - rtx op1, op0 = expand_normal (arg0); - machine_mode tmode = insn_data[icode].operand[0].mode; - machine_mode mode0 = insn_data[icode].operand[1].mode; - - if (optimize || !target - || GET_MODE (target) != tmode - || !insn_data[icode].operand[0].predicate (target, tmode)) - target = gen_reg_rtx (tmode); - - if (VECTOR_MODE_P (mode0)) - op0 = safe_vector_operand (op0, mode0); - - if ((optimize && !register_operand (op0, mode0)) - || !insn_data[icode].operand[1].predicate (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - - op1 = op0; - if (!insn_data[icode].operand[2].predicate (op1, mode0)) - op1 = copy_to_mode_reg (mode0, op1); - - pat = GEN_FCN (icode) (target, op0, op1); - if (! pat) - return 0; - emit_insn (pat); - return target; -} - -/* Subroutine of ix86_expand_builtin to take care of comparison insns. */ - -static rtx -ix86_expand_sse_compare (const struct builtin_description *d, - tree exp, rtx target, bool swap) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - rtx op2; - machine_mode tmode = insn_data[d->icode].operand[0].mode; - machine_mode mode0 = insn_data[d->icode].operand[1].mode; - machine_mode mode1 = insn_data[d->icode].operand[2].mode; - enum rtx_code comparison = d->comparison; - - if (VECTOR_MODE_P (mode0)) - op0 = safe_vector_operand (op0, mode0); - if (VECTOR_MODE_P (mode1)) - op1 = safe_vector_operand (op1, mode1); - - /* Swap operands if we have a comparison that isn't available in - hardware. */ - if (swap) - std::swap (op0, op1); - - if (optimize || !target - || GET_MODE (target) != tmode - || !insn_data[d->icode].operand[0].predicate (target, tmode)) - target = gen_reg_rtx (tmode); - - if ((optimize && !register_operand (op0, mode0)) - || !insn_data[d->icode].operand[1].predicate (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if ((optimize && !register_operand (op1, mode1)) - || !insn_data[d->icode].operand[2].predicate (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - - op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); - pat = GEN_FCN (d->icode) (target, op0, op1, op2); - if (! pat) - return 0; - emit_insn (pat); - return target; -} - -/* Subroutine of ix86_expand_builtin to take care of comi insns. */ - -static rtx -ix86_expand_sse_comi (const struct builtin_description *d, tree exp, - rtx target) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - machine_mode mode0 = insn_data[d->icode].operand[0].mode; - machine_mode mode1 = insn_data[d->icode].operand[1].mode; - enum rtx_code comparison = d->comparison; - - if (VECTOR_MODE_P (mode0)) - op0 = safe_vector_operand (op0, mode0); - if (VECTOR_MODE_P (mode1)) - op1 = safe_vector_operand (op1, mode1); - - /* Swap operands if we have a comparison that isn't available in - hardware. 
*/ - if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) - std::swap (op0, op1); - - target = gen_reg_rtx (SImode); - emit_move_insn (target, const0_rtx); - target = gen_rtx_SUBREG (QImode, target, 0); - - if ((optimize && !register_operand (op0, mode0)) - || !insn_data[d->icode].operand[0].predicate (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if ((optimize && !register_operand (op1, mode1)) - || !insn_data[d->icode].operand[1].predicate (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - - pat = GEN_FCN (d->icode) (op0, op1); - if (! pat) - return 0; - emit_insn (pat); - emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), - gen_rtx_fmt_ee (comparison, QImode, - SET_DEST (pat), - const0_rtx))); - - return SUBREG_REG (target); -} - -/* Subroutines of ix86_expand_args_builtin to take care of round insns. */ - -static rtx -ix86_expand_sse_round (const struct builtin_description *d, tree exp, - rtx target) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - rtx op1, op0 = expand_normal (arg0); - machine_mode tmode = insn_data[d->icode].operand[0].mode; - machine_mode mode0 = insn_data[d->icode].operand[1].mode; - - if (optimize || target == 0 - || GET_MODE (target) != tmode - || !insn_data[d->icode].operand[0].predicate (target, tmode)) - target = gen_reg_rtx (tmode); - - if (VECTOR_MODE_P (mode0)) - op0 = safe_vector_operand (op0, mode0); - - if ((optimize && !register_operand (op0, mode0)) - || !insn_data[d->icode].operand[0].predicate (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - - op1 = GEN_INT (d->comparison); - - pat = GEN_FCN (d->icode) (target, op0, op1); - if (! pat) - return 0; - emit_insn (pat); - return target; -} - -static rtx -ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d, - tree exp, rtx target) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - rtx op2; - machine_mode tmode = insn_data[d->icode].operand[0].mode; - machine_mode mode0 = insn_data[d->icode].operand[1].mode; - machine_mode mode1 = insn_data[d->icode].operand[2].mode; - - if (optimize || target == 0 - || GET_MODE (target) != tmode - || !insn_data[d->icode].operand[0].predicate (target, tmode)) - target = gen_reg_rtx (tmode); - - op0 = safe_vector_operand (op0, mode0); - op1 = safe_vector_operand (op1, mode1); - - if ((optimize && !register_operand (op0, mode0)) - || !insn_data[d->icode].operand[0].predicate (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if ((optimize && !register_operand (op1, mode1)) - || !insn_data[d->icode].operand[1].predicate (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - - op2 = GEN_INT (d->comparison); - - pat = GEN_FCN (d->icode) (target, op0, op1, op2); - if (! pat) - return 0; - emit_insn (pat); - return target; -} - -/* Subroutine of ix86_expand_builtin to take care of ptest insns. 
*/ - -static rtx -ix86_expand_sse_ptest (const struct builtin_description *d, tree exp, - rtx target) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - machine_mode mode0 = insn_data[d->icode].operand[0].mode; - machine_mode mode1 = insn_data[d->icode].operand[1].mode; - enum rtx_code comparison = d->comparison; - - if (VECTOR_MODE_P (mode0)) - op0 = safe_vector_operand (op0, mode0); - if (VECTOR_MODE_P (mode1)) - op1 = safe_vector_operand (op1, mode1); - - target = gen_reg_rtx (SImode); - emit_move_insn (target, const0_rtx); - target = gen_rtx_SUBREG (QImode, target, 0); - - if ((optimize && !register_operand (op0, mode0)) - || !insn_data[d->icode].operand[0].predicate (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if ((optimize && !register_operand (op1, mode1)) - || !insn_data[d->icode].operand[1].predicate (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - - pat = GEN_FCN (d->icode) (op0, op1); - if (! pat) - return 0; - emit_insn (pat); - emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), - gen_rtx_fmt_ee (comparison, QImode, - SET_DEST (pat), - const0_rtx))); - - return SUBREG_REG (target); -} - -/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */ - -static rtx -ix86_expand_sse_pcmpestr (const struct builtin_description *d, - tree exp, rtx target) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - tree arg2 = CALL_EXPR_ARG (exp, 2); - tree arg3 = CALL_EXPR_ARG (exp, 3); - tree arg4 = CALL_EXPR_ARG (exp, 4); - rtx scratch0, scratch1; - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - rtx op2 = expand_normal (arg2); - rtx op3 = expand_normal (arg3); - rtx op4 = expand_normal (arg4); - machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm; - - tmode0 = insn_data[d->icode].operand[0].mode; - tmode1 = insn_data[d->icode].operand[1].mode; - modev2 = insn_data[d->icode].operand[2].mode; - modei3 = insn_data[d->icode].operand[3].mode; - modev4 = insn_data[d->icode].operand[4].mode; - modei5 = insn_data[d->icode].operand[5].mode; - modeimm = insn_data[d->icode].operand[6].mode; - - if (VECTOR_MODE_P (modev2)) - op0 = safe_vector_operand (op0, modev2); - if (VECTOR_MODE_P (modev4)) - op2 = safe_vector_operand (op2, modev4); - - if (!insn_data[d->icode].operand[2].predicate (op0, modev2)) - op0 = copy_to_mode_reg (modev2, op0); - if (!insn_data[d->icode].operand[3].predicate (op1, modei3)) - op1 = copy_to_mode_reg (modei3, op1); - if ((optimize && !register_operand (op2, modev4)) - || !insn_data[d->icode].operand[4].predicate (op2, modev4)) - op2 = copy_to_mode_reg (modev4, op2); - if (!insn_data[d->icode].operand[5].predicate (op3, modei5)) - op3 = copy_to_mode_reg (modei5, op3); - - if (!insn_data[d->icode].operand[6].predicate (op4, modeimm)) - { - error ("the fifth argument must be an 8-bit immediate"); - return const0_rtx; - } - - if (d->code == IX86_BUILTIN_PCMPESTRI128) - { - if (optimize || !target - || GET_MODE (target) != tmode0 - || !insn_data[d->icode].operand[0].predicate (target, tmode0)) - target = gen_reg_rtx (tmode0); - - scratch1 = gen_reg_rtx (tmode1); - - pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4); - } - else if (d->code == IX86_BUILTIN_PCMPESTRM128) - { - if (optimize || !target - || GET_MODE (target) != tmode1 - || !insn_data[d->icode].operand[1].predicate (target, tmode1)) - target = gen_reg_rtx (tmode1); - - 
scratch0 = gen_reg_rtx (tmode0); - - pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4); - } - else - { - gcc_assert (d->flag); - - scratch0 = gen_reg_rtx (tmode0); - scratch1 = gen_reg_rtx (tmode1); - - pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4); - } - - if (! pat) - return 0; - - emit_insn (pat); - - if (d->flag) - { - target = gen_reg_rtx (SImode); - emit_move_insn (target, const0_rtx); - target = gen_rtx_SUBREG (QImode, target, 0); - - emit_insn - (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), - gen_rtx_fmt_ee (EQ, QImode, - gen_rtx_REG ((machine_mode) d->flag, - FLAGS_REG), - const0_rtx))); - return SUBREG_REG (target); - } - else - return target; -} - - -/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */ - -static rtx -ix86_expand_sse_pcmpistr (const struct builtin_description *d, - tree exp, rtx target) -{ - rtx pat; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - tree arg2 = CALL_EXPR_ARG (exp, 2); - rtx scratch0, scratch1; - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - rtx op2 = expand_normal (arg2); - machine_mode tmode0, tmode1, modev2, modev3, modeimm; - - tmode0 = insn_data[d->icode].operand[0].mode; - tmode1 = insn_data[d->icode].operand[1].mode; - modev2 = insn_data[d->icode].operand[2].mode; - modev3 = insn_data[d->icode].operand[3].mode; - modeimm = insn_data[d->icode].operand[4].mode; - - if (VECTOR_MODE_P (modev2)) - op0 = safe_vector_operand (op0, modev2); - if (VECTOR_MODE_P (modev3)) - op1 = safe_vector_operand (op1, modev3); - - if (!insn_data[d->icode].operand[2].predicate (op0, modev2)) - op0 = copy_to_mode_reg (modev2, op0); - if ((optimize && !register_operand (op1, modev3)) - || !insn_data[d->icode].operand[3].predicate (op1, modev3)) - op1 = copy_to_mode_reg (modev3, op1); - - if (!insn_data[d->icode].operand[4].predicate (op2, modeimm)) - { - error ("the third argument must be an 8-bit immediate"); - return const0_rtx; - } - - if (d->code == IX86_BUILTIN_PCMPISTRI128) - { - if (optimize || !target - || GET_MODE (target) != tmode0 - || !insn_data[d->icode].operand[0].predicate (target, tmode0)) - target = gen_reg_rtx (tmode0); - - scratch1 = gen_reg_rtx (tmode1); - - pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2); - } - else if (d->code == IX86_BUILTIN_PCMPISTRM128) - { - if (optimize || !target - || GET_MODE (target) != tmode1 - || !insn_data[d->icode].operand[1].predicate (target, tmode1)) - target = gen_reg_rtx (tmode1); - - scratch0 = gen_reg_rtx (tmode0); - - pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2); - } - else - { - gcc_assert (d->flag); - - scratch0 = gen_reg_rtx (tmode0); - scratch1 = gen_reg_rtx (tmode1); - - pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2); - } - - if (! pat) - return 0; - - emit_insn (pat); - - if (d->flag) - { - target = gen_reg_rtx (SImode); - emit_move_insn (target, const0_rtx); - target = gen_rtx_SUBREG (QImode, target, 0); - - emit_insn - (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), - gen_rtx_fmt_ee (EQ, QImode, - gen_rtx_REG ((machine_mode) d->flag, - FLAGS_REG), - const0_rtx))); - return SUBREG_REG (target); - } - else - return target; -} - -/* Fixup modeless constants to fit required mode. */ - -static rtx -fixup_modeless_constant (rtx x, machine_mode mode) -{ - if (GET_MODE (x) == VOIDmode) - x = convert_to_mode (mode, x, 1); - return x; -} - -/* Subroutine of ix86_expand_builtin to take care of insns with - variable number of operands. 
*/ - -static rtx -ix86_expand_args_builtin (const struct builtin_description *d, - tree exp, rtx target) -{ - rtx pat, real_target; - unsigned int i, nargs; - unsigned int nargs_constant = 0; - unsigned int mask_pos = 0; - int num_memory = 0; - struct - { - rtx op; - machine_mode mode; - } args[6]; - bool second_arg_count = false; - enum insn_code icode = d->icode; - const struct insn_data_d *insn_p = &insn_data[icode]; - machine_mode tmode = insn_p->operand[0].mode; - machine_mode rmode = VOIDmode; - bool swap = false; - enum rtx_code comparison = d->comparison; - - switch ((enum ix86_builtin_func_type) d->flag) - { - case V2DF_FTYPE_V2DF_ROUND: - case V4DF_FTYPE_V4DF_ROUND: - case V8DF_FTYPE_V8DF_ROUND: - case V4SF_FTYPE_V4SF_ROUND: - case V8SF_FTYPE_V8SF_ROUND: - case V16SF_FTYPE_V16SF_ROUND: - case V4SI_FTYPE_V4SF_ROUND: - case V8SI_FTYPE_V8SF_ROUND: - case V16SI_FTYPE_V16SF_ROUND: - return ix86_expand_sse_round (d, exp, target); - case V4SI_FTYPE_V2DF_V2DF_ROUND: - case V8SI_FTYPE_V4DF_V4DF_ROUND: - case V16SI_FTYPE_V8DF_V8DF_ROUND: - return ix86_expand_sse_round_vec_pack_sfix (d, exp, target); - case INT_FTYPE_V8SF_V8SF_PTEST: - case INT_FTYPE_V4DI_V4DI_PTEST: - case INT_FTYPE_V4DF_V4DF_PTEST: - case INT_FTYPE_V4SF_V4SF_PTEST: - case INT_FTYPE_V2DI_V2DI_PTEST: - case INT_FTYPE_V2DF_V2DF_PTEST: - return ix86_expand_sse_ptest (d, exp, target); - case FLOAT128_FTYPE_FLOAT128: - case FLOAT_FTYPE_FLOAT: - case INT_FTYPE_INT: - case UINT_FTYPE_UINT: - case UINT16_FTYPE_UINT16: - case UINT64_FTYPE_INT: - case UINT64_FTYPE_UINT64: - case INT64_FTYPE_INT64: - case INT64_FTYPE_V4SF: - case INT64_FTYPE_V2DF: - case INT_FTYPE_V16QI: - case INT_FTYPE_V8QI: - case INT_FTYPE_V8SF: - case INT_FTYPE_V4DF: - case INT_FTYPE_V4SF: - case INT_FTYPE_V2DF: - case INT_FTYPE_V32QI: - case V16QI_FTYPE_V16QI: - case V8SI_FTYPE_V8SF: - case V8SI_FTYPE_V4SI: - case V8HI_FTYPE_V8HI: - case V8HI_FTYPE_V16QI: - case V8QI_FTYPE_V8QI: - case V8SF_FTYPE_V8SF: - case V8SF_FTYPE_V8SI: - case V8SF_FTYPE_V4SF: - case V8SF_FTYPE_V8HI: - case V4SI_FTYPE_V4SI: - case V4SI_FTYPE_V16QI: - case V4SI_FTYPE_V4SF: - case V4SI_FTYPE_V8SI: - case V4SI_FTYPE_V8HI: - case V4SI_FTYPE_V4DF: - case V4SI_FTYPE_V2DF: - case V4HI_FTYPE_V4HI: - case V4DF_FTYPE_V4DF: - case V4DF_FTYPE_V4SI: - case V4DF_FTYPE_V4SF: - case V4DF_FTYPE_V2DF: - case V4SF_FTYPE_V4SF: - case V4SF_FTYPE_V4SI: - case V4SF_FTYPE_V8SF: - case V4SF_FTYPE_V4DF: - case V4SF_FTYPE_V8HI: - case V4SF_FTYPE_V2DF: - case V2DI_FTYPE_V2DI: - case V2DI_FTYPE_V16QI: - case V2DI_FTYPE_V8HI: - case V2DI_FTYPE_V4SI: - case V2DF_FTYPE_V2DF: - case V2DF_FTYPE_V4SI: - case V2DF_FTYPE_V4DF: - case V2DF_FTYPE_V4SF: - case V2DF_FTYPE_V2SI: - case V2SI_FTYPE_V2SI: - case V2SI_FTYPE_V4SF: - case V2SI_FTYPE_V2SF: - case V2SI_FTYPE_V2DF: - case V2SF_FTYPE_V2SF: - case V2SF_FTYPE_V2SI: - case V32QI_FTYPE_V32QI: - case V32QI_FTYPE_V16QI: - case V16HI_FTYPE_V16HI: - case V16HI_FTYPE_V8HI: - case V8SI_FTYPE_V8SI: - case V16HI_FTYPE_V16QI: - case V8SI_FTYPE_V16QI: - case V4DI_FTYPE_V16QI: - case V8SI_FTYPE_V8HI: - case V4DI_FTYPE_V8HI: - case V4DI_FTYPE_V4SI: - case V4DI_FTYPE_V2DI: - case UQI_FTYPE_UQI: - case UHI_FTYPE_UHI: - case USI_FTYPE_USI: - case USI_FTYPE_UQI: - case USI_FTYPE_UHI: - case UDI_FTYPE_UDI: - case UHI_FTYPE_V16QI: - case USI_FTYPE_V32QI: - case UDI_FTYPE_V64QI: - case V16QI_FTYPE_UHI: - case V32QI_FTYPE_USI: - case V64QI_FTYPE_UDI: - case V8HI_FTYPE_UQI: - case V16HI_FTYPE_UHI: - case V32HI_FTYPE_USI: - case V4SI_FTYPE_UQI: - case V8SI_FTYPE_UQI: - case V4SI_FTYPE_UHI: - case 
V8SI_FTYPE_UHI: - case UQI_FTYPE_V8HI: - case UHI_FTYPE_V16HI: - case USI_FTYPE_V32HI: - case UQI_FTYPE_V4SI: - case UQI_FTYPE_V8SI: - case UHI_FTYPE_V16SI: - case UQI_FTYPE_V2DI: - case UQI_FTYPE_V4DI: - case UQI_FTYPE_V8DI: - case V16SI_FTYPE_UHI: - case V2DI_FTYPE_UQI: - case V4DI_FTYPE_UQI: - case V16SI_FTYPE_INT: - case V16SF_FTYPE_V8SF: - case V16SI_FTYPE_V8SI: - case V16SF_FTYPE_V4SF: - case V16SI_FTYPE_V4SI: - case V16SI_FTYPE_V16SF: - case V16SI_FTYPE_V16SI: - case V64QI_FTYPE_V64QI: - case V32HI_FTYPE_V32HI: - case V16SF_FTYPE_V16SF: - case V8DI_FTYPE_UQI: - case V8DI_FTYPE_V8DI: - case V8DF_FTYPE_V4DF: - case V8DF_FTYPE_V2DF: - case V8DF_FTYPE_V8DF: - case V4DI_FTYPE_V4DI: - case V16HI_FTYPE_V16SF: - case V8HI_FTYPE_V8SF: - case V8HI_FTYPE_V4SF: - nargs = 1; - break; - case V4SF_FTYPE_V4SF_VEC_MERGE: - case V2DF_FTYPE_V2DF_VEC_MERGE: - return ix86_expand_unop_vec_merge_builtin (icode, exp, target); - case FLOAT128_FTYPE_FLOAT128_FLOAT128: - case V16QI_FTYPE_V16QI_V16QI: - case V16QI_FTYPE_V8HI_V8HI: - case V16SF_FTYPE_V16SF_V16SF: - case V8QI_FTYPE_V8QI_V8QI: - case V8QI_FTYPE_V4HI_V4HI: - case V8HI_FTYPE_V8HI_V8HI: - case V8HI_FTYPE_V16QI_V16QI: - case V8HI_FTYPE_V4SI_V4SI: - case V8SF_FTYPE_V8SF_V8SF: - case V8SF_FTYPE_V8SF_V8SI: - case V8DF_FTYPE_V8DF_V8DF: - case V4SI_FTYPE_V4SI_V4SI: - case V4SI_FTYPE_V8HI_V8HI: - case V4SI_FTYPE_V2DF_V2DF: - case V4HI_FTYPE_V4HI_V4HI: - case V4HI_FTYPE_V8QI_V8QI: - case V4HI_FTYPE_V2SI_V2SI: - case V4DF_FTYPE_V4DF_V4DF: - case V4DF_FTYPE_V4DF_V4DI: - case V4SF_FTYPE_V4SF_V4SF: - case V4SF_FTYPE_V4SF_V4SI: - case V4SF_FTYPE_V4SF_V2SI: - case V4SF_FTYPE_V4SF_V2DF: - case V4SF_FTYPE_V4SF_UINT: - case V4SF_FTYPE_V4SF_DI: - case V4SF_FTYPE_V4SF_SI: - case V2DI_FTYPE_V2DI_V2DI: - case V2DI_FTYPE_V16QI_V16QI: - case V2DI_FTYPE_V4SI_V4SI: - case V2DI_FTYPE_V2DI_V16QI: - case V2SI_FTYPE_V2SI_V2SI: - case V2SI_FTYPE_V4HI_V4HI: - case V2SI_FTYPE_V2SF_V2SF: - case V2DF_FTYPE_V2DF_V2DF: - case V2DF_FTYPE_V2DF_V4SF: - case V2DF_FTYPE_V2DF_V2DI: - case V2DF_FTYPE_V2DF_DI: - case V2DF_FTYPE_V2DF_SI: - case V2DF_FTYPE_V2DF_UINT: - case V2SF_FTYPE_V2SF_V2SF: - case V1DI_FTYPE_V1DI_V1DI: - case V1DI_FTYPE_V8QI_V8QI: - case V1DI_FTYPE_V2SI_V2SI: - case V32QI_FTYPE_V16HI_V16HI: - case V16HI_FTYPE_V8SI_V8SI: - case V64QI_FTYPE_V64QI_V64QI: - case V32QI_FTYPE_V32QI_V32QI: - case V16HI_FTYPE_V32QI_V32QI: - case V16HI_FTYPE_V16HI_V16HI: - case V8SI_FTYPE_V4DF_V4DF: - case V8SI_FTYPE_V8SI_V8SI: - case V8SI_FTYPE_V16HI_V16HI: - case V4DI_FTYPE_V4DI_V4DI: - case V4DI_FTYPE_V8SI_V8SI: - case V8DI_FTYPE_V64QI_V64QI: - if (comparison == UNKNOWN) - return ix86_expand_binop_builtin (icode, exp, target); - nargs = 2; - break; - case V4SF_FTYPE_V4SF_V4SF_SWAP: - case V2DF_FTYPE_V2DF_V2DF_SWAP: - gcc_assert (comparison != UNKNOWN); - nargs = 2; - swap = true; - break; - case V16HI_FTYPE_V16HI_V8HI_COUNT: - case V16HI_FTYPE_V16HI_SI_COUNT: - case V8SI_FTYPE_V8SI_V4SI_COUNT: - case V8SI_FTYPE_V8SI_SI_COUNT: - case V4DI_FTYPE_V4DI_V2DI_COUNT: - case V4DI_FTYPE_V4DI_INT_COUNT: - case V8HI_FTYPE_V8HI_V8HI_COUNT: - case V8HI_FTYPE_V8HI_SI_COUNT: - case V4SI_FTYPE_V4SI_V4SI_COUNT: - case V4SI_FTYPE_V4SI_SI_COUNT: - case V4HI_FTYPE_V4HI_V4HI_COUNT: - case V4HI_FTYPE_V4HI_SI_COUNT: - case V2DI_FTYPE_V2DI_V2DI_COUNT: - case V2DI_FTYPE_V2DI_SI_COUNT: - case V2SI_FTYPE_V2SI_V2SI_COUNT: - case V2SI_FTYPE_V2SI_SI_COUNT: - case V1DI_FTYPE_V1DI_V1DI_COUNT: - case V1DI_FTYPE_V1DI_SI_COUNT: - nargs = 2; - second_arg_count = true; - break; - case V16HI_FTYPE_V16HI_INT_V16HI_UHI_COUNT: - case 
V16HI_FTYPE_V16HI_V8HI_V16HI_UHI_COUNT: - case V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT: - case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT: - case V2DI_FTYPE_V2DI_INT_V2DI_UQI_COUNT: - case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI_COUNT: - case V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT: - case V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT: - case V4DI_FTYPE_V4DI_INT_V4DI_UQI_COUNT: - case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI_COUNT: - case V4SI_FTYPE_V4SI_INT_V4SI_UQI_COUNT: - case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI_COUNT: - case V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT: - case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT: - case V8HI_FTYPE_V8HI_INT_V8HI_UQI_COUNT: - case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI_COUNT: - case V8SI_FTYPE_V8SI_INT_V8SI_UQI_COUNT: - case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI_COUNT: - nargs = 4; - second_arg_count = true; - break; - case UINT64_FTYPE_UINT64_UINT64: - case UINT_FTYPE_UINT_UINT: - case UINT_FTYPE_UINT_USHORT: - case UINT_FTYPE_UINT_UCHAR: - case UINT16_FTYPE_UINT16_INT: - case UINT8_FTYPE_UINT8_INT: - case UQI_FTYPE_UQI_UQI: - case UHI_FTYPE_UHI_UHI: - case USI_FTYPE_USI_USI: - case UDI_FTYPE_UDI_UDI: - case V16SI_FTYPE_V8DF_V8DF: - case V32HI_FTYPE_V16SF_V16SF: - case V16HI_FTYPE_V8SF_V8SF: - case V8HI_FTYPE_V4SF_V4SF: - case V16HI_FTYPE_V16SF_UHI: - case V8HI_FTYPE_V8SF_UQI: - case V8HI_FTYPE_V4SF_UQI: - nargs = 2; - break; - case V2DI_FTYPE_V2DI_INT_CONVERT: - nargs = 2; - rmode = V1TImode; - nargs_constant = 1; - break; - case V4DI_FTYPE_V4DI_INT_CONVERT: - nargs = 2; - rmode = V2TImode; - nargs_constant = 1; - break; - case V8DI_FTYPE_V8DI_INT_CONVERT: - nargs = 2; - rmode = V4TImode; - nargs_constant = 1; - break; - case V8HI_FTYPE_V8HI_INT: - case V8HI_FTYPE_V8SF_INT: - case V16HI_FTYPE_V16SF_INT: - case V8HI_FTYPE_V4SF_INT: - case V8SF_FTYPE_V8SF_INT: - case V4SF_FTYPE_V16SF_INT: - case V16SF_FTYPE_V16SF_INT: - case V4SI_FTYPE_V4SI_INT: - case V4SI_FTYPE_V8SI_INT: - case V4HI_FTYPE_V4HI_INT: - case V4DF_FTYPE_V4DF_INT: - case V4DF_FTYPE_V8DF_INT: - case V4SF_FTYPE_V4SF_INT: - case V4SF_FTYPE_V8SF_INT: - case V2DI_FTYPE_V2DI_INT: - case V2DF_FTYPE_V2DF_INT: - case V2DF_FTYPE_V4DF_INT: - case V16HI_FTYPE_V16HI_INT: - case V8SI_FTYPE_V8SI_INT: - case V16SI_FTYPE_V16SI_INT: - case V4SI_FTYPE_V16SI_INT: - case V4DI_FTYPE_V4DI_INT: - case V2DI_FTYPE_V4DI_INT: - case V4DI_FTYPE_V8DI_INT: - case UQI_FTYPE_UQI_UQI_CONST: - case UHI_FTYPE_UHI_UQI: - case USI_FTYPE_USI_UQI: - case UDI_FTYPE_UDI_UQI: - nargs = 2; - nargs_constant = 1; - break; - case V16QI_FTYPE_V16QI_V16QI_V16QI: - case V8SF_FTYPE_V8SF_V8SF_V8SF: - case V4DF_FTYPE_V4DF_V4DF_V4DF: - case V4SF_FTYPE_V4SF_V4SF_V4SF: - case V2DF_FTYPE_V2DF_V2DF_V2DF: - case V32QI_FTYPE_V32QI_V32QI_V32QI: - case UHI_FTYPE_V16SI_V16SI_UHI: - case UQI_FTYPE_V8DI_V8DI_UQI: - case V16HI_FTYPE_V16SI_V16HI_UHI: - case V16QI_FTYPE_V16SI_V16QI_UHI: - case V16QI_FTYPE_V8DI_V16QI_UQI: - case V16SF_FTYPE_V16SF_V16SF_UHI: - case V16SF_FTYPE_V4SF_V16SF_UHI: - case V16SI_FTYPE_SI_V16SI_UHI: - case V16SI_FTYPE_V16HI_V16SI_UHI: - case V16SI_FTYPE_V16QI_V16SI_UHI: - case V8SF_FTYPE_V4SF_V8SF_UQI: - case V4DF_FTYPE_V2DF_V4DF_UQI: - case V8SI_FTYPE_V4SI_V8SI_UQI: - case V8SI_FTYPE_SI_V8SI_UQI: - case V4SI_FTYPE_V4SI_V4SI_UQI: - case V4SI_FTYPE_SI_V4SI_UQI: - case V4DI_FTYPE_V2DI_V4DI_UQI: - case V4DI_FTYPE_DI_V4DI_UQI: - case V2DI_FTYPE_V2DI_V2DI_UQI: - case V2DI_FTYPE_DI_V2DI_UQI: - case V64QI_FTYPE_V64QI_V64QI_UDI: - case V64QI_FTYPE_V16QI_V64QI_UDI: - case V64QI_FTYPE_QI_V64QI_UDI: - case V32QI_FTYPE_V32QI_V32QI_USI: - case V32QI_FTYPE_V16QI_V32QI_USI: - case V32QI_FTYPE_QI_V32QI_USI: - case 
V16QI_FTYPE_V16QI_V16QI_UHI: - case V16QI_FTYPE_QI_V16QI_UHI: - case V32HI_FTYPE_V8HI_V32HI_USI: - case V32HI_FTYPE_HI_V32HI_USI: - case V16HI_FTYPE_V8HI_V16HI_UHI: - case V16HI_FTYPE_HI_V16HI_UHI: - case V8HI_FTYPE_V8HI_V8HI_UQI: - case V8HI_FTYPE_HI_V8HI_UQI: - case V8SF_FTYPE_V8HI_V8SF_UQI: - case V4SF_FTYPE_V8HI_V4SF_UQI: - case V8SI_FTYPE_V8SF_V8SI_UQI: - case V4SI_FTYPE_V4SF_V4SI_UQI: - case V4DI_FTYPE_V4SF_V4DI_UQI: - case V2DI_FTYPE_V4SF_V2DI_UQI: - case V4SF_FTYPE_V4DI_V4SF_UQI: - case V4SF_FTYPE_V2DI_V4SF_UQI: - case V4DF_FTYPE_V4DI_V4DF_UQI: - case V2DF_FTYPE_V2DI_V2DF_UQI: - case V16QI_FTYPE_V8HI_V16QI_UQI: - case V16QI_FTYPE_V16HI_V16QI_UHI: - case V16QI_FTYPE_V4SI_V16QI_UQI: - case V16QI_FTYPE_V8SI_V16QI_UQI: - case V8HI_FTYPE_V4SI_V8HI_UQI: - case V8HI_FTYPE_V8SI_V8HI_UQI: - case V16QI_FTYPE_V2DI_V16QI_UQI: - case V16QI_FTYPE_V4DI_V16QI_UQI: - case V8HI_FTYPE_V2DI_V8HI_UQI: - case V8HI_FTYPE_V4DI_V8HI_UQI: - case V4SI_FTYPE_V2DI_V4SI_UQI: - case V4SI_FTYPE_V4DI_V4SI_UQI: - case V32QI_FTYPE_V32HI_V32QI_USI: - case UHI_FTYPE_V16QI_V16QI_UHI: - case USI_FTYPE_V32QI_V32QI_USI: - case UDI_FTYPE_V64QI_V64QI_UDI: - case UQI_FTYPE_V8HI_V8HI_UQI: - case UHI_FTYPE_V16HI_V16HI_UHI: - case USI_FTYPE_V32HI_V32HI_USI: - case UQI_FTYPE_V4SI_V4SI_UQI: - case UQI_FTYPE_V8SI_V8SI_UQI: - case UQI_FTYPE_V2DI_V2DI_UQI: - case UQI_FTYPE_V4DI_V4DI_UQI: - case V4SF_FTYPE_V2DF_V4SF_UQI: - case V4SF_FTYPE_V4DF_V4SF_UQI: - case V16SI_FTYPE_V16SI_V16SI_UHI: - case V16SI_FTYPE_V4SI_V16SI_UHI: - case V2DI_FTYPE_V4SI_V2DI_UQI: - case V2DI_FTYPE_V8HI_V2DI_UQI: - case V2DI_FTYPE_V16QI_V2DI_UQI: - case V4DI_FTYPE_V4DI_V4DI_UQI: - case V4DI_FTYPE_V4SI_V4DI_UQI: - case V4DI_FTYPE_V8HI_V4DI_UQI: - case V4DI_FTYPE_V16QI_V4DI_UQI: - case V4DI_FTYPE_V4DF_V4DI_UQI: - case V2DI_FTYPE_V2DF_V2DI_UQI: - case V4SI_FTYPE_V4DF_V4SI_UQI: - case V4SI_FTYPE_V2DF_V4SI_UQI: - case V4SI_FTYPE_V8HI_V4SI_UQI: - case V4SI_FTYPE_V16QI_V4SI_UQI: - case V4DI_FTYPE_V4DI_V4DI_V4DI: - case V8DF_FTYPE_V2DF_V8DF_UQI: - case V8DF_FTYPE_V4DF_V8DF_UQI: - case V8DF_FTYPE_V8DF_V8DF_UQI: - case V8SF_FTYPE_V8SF_V8SF_UQI: - case V8SF_FTYPE_V8SI_V8SF_UQI: - case V4DF_FTYPE_V4DF_V4DF_UQI: - case V4SF_FTYPE_V4SF_V4SF_UQI: - case V2DF_FTYPE_V2DF_V2DF_UQI: - case V2DF_FTYPE_V4SF_V2DF_UQI: - case V2DF_FTYPE_V4SI_V2DF_UQI: - case V4SF_FTYPE_V4SI_V4SF_UQI: - case V4DF_FTYPE_V4SF_V4DF_UQI: - case V4DF_FTYPE_V4SI_V4DF_UQI: - case V8SI_FTYPE_V8SI_V8SI_UQI: - case V8SI_FTYPE_V8HI_V8SI_UQI: - case V8SI_FTYPE_V16QI_V8SI_UQI: - case V8DF_FTYPE_V8SI_V8DF_UQI: - case V8DI_FTYPE_DI_V8DI_UQI: - case V16SF_FTYPE_V8SF_V16SF_UHI: - case V16SI_FTYPE_V8SI_V16SI_UHI: - case V16HI_FTYPE_V16HI_V16HI_UHI: - case V8HI_FTYPE_V16QI_V8HI_UQI: - case V16HI_FTYPE_V16QI_V16HI_UHI: - case V32HI_FTYPE_V32HI_V32HI_USI: - case V32HI_FTYPE_V32QI_V32HI_USI: - case V8DI_FTYPE_V16QI_V8DI_UQI: - case V8DI_FTYPE_V2DI_V8DI_UQI: - case V8DI_FTYPE_V4DI_V8DI_UQI: - case V8DI_FTYPE_V8DI_V8DI_UQI: - case V8DI_FTYPE_V8HI_V8DI_UQI: - case V8DI_FTYPE_V8SI_V8DI_UQI: - case V8HI_FTYPE_V8DI_V8HI_UQI: - case V8SI_FTYPE_V8DI_V8SI_UQI: - case V4SI_FTYPE_V4SI_V4SI_V4SI: - case V16SI_FTYPE_V16SI_V16SI_V16SI: - case V8DI_FTYPE_V8DI_V8DI_V8DI: - case V32HI_FTYPE_V32HI_V32HI_V32HI: - case V2DI_FTYPE_V2DI_V2DI_V2DI: - case V16HI_FTYPE_V16HI_V16HI_V16HI: - case V8SI_FTYPE_V8SI_V8SI_V8SI: - case V8HI_FTYPE_V8HI_V8HI_V8HI: - case V32HI_FTYPE_V16SF_V16SF_USI: - case V16HI_FTYPE_V8SF_V8SF_UHI: - case V8HI_FTYPE_V4SF_V4SF_UQI: - case V16HI_FTYPE_V16SF_V16HI_UHI: - case V8HI_FTYPE_V8SF_V8HI_UQI: - case 
V8HI_FTYPE_V4SF_V8HI_UQI: - case V16SF_FTYPE_V16SF_V32HI_V32HI: - case V8SF_FTYPE_V8SF_V16HI_V16HI: - case V4SF_FTYPE_V4SF_V8HI_V8HI: - nargs = 3; - break; - case V32QI_FTYPE_V32QI_V32QI_INT: - case V16HI_FTYPE_V16HI_V16HI_INT: - case V16QI_FTYPE_V16QI_V16QI_INT: - case V4DI_FTYPE_V4DI_V4DI_INT: - case V8HI_FTYPE_V8HI_V8HI_INT: - case V8SI_FTYPE_V8SI_V8SI_INT: - case V8SI_FTYPE_V8SI_V4SI_INT: - case V8SF_FTYPE_V8SF_V8SF_INT: - case V8SF_FTYPE_V8SF_V4SF_INT: - case V4SI_FTYPE_V4SI_V4SI_INT: - case V4DF_FTYPE_V4DF_V4DF_INT: - case V16SF_FTYPE_V16SF_V16SF_INT: - case V16SF_FTYPE_V16SF_V4SF_INT: - case V16SI_FTYPE_V16SI_V4SI_INT: - case V4DF_FTYPE_V4DF_V2DF_INT: - case V4SF_FTYPE_V4SF_V4SF_INT: - case V2DI_FTYPE_V2DI_V2DI_INT: - case V4DI_FTYPE_V4DI_V2DI_INT: - case V2DF_FTYPE_V2DF_V2DF_INT: - case UQI_FTYPE_V8DI_V8UDI_INT: - case UQI_FTYPE_V8DF_V8DF_INT: - case UQI_FTYPE_V2DF_V2DF_INT: - case UQI_FTYPE_V4SF_V4SF_INT: - case UHI_FTYPE_V16SI_V16SI_INT: - case UHI_FTYPE_V16SF_V16SF_INT: - case V64QI_FTYPE_V64QI_V64QI_INT: - case V32HI_FTYPE_V32HI_V32HI_INT: - case V16SI_FTYPE_V16SI_V16SI_INT: - case V8DI_FTYPE_V8DI_V8DI_INT: - nargs = 3; - nargs_constant = 1; - break; - case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT: - nargs = 3; - rmode = V4DImode; - nargs_constant = 1; - break; - case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT: - nargs = 3; - rmode = V2DImode; - nargs_constant = 1; - break; - case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT: - nargs = 3; - rmode = DImode; - nargs_constant = 1; - break; - case V2DI_FTYPE_V2DI_UINT_UINT: - nargs = 3; - nargs_constant = 2; - break; - case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT: - nargs = 3; - rmode = V8DImode; - nargs_constant = 1; - break; - case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT: - nargs = 5; - rmode = V8DImode; - mask_pos = 2; - nargs_constant = 1; - break; - case QI_FTYPE_V8DF_INT_UQI: - case QI_FTYPE_V4DF_INT_UQI: - case QI_FTYPE_V2DF_INT_UQI: - case HI_FTYPE_V16SF_INT_UHI: - case QI_FTYPE_V8SF_INT_UQI: - case QI_FTYPE_V4SF_INT_UQI: - case V4SI_FTYPE_V4SI_V4SI_UHI: - case V8SI_FTYPE_V8SI_V8SI_UHI: - nargs = 3; - mask_pos = 1; - nargs_constant = 1; - break; - case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT: - nargs = 5; - rmode = V4DImode; - mask_pos = 2; - nargs_constant = 1; - break; - case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT: - nargs = 5; - rmode = V2DImode; - mask_pos = 2; - nargs_constant = 1; - break; - case V32QI_FTYPE_V32QI_V32QI_V32QI_USI: - case V32HI_FTYPE_V32HI_V32HI_V32HI_USI: - case V32HI_FTYPE_V64QI_V64QI_V32HI_USI: - case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI: - case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI: - case V32HI_FTYPE_V32HI_V8HI_V32HI_USI: - case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI: - case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI: - case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI: - case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI: - case V32QI_FTYPE_V16HI_V16HI_V32QI_USI: - case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI: - case V32HI_FTYPE_V16SI_V16SI_V32HI_USI: - case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI: - case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI: - case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI: - case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI: - case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI: - case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI: - case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI: - case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI: - case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI: - case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI: - case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI: - case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI: - case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI: - case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI: - case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI: - case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI: - case 
V16SI_FTYPE_V16SI_V4SI_V16SI_UHI: - case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI: - case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI: - case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI: - case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI: - case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI: - case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI: - case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI: - case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI: - case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI: - case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI: - case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI: - case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI: - case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI: - case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI: - case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI: - case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI: - case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI: - case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI: - case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI: - case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI: - case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI: - case V32HI_FTYPE_V16SF_V16SF_V32HI_USI: - case V16HI_FTYPE_V8SF_V8SF_V16HI_UHI: - case V8HI_FTYPE_V4SF_V4SF_V8HI_UQI: - nargs = 4; - break; - case V2DF_FTYPE_V2DF_V2DF_V2DI_INT: - case V4DF_FTYPE_V4DF_V4DF_V4DI_INT: - case V4SF_FTYPE_V4SF_V4SF_V4SI_INT: - case V8SF_FTYPE_V8SF_V8SF_V8SI_INT: - case V16SF_FTYPE_V16SF_V16SF_V16SI_INT: - nargs = 4; - nargs_constant = 1; - break; - case UQI_FTYPE_V4DI_V4DI_INT_UQI: - case UQI_FTYPE_V8SI_V8SI_INT_UQI: - case QI_FTYPE_V4DF_V4DF_INT_UQI: - case QI_FTYPE_V8SF_V8SF_INT_UQI: - case UQI_FTYPE_V2DI_V2DI_INT_UQI: - case UQI_FTYPE_V4SI_V4SI_INT_UQI: - case UQI_FTYPE_V2DF_V2DF_INT_UQI: - case UQI_FTYPE_V4SF_V4SF_INT_UQI: - case UDI_FTYPE_V64QI_V64QI_INT_UDI: - case USI_FTYPE_V32QI_V32QI_INT_USI: - case UHI_FTYPE_V16QI_V16QI_INT_UHI: - case USI_FTYPE_V32HI_V32HI_INT_USI: - case UHI_FTYPE_V16HI_V16HI_INT_UHI: - case UQI_FTYPE_V8HI_V8HI_INT_UQI: - nargs = 4; - mask_pos = 1; - nargs_constant = 1; - break; - case V2DI_FTYPE_V2DI_V2DI_UINT_UINT: - nargs = 4; - nargs_constant = 2; - break; - case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED: - case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG: - case V16SF_FTYPE_V16SF_V32HI_V32HI_UHI: - case V8SF_FTYPE_V8SF_V16HI_V16HI_UQI: - case V4SF_FTYPE_V4SF_V8HI_V8HI_UQI: - nargs = 4; - break; - case UQI_FTYPE_V8DI_V8DI_INT_UQI: - case UHI_FTYPE_V16SI_V16SI_INT_UHI: - mask_pos = 1; - nargs = 4; - nargs_constant = 1; - break; - case V8SF_FTYPE_V8SF_INT_V8SF_UQI: - case V4SF_FTYPE_V4SF_INT_V4SF_UQI: - case V2DF_FTYPE_V4DF_INT_V2DF_UQI: - case V2DI_FTYPE_V4DI_INT_V2DI_UQI: - case V8SF_FTYPE_V16SF_INT_V8SF_UQI: - case V8SI_FTYPE_V16SI_INT_V8SI_UQI: - case V2DF_FTYPE_V8DF_INT_V2DF_UQI: - case V2DI_FTYPE_V8DI_INT_V2DI_UQI: - case V4SF_FTYPE_V8SF_INT_V4SF_UQI: - case V4SI_FTYPE_V8SI_INT_V4SI_UQI: - case V8HI_FTYPE_V8SF_INT_V8HI_UQI: - case V8HI_FTYPE_V4SF_INT_V8HI_UQI: - case V32HI_FTYPE_V32HI_INT_V32HI_USI: - case V16HI_FTYPE_V16HI_INT_V16HI_UHI: - case V8HI_FTYPE_V8HI_INT_V8HI_UQI: - case V4DI_FTYPE_V4DI_INT_V4DI_UQI: - case V2DI_FTYPE_V2DI_INT_V2DI_UQI: - case V8SI_FTYPE_V8SI_INT_V8SI_UQI: - case V4SI_FTYPE_V4SI_INT_V4SI_UQI: - case V4DF_FTYPE_V4DF_INT_V4DF_UQI: - case V2DF_FTYPE_V2DF_INT_V2DF_UQI: - case V8DF_FTYPE_V8DF_INT_V8DF_UQI: - case V16SF_FTYPE_V16SF_INT_V16SF_UHI: - case V16HI_FTYPE_V16SF_INT_V16HI_UHI: - case V16SI_FTYPE_V16SI_INT_V16SI_UHI: - case V4SI_FTYPE_V16SI_INT_V4SI_UQI: - case V4DI_FTYPE_V8DI_INT_V4DI_UQI: - case V4DF_FTYPE_V8DF_INT_V4DF_UQI: - case V4SF_FTYPE_V16SF_INT_V4SF_UQI: - case V8DI_FTYPE_V8DI_INT_V8DI_UQI: - nargs = 4; - mask_pos = 2; - nargs_constant = 1; - break; - case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI: - case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI: - case 
V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI: - case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI: - case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI: - case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI: - case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI: - case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI: - case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI: - case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI: - case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI: - case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI: - case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI: - case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI: - case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI: - case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI: - case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI: - case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI: - case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI: - case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI: - case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI: - case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI: - case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI: - case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI: - case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI: - case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI: - case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI: - nargs = 5; - mask_pos = 2; - nargs_constant = 1; - break; - case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI: - case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI: - case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI: - case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI: - case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI: - case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI: - case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI: - case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI: - case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI: - case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI: - nargs = 5; - mask_pos = 1; - nargs_constant = 1; - break; - case V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI: - case V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI: - case V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI: - case V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT: - case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT: - case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT: - case V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT: - case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT: - case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT: - case V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT: - case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT: - case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT: - nargs = 5; - mask_pos = 1; - nargs_constant = 2; - break; - - default: - gcc_unreachable (); - } - - gcc_assert (nargs <= ARRAY_SIZE (args)); - - if (comparison != UNKNOWN) - { - gcc_assert (nargs == 2); - return ix86_expand_sse_compare (d, exp, target, swap); - } - - if (rmode == VOIDmode || rmode == tmode) - { - if (optimize - || target == 0 - || GET_MODE (target) != tmode - || !insn_p->operand[0].predicate (target, tmode)) - target = gen_reg_rtx (tmode); - else if (memory_operand (target, tmode)) - num_memory++; - real_target = target; - } - else - { - real_target = gen_reg_rtx (tmode); - target = lowpart_subreg (rmode, real_target, tmode); - } - - for (i = 0; i < nargs; i++) - { - tree arg = CALL_EXPR_ARG (exp, i); - rtx op = expand_normal (arg); - machine_mode mode = insn_p->operand[i + 1].mode; - bool match = insn_p->operand[i + 1].predicate (op, mode); - - if (second_arg_count && i == 1) - { - /* SIMD shift insns take either an 8-bit immediate or - register as count. But builtin functions take int as - count. If count doesn't match, we put it in register. - The instructions are using 64-bit count, if op is just - 32-bit, zero-extend it, as negative shift counts - are undefined behavior and zero-extension is more - efficient. 
*/ - if (!match) - { - if (SCALAR_INT_MODE_P (GET_MODE (op))) - op = convert_modes (mode, GET_MODE (op), op, 1); - else - op = lowpart_subreg (mode, op, GET_MODE (op)); - if (!insn_p->operand[i + 1].predicate (op, mode)) - op = copy_to_reg (op); - } - } - else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) || - (!mask_pos && (nargs - i) <= nargs_constant)) - { - if (!match) - switch (icode) - { - case CODE_FOR_avx_vinsertf128v4di: - case CODE_FOR_avx_vextractf128v4di: - error ("the last argument must be an 1-bit immediate"); - return const0_rtx; - - case CODE_FOR_avx512f_cmpv8di3_mask: - case CODE_FOR_avx512f_cmpv16si3_mask: - case CODE_FOR_avx512f_ucmpv8di3_mask: - case CODE_FOR_avx512f_ucmpv16si3_mask: - case CODE_FOR_avx512vl_cmpv4di3_mask: - case CODE_FOR_avx512vl_cmpv8si3_mask: - case CODE_FOR_avx512vl_ucmpv4di3_mask: - case CODE_FOR_avx512vl_ucmpv8si3_mask: - case CODE_FOR_avx512vl_cmpv2di3_mask: - case CODE_FOR_avx512vl_cmpv4si3_mask: - case CODE_FOR_avx512vl_ucmpv2di3_mask: - case CODE_FOR_avx512vl_ucmpv4si3_mask: - error ("the last argument must be a 3-bit immediate"); - return const0_rtx; - - case CODE_FOR_sse4_1_roundsd: - case CODE_FOR_sse4_1_roundss: - - case CODE_FOR_sse4_1_roundpd: - case CODE_FOR_sse4_1_roundps: - case CODE_FOR_avx_roundpd256: - case CODE_FOR_avx_roundps256: - - case CODE_FOR_sse4_1_roundpd_vec_pack_sfix: - case CODE_FOR_sse4_1_roundps_sfix: - case CODE_FOR_avx_roundpd_vec_pack_sfix256: - case CODE_FOR_avx_roundps_sfix256: - - case CODE_FOR_sse4_1_blendps: - case CODE_FOR_avx_blendpd256: - case CODE_FOR_avx_vpermilv4df: - case CODE_FOR_avx_vpermilv4df_mask: - case CODE_FOR_avx512f_getmantv8df_mask: - case CODE_FOR_avx512f_getmantv16sf_mask: - case CODE_FOR_avx512vl_getmantv8sf_mask: - case CODE_FOR_avx512vl_getmantv4df_mask: - case CODE_FOR_avx512vl_getmantv4sf_mask: - case CODE_FOR_avx512vl_getmantv2df_mask: - case CODE_FOR_avx512dq_rangepv8df_mask_round: - case CODE_FOR_avx512dq_rangepv16sf_mask_round: - case CODE_FOR_avx512dq_rangepv4df_mask: - case CODE_FOR_avx512dq_rangepv8sf_mask: - case CODE_FOR_avx512dq_rangepv2df_mask: - case CODE_FOR_avx512dq_rangepv4sf_mask: - case CODE_FOR_avx_shufpd256_mask: - error ("the last argument must be a 4-bit immediate"); - return const0_rtx; - - case CODE_FOR_sha1rnds4: - case CODE_FOR_sse4_1_blendpd: - case CODE_FOR_avx_vpermilv2df: - case CODE_FOR_avx_vpermilv2df_mask: - case CODE_FOR_xop_vpermil2v2df3: - case CODE_FOR_xop_vpermil2v4sf3: - case CODE_FOR_xop_vpermil2v4df3: - case CODE_FOR_xop_vpermil2v8sf3: - case CODE_FOR_avx512f_vinsertf32x4_mask: - case CODE_FOR_avx512f_vinserti32x4_mask: - case CODE_FOR_avx512f_vextractf32x4_mask: - case CODE_FOR_avx512f_vextracti32x4_mask: - case CODE_FOR_sse2_shufpd: - case CODE_FOR_sse2_shufpd_mask: - case CODE_FOR_avx512dq_shuf_f64x2_mask: - case CODE_FOR_avx512dq_shuf_i64x2_mask: - case CODE_FOR_avx512vl_shuf_i32x4_mask: - case CODE_FOR_avx512vl_shuf_f32x4_mask: - error ("the last argument must be a 2-bit immediate"); - return const0_rtx; - - case CODE_FOR_avx_vextractf128v4df: - case CODE_FOR_avx_vextractf128v8sf: - case CODE_FOR_avx_vextractf128v8si: - case CODE_FOR_avx_vinsertf128v4df: - case CODE_FOR_avx_vinsertf128v8sf: - case CODE_FOR_avx_vinsertf128v8si: - case CODE_FOR_avx512f_vinsertf64x4_mask: - case CODE_FOR_avx512f_vinserti64x4_mask: - case CODE_FOR_avx512f_vextractf64x4_mask: - case CODE_FOR_avx512f_vextracti64x4_mask: - case CODE_FOR_avx512dq_vinsertf32x8_mask: - case CODE_FOR_avx512dq_vinserti32x8_mask: - case CODE_FOR_avx512vl_vinsertv4df: - case 
CODE_FOR_avx512vl_vinsertv4di: - case CODE_FOR_avx512vl_vinsertv8sf: - case CODE_FOR_avx512vl_vinsertv8si: - error ("the last argument must be a 1-bit immediate"); - return const0_rtx; - - case CODE_FOR_avx_vmcmpv2df3: - case CODE_FOR_avx_vmcmpv4sf3: - case CODE_FOR_avx_cmpv2df3: - case CODE_FOR_avx_cmpv4sf3: - case CODE_FOR_avx_cmpv4df3: - case CODE_FOR_avx_cmpv8sf3: - case CODE_FOR_avx512f_cmpv8df3_mask: - case CODE_FOR_avx512f_cmpv16sf3_mask: - case CODE_FOR_avx512f_vmcmpv2df3_mask: - case CODE_FOR_avx512f_vmcmpv4sf3_mask: - error ("the last argument must be a 5-bit immediate"); - return const0_rtx; - - default: - switch (nargs_constant) - { - case 2: - if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) || - (!mask_pos && (nargs - i) == nargs_constant)) - { - error ("the next to last argument must be an 8-bit immediate"); - break; - } - /* FALLTHRU */ - case 1: - error ("the last argument must be an 8-bit immediate"); - break; - default: - gcc_unreachable (); - } - return const0_rtx; - } - } - else - { - if (VECTOR_MODE_P (mode)) - op = safe_vector_operand (op, mode); - - /* If we aren't optimizing, only allow one memory operand to - be generated. */ - if (memory_operand (op, mode)) - num_memory++; - - op = fixup_modeless_constant (op, mode); - - if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) - { - if (optimize || !match || num_memory > 1) - op = copy_to_mode_reg (mode, op); - } - else - { - op = copy_to_reg (op); - op = lowpart_subreg (mode, op, GET_MODE (op)); - } - } - - args[i].op = op; - args[i].mode = mode; - } - - switch (nargs) - { - case 1: - pat = GEN_FCN (icode) (real_target, args[0].op); - break; - case 2: - pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op); - break; - case 3: - pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, - args[2].op); - break; - case 4: - pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, - args[2].op, args[3].op); - break; - case 5: - pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, - args[2].op, args[3].op, args[4].op); - break; - case 6: - pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, - args[2].op, args[3].op, args[4].op, - args[5].op); - break; - default: - gcc_unreachable (); - } - - if (! pat) - return 0; - - emit_insn (pat); - return target; -} - -/* Transform pattern of following layout: - (set A - (unspec [B C] UNSPEC_EMBEDDED_ROUNDING)) - ) - into: - (set (A B)) */ - -static rtx -ix86_erase_embedded_rounding (rtx pat) -{ - if (GET_CODE (pat) == INSN) - pat = PATTERN (pat); - - gcc_assert (GET_CODE (pat) == SET); - rtx src = SET_SRC (pat); - gcc_assert (XVECLEN (src, 0) == 2); - rtx p0 = XVECEXP (src, 0, 0); - gcc_assert (GET_CODE (src) == UNSPEC - && XINT (src, 1) == UNSPEC_EMBEDDED_ROUNDING); - rtx res = gen_rtx_SET (SET_DEST (pat), p0); - return res; -} - -/* Subroutine of ix86_expand_round_builtin to take care of comi insns - with rounding. */ -static rtx -ix86_expand_sse_comi_round (const struct builtin_description *d, - tree exp, rtx target) -{ - rtx pat, set_dst; - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree arg1 = CALL_EXPR_ARG (exp, 1); - tree arg2 = CALL_EXPR_ARG (exp, 2); - tree arg3 = CALL_EXPR_ARG (exp, 3); - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - rtx op2 = expand_normal (arg2); - rtx op3 = expand_normal (arg3); - enum insn_code icode = d->icode; - const struct insn_data_d *insn_p = &insn_data[icode]; - machine_mode mode0 = insn_p->operand[0].mode; - machine_mode mode1 = insn_p->operand[1].mode; - - /* See avxintrin.h for values. 
*/ - static const enum rtx_code comparisons[32] = - { - EQ, LT, LE, UNORDERED, NE, UNGE, UNGT, ORDERED, - UNEQ, UNLT, UNLE, UNORDERED, LTGT, GE, GT, ORDERED, - EQ, LT, LE, UNORDERED, NE, UNGE, UNGT, ORDERED, - UNEQ, UNLT, UNLE, UNORDERED, LTGT, GE, GT, ORDERED - }; - static const bool ordereds[32] = - { - true, true, true, false, false, false, false, true, - false, false, false, true, true, true, true, false, - true, true, true, false, false, false, false, true, - false, false, false, true, true, true, true, false - }; - static const bool non_signalings[32] = - { - true, false, false, true, true, false, false, true, - true, false, false, true, true, false, false, true, - false, true, true, false, false, true, true, false, - false, true, true, false, false, true, true, false - }; - - if (!CONST_INT_P (op2)) - { - error ("the third argument must be comparison constant"); - return const0_rtx; - } - if (INTVAL (op2) < 0 || INTVAL (op2) >= 32) - { - error ("incorrect comparison mode"); - return const0_rtx; - } - - if (!insn_p->operand[2].predicate (op3, SImode)) - { - error ("incorrect rounding operand"); - return const0_rtx; - } - - if (VECTOR_MODE_P (mode0)) - op0 = safe_vector_operand (op0, mode0); - if (VECTOR_MODE_P (mode1)) - op1 = safe_vector_operand (op1, mode1); - - enum rtx_code comparison = comparisons[INTVAL (op2)]; - bool ordered = ordereds[INTVAL (op2)]; - bool non_signaling = non_signalings[INTVAL (op2)]; - rtx const_val = const0_rtx; - - bool check_unordered = false; - machine_mode mode = CCFPmode; - switch (comparison) - { - case ORDERED: - if (!ordered) - { - /* NB: Use CCSmode/NE for _CMP_TRUE_UQ/_CMP_TRUE_US. */ - if (!non_signaling) - ordered = true; - mode = CCSmode; - } - else - { - /* NB: Use CCPmode/NE for _CMP_ORD_Q/_CMP_ORD_S. */ - if (non_signaling) - ordered = false; - mode = CCPmode; - } - comparison = NE; - break; - case UNORDERED: - if (ordered) - { - /* NB: Use CCSmode/EQ for _CMP_FALSE_OQ/_CMP_FALSE_OS. */ - if (non_signaling) - ordered = false; - mode = CCSmode; - } - else - { - /* NB: Use CCPmode/NE for _CMP_UNORD_Q/_CMP_UNORD_S. */ - if (!non_signaling) - ordered = true; - mode = CCPmode; - } - comparison = EQ; - break; - - case LE: /* -> GE */ - case LT: /* -> GT */ - case UNGE: /* -> UNLE */ - case UNGT: /* -> UNLT */ - std::swap (op0, op1); - comparison = swap_condition (comparison); - /* FALLTHRU */ - case GT: - case GE: - case UNEQ: - case UNLT: - case UNLE: - case LTGT: - /* These are supported by CCFPmode. NB: Use ordered/signaling - COMI or unordered/non-signaling UCOMI. Both set ZF, PF, CF - with NAN operands. */ - if (ordered == non_signaling) - ordered = !ordered; - break; - case EQ: - /* NB: COMI/UCOMI will set ZF with NAN operands. Use CCZmode for - _CMP_EQ_OQ/_CMP_EQ_OS. */ - check_unordered = true; - mode = CCZmode; - break; - case NE: - /* NB: COMI/UCOMI will set ZF with NAN operands. Use CCZmode for - _CMP_NEQ_UQ/_CMP_NEQ_US. */ - gcc_assert (!ordered); - check_unordered = true; - mode = CCZmode; - const_val = const1_rtx; - break; - default: - gcc_unreachable (); - } - - target = gen_reg_rtx (SImode); - emit_move_insn (target, const_val); - target = gen_rtx_SUBREG (QImode, target, 0); - - if ((optimize && !register_operand (op0, mode0)) - || !insn_p->operand[0].predicate (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if ((optimize && !register_operand (op1, mode1)) - || !insn_p->operand[1].predicate (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - - /* - 1. COMI: ordered and signaling. - 2. UCOMI: unordered and non-signaling. 
- */ - if (non_signaling) - icode = (icode == CODE_FOR_sse_comi_round - ? CODE_FOR_sse_ucomi_round - : CODE_FOR_sse2_ucomi_round); - - pat = GEN_FCN (icode) (op0, op1, op3); - if (! pat) - return 0; - - /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */ - if (INTVAL (op3) == NO_ROUND) - { - pat = ix86_erase_embedded_rounding (pat); - if (! pat) - return 0; - - set_dst = SET_DEST (pat); - } - else - { - gcc_assert (GET_CODE (pat) == SET); - set_dst = SET_DEST (pat); - } - - emit_insn (pat); - - rtx_code_label *label = NULL; - - /* NB: For ordered EQ or unordered NE, check ZF alone isn't sufficient - with NAN operands. */ - if (check_unordered) - { - gcc_assert (comparison == EQ || comparison == NE); - - rtx flag = gen_rtx_REG (CCFPmode, FLAGS_REG); - label = gen_label_rtx (); - rtx tmp = gen_rtx_fmt_ee (UNORDERED, VOIDmode, flag, const0_rtx); - tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, - gen_rtx_LABEL_REF (VOIDmode, label), - pc_rtx); - emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); - } - - /* NB: Set CCFPmode and check a different CCmode which is in subset - of CCFPmode. */ - if (GET_MODE (set_dst) != mode) - { - gcc_assert (mode == CCAmode || mode == CCCmode - || mode == CCOmode || mode == CCPmode - || mode == CCSmode || mode == CCZmode); - set_dst = gen_rtx_REG (mode, FLAGS_REG); - } - - emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), - gen_rtx_fmt_ee (comparison, QImode, - set_dst, - const0_rtx))); - - if (label) - emit_label (label); - - return SUBREG_REG (target); -} - -static rtx -ix86_expand_round_builtin (const struct builtin_description *d, - tree exp, rtx target) -{ - rtx pat; - unsigned int i, nargs; - struct - { - rtx op; - machine_mode mode; - } args[6]; - enum insn_code icode = d->icode; - const struct insn_data_d *insn_p = &insn_data[icode]; - machine_mode tmode = insn_p->operand[0].mode; - unsigned int nargs_constant = 0; - unsigned int redundant_embed_rnd = 0; - - switch ((enum ix86_builtin_func_type) d->flag) - { - case UINT64_FTYPE_V2DF_INT: - case UINT64_FTYPE_V4SF_INT: - case UINT_FTYPE_V2DF_INT: - case UINT_FTYPE_V4SF_INT: - case INT64_FTYPE_V2DF_INT: - case INT64_FTYPE_V4SF_INT: - case INT_FTYPE_V2DF_INT: - case INT_FTYPE_V4SF_INT: - nargs = 2; - break; - case V4SF_FTYPE_V4SF_UINT_INT: - case V4SF_FTYPE_V4SF_UINT64_INT: - case V2DF_FTYPE_V2DF_UINT64_INT: - case V4SF_FTYPE_V4SF_INT_INT: - case V4SF_FTYPE_V4SF_INT64_INT: - case V2DF_FTYPE_V2DF_INT64_INT: - case V4SF_FTYPE_V4SF_V4SF_INT: - case V2DF_FTYPE_V2DF_V2DF_INT: - case V4SF_FTYPE_V4SF_V2DF_INT: - case V2DF_FTYPE_V2DF_V4SF_INT: - nargs = 3; - break; - case V8SF_FTYPE_V8DF_V8SF_QI_INT: - case V8DF_FTYPE_V8DF_V8DF_QI_INT: - case V8SI_FTYPE_V8DF_V8SI_QI_INT: - case V8DI_FTYPE_V8DF_V8DI_QI_INT: - case V8SF_FTYPE_V8DI_V8SF_QI_INT: - case V8DF_FTYPE_V8DI_V8DF_QI_INT: - case V16SF_FTYPE_V16SF_V16SF_HI_INT: - case V8DI_FTYPE_V8SF_V8DI_QI_INT: - case V16SF_FTYPE_V16SI_V16SF_HI_INT: - case V16SI_FTYPE_V16SF_V16SI_HI_INT: - case V8DF_FTYPE_V8SF_V8DF_QI_INT: - case V16SF_FTYPE_V16HI_V16SF_HI_INT: - case V2DF_FTYPE_V2DF_V2DF_V2DF_INT: - case V4SF_FTYPE_V4SF_V4SF_V4SF_INT: - nargs = 4; - break; - case V4SF_FTYPE_V4SF_V4SF_INT_INT: - case V2DF_FTYPE_V2DF_V2DF_INT_INT: - nargs_constant = 2; - nargs = 4; - break; - case INT_FTYPE_V4SF_V4SF_INT_INT: - case INT_FTYPE_V2DF_V2DF_INT_INT: - return ix86_expand_sse_comi_round (d, exp, target); - case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT: - case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT: - case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT: - case 
V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT: - case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT: - case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT: - case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT: - case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT: - nargs = 5; - break; - case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT: - case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT: - nargs_constant = 4; - nargs = 5; - break; - case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT: - case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT: - case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT: - case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT: - nargs_constant = 3; - nargs = 5; - break; - case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT: - case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT: - case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT: - case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT: - case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT: - case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT: - nargs = 6; - nargs_constant = 4; - break; - case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT: - case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT: - case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT: - case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT: - nargs = 6; - nargs_constant = 3; - break; - default: - gcc_unreachable (); - } - gcc_assert (nargs <= ARRAY_SIZE (args)); - - if (optimize - || target == 0 - || GET_MODE (target) != tmode - || !insn_p->operand[0].predicate (target, tmode)) - target = gen_reg_rtx (tmode); - - for (i = 0; i < nargs; i++) - { - tree arg = CALL_EXPR_ARG (exp, i); - rtx op = expand_normal (arg); - machine_mode mode = insn_p->operand[i + 1].mode; - bool match = insn_p->operand[i + 1].predicate (op, mode); - - if (i == nargs - nargs_constant) - { - if (!match) - { - switch (icode) - { - case CODE_FOR_avx512f_getmantv8df_mask_round: - case CODE_FOR_avx512f_getmantv16sf_mask_round: - case CODE_FOR_avx512f_vgetmantv2df_round: - case CODE_FOR_avx512f_vgetmantv2df_mask_round: - case CODE_FOR_avx512f_vgetmantv4sf_round: - case CODE_FOR_avx512f_vgetmantv4sf_mask_round: - error ("the immediate argument must be a 4-bit immediate"); - return const0_rtx; - case CODE_FOR_avx512f_cmpv8df3_mask_round: - case CODE_FOR_avx512f_cmpv16sf3_mask_round: - case CODE_FOR_avx512f_vmcmpv2df3_mask_round: - case CODE_FOR_avx512f_vmcmpv4sf3_mask_round: - error ("the immediate argument must be a 5-bit immediate"); - return const0_rtx; - default: - error ("the immediate argument must be an 8-bit immediate"); - return const0_rtx; - } - } - } - else if (i == nargs-1) - { - if (!insn_p->operand[nargs].predicate (op, SImode)) - { - error ("incorrect rounding operand"); - return const0_rtx; - } - - /* If there is no rounding use normal version of the pattern. 
*/ - if (INTVAL (op) == NO_ROUND) - redundant_embed_rnd = 1; - } - else - { - if (VECTOR_MODE_P (mode)) - op = safe_vector_operand (op, mode); - - op = fixup_modeless_constant (op, mode); - - if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) - { - if (optimize || !match) - op = copy_to_mode_reg (mode, op); - } - else - { - op = copy_to_reg (op); - op = lowpart_subreg (mode, op, GET_MODE (op)); - } - } - - args[i].op = op; - args[i].mode = mode; - } - - switch (nargs) - { - case 1: - pat = GEN_FCN (icode) (target, args[0].op); - break; - case 2: - pat = GEN_FCN (icode) (target, args[0].op, args[1].op); - break; - case 3: - pat = GEN_FCN (icode) (target, args[0].op, args[1].op, - args[2].op); - break; - case 4: - pat = GEN_FCN (icode) (target, args[0].op, args[1].op, - args[2].op, args[3].op); - break; - case 5: - pat = GEN_FCN (icode) (target, args[0].op, args[1].op, - args[2].op, args[3].op, args[4].op); - break; - case 6: - pat = GEN_FCN (icode) (target, args[0].op, args[1].op, - args[2].op, args[3].op, args[4].op, - args[5].op); - break; - default: - gcc_unreachable (); - } - - if (!pat) - return 0; - - if (redundant_embed_rnd) - pat = ix86_erase_embedded_rounding (pat); - - emit_insn (pat); - return target; -} - -/* Subroutine of ix86_expand_builtin to take care of special insns - with variable number of operands. */ - -static rtx -ix86_expand_special_args_builtin (const struct builtin_description *d, - tree exp, rtx target) -{ - tree arg; - rtx pat, op; - unsigned int i, nargs, arg_adjust, memory; - bool aligned_mem = false; - struct - { - rtx op; - machine_mode mode; - } args[3]; - enum insn_code icode = d->icode; - bool last_arg_constant = false; - const struct insn_data_d *insn_p = &insn_data[icode]; - machine_mode tmode = insn_p->operand[0].mode; - enum { load, store } klass; - - switch ((enum ix86_builtin_func_type) d->flag) - { - case VOID_FTYPE_VOID: - emit_insn (GEN_FCN (icode) (target)); - return 0; - case VOID_FTYPE_UINT64: - case VOID_FTYPE_UNSIGNED: - nargs = 0; - klass = store; - memory = 0; - break; - - case INT_FTYPE_VOID: - case USHORT_FTYPE_VOID: - case UINT64_FTYPE_VOID: - case UINT_FTYPE_VOID: - case UNSIGNED_FTYPE_VOID: - nargs = 0; - klass = load; - memory = 0; - break; - case UINT64_FTYPE_PUNSIGNED: - case V2DI_FTYPE_PV2DI: - case V4DI_FTYPE_PV4DI: - case V32QI_FTYPE_PCCHAR: - case V16QI_FTYPE_PCCHAR: - case V8SF_FTYPE_PCV4SF: - case V8SF_FTYPE_PCFLOAT: - case V4SF_FTYPE_PCFLOAT: - case V4DF_FTYPE_PCV2DF: - case V4DF_FTYPE_PCDOUBLE: - case V2DF_FTYPE_PCDOUBLE: - case VOID_FTYPE_PVOID: - case V8DI_FTYPE_PV8DI: - nargs = 1; - klass = load; - memory = 0; - switch (icode) - { - case CODE_FOR_sse4_1_movntdqa: - case CODE_FOR_avx2_movntdqa: - case CODE_FOR_avx512f_movntdqa: - aligned_mem = true; - break; - default: - break; - } - break; - case VOID_FTYPE_PV2SF_V4SF: - case VOID_FTYPE_PV8DI_V8DI: - case VOID_FTYPE_PV4DI_V4DI: - case VOID_FTYPE_PV2DI_V2DI: - case VOID_FTYPE_PCHAR_V32QI: - case VOID_FTYPE_PCHAR_V16QI: - case VOID_FTYPE_PFLOAT_V16SF: - case VOID_FTYPE_PFLOAT_V8SF: - case VOID_FTYPE_PFLOAT_V4SF: - case VOID_FTYPE_PDOUBLE_V8DF: - case VOID_FTYPE_PDOUBLE_V4DF: - case VOID_FTYPE_PDOUBLE_V2DF: - case VOID_FTYPE_PLONGLONG_LONGLONG: - case VOID_FTYPE_PULONGLONG_ULONGLONG: - case VOID_FTYPE_PUNSIGNED_UNSIGNED: - case VOID_FTYPE_PINT_INT: - nargs = 1; - klass = store; - /* Reserve memory operand for target. */ - memory = ARRAY_SIZE (args); - switch (icode) - { - /* These builtins and instructions require the memory - to be properly aligned. 
*/ - case CODE_FOR_avx_movntv4di: - case CODE_FOR_sse2_movntv2di: - case CODE_FOR_avx_movntv8sf: - case CODE_FOR_sse_movntv4sf: - case CODE_FOR_sse4a_vmmovntv4sf: - case CODE_FOR_avx_movntv4df: - case CODE_FOR_sse2_movntv2df: - case CODE_FOR_sse4a_vmmovntv2df: - case CODE_FOR_sse2_movntidi: - case CODE_FOR_sse_movntq: - case CODE_FOR_sse2_movntisi: - case CODE_FOR_avx512f_movntv16sf: - case CODE_FOR_avx512f_movntv8df: - case CODE_FOR_avx512f_movntv8di: - aligned_mem = true; - break; - default: - break; - } - break; - case VOID_FTYPE_PVOID_PCVOID: - nargs = 1; - klass = store; - memory = 0; - - break; - case V4SF_FTYPE_V4SF_PCV2SF: - case V2DF_FTYPE_V2DF_PCDOUBLE: - nargs = 2; - klass = load; - memory = 1; - break; - case V8SF_FTYPE_PCV8SF_V8SI: - case V4DF_FTYPE_PCV4DF_V4DI: - case V4SF_FTYPE_PCV4SF_V4SI: - case V2DF_FTYPE_PCV2DF_V2DI: - case V8SI_FTYPE_PCV8SI_V8SI: - case V4DI_FTYPE_PCV4DI_V4DI: - case V4SI_FTYPE_PCV4SI_V4SI: - case V2DI_FTYPE_PCV2DI_V2DI: - case VOID_FTYPE_INT_INT64: - nargs = 2; - klass = load; - memory = 0; - break; - case VOID_FTYPE_PV8DF_V8DF_UQI: - case VOID_FTYPE_PV4DF_V4DF_UQI: - case VOID_FTYPE_PV2DF_V2DF_UQI: - case VOID_FTYPE_PV16SF_V16SF_UHI: - case VOID_FTYPE_PV8SF_V8SF_UQI: - case VOID_FTYPE_PV4SF_V4SF_UQI: - case VOID_FTYPE_PV8DI_V8DI_UQI: - case VOID_FTYPE_PV4DI_V4DI_UQI: - case VOID_FTYPE_PV2DI_V2DI_UQI: - case VOID_FTYPE_PV16SI_V16SI_UHI: - case VOID_FTYPE_PV8SI_V8SI_UQI: - case VOID_FTYPE_PV4SI_V4SI_UQI: - case VOID_FTYPE_PV64QI_V64QI_UDI: - case VOID_FTYPE_PV32HI_V32HI_USI: - case VOID_FTYPE_PV32QI_V32QI_USI: - case VOID_FTYPE_PV16QI_V16QI_UHI: - case VOID_FTYPE_PV16HI_V16HI_UHI: - case VOID_FTYPE_PV8HI_V8HI_UQI: - switch (icode) - { - /* These builtins and instructions require the memory - to be properly aligned. 
*/ - case CODE_FOR_avx512f_storev16sf_mask: - case CODE_FOR_avx512f_storev16si_mask: - case CODE_FOR_avx512f_storev8df_mask: - case CODE_FOR_avx512f_storev8di_mask: - case CODE_FOR_avx512vl_storev8sf_mask: - case CODE_FOR_avx512vl_storev8si_mask: - case CODE_FOR_avx512vl_storev4df_mask: - case CODE_FOR_avx512vl_storev4di_mask: - case CODE_FOR_avx512vl_storev4sf_mask: - case CODE_FOR_avx512vl_storev4si_mask: - case CODE_FOR_avx512vl_storev2df_mask: - case CODE_FOR_avx512vl_storev2di_mask: - aligned_mem = true; - break; - default: - break; - } - /* FALLTHRU */ - case VOID_FTYPE_PV8SF_V8SI_V8SF: - case VOID_FTYPE_PV4DF_V4DI_V4DF: - case VOID_FTYPE_PV4SF_V4SI_V4SF: - case VOID_FTYPE_PV2DF_V2DI_V2DF: - case VOID_FTYPE_PV8SI_V8SI_V8SI: - case VOID_FTYPE_PV4DI_V4DI_V4DI: - case VOID_FTYPE_PV4SI_V4SI_V4SI: - case VOID_FTYPE_PV2DI_V2DI_V2DI: - case VOID_FTYPE_PV8SI_V8DI_UQI: - case VOID_FTYPE_PV8HI_V8DI_UQI: - case VOID_FTYPE_PV16HI_V16SI_UHI: - case VOID_FTYPE_PUDI_V8DI_UQI: - case VOID_FTYPE_PV16QI_V16SI_UHI: - case VOID_FTYPE_PV4SI_V4DI_UQI: - case VOID_FTYPE_PUDI_V2DI_UQI: - case VOID_FTYPE_PUDI_V4DI_UQI: - case VOID_FTYPE_PUSI_V2DI_UQI: - case VOID_FTYPE_PV8HI_V8SI_UQI: - case VOID_FTYPE_PUDI_V4SI_UQI: - case VOID_FTYPE_PUSI_V4DI_UQI: - case VOID_FTYPE_PUHI_V2DI_UQI: - case VOID_FTYPE_PUDI_V8SI_UQI: - case VOID_FTYPE_PUSI_V4SI_UQI: - case VOID_FTYPE_PCHAR_V64QI_UDI: - case VOID_FTYPE_PCHAR_V32QI_USI: - case VOID_FTYPE_PCHAR_V16QI_UHI: - case VOID_FTYPE_PSHORT_V32HI_USI: - case VOID_FTYPE_PSHORT_V16HI_UHI: - case VOID_FTYPE_PSHORT_V8HI_UQI: - case VOID_FTYPE_PINT_V16SI_UHI: - case VOID_FTYPE_PINT_V8SI_UQI: - case VOID_FTYPE_PINT_V4SI_UQI: - case VOID_FTYPE_PINT64_V8DI_UQI: - case VOID_FTYPE_PINT64_V4DI_UQI: - case VOID_FTYPE_PINT64_V2DI_UQI: - case VOID_FTYPE_PDOUBLE_V8DF_UQI: - case VOID_FTYPE_PDOUBLE_V4DF_UQI: - case VOID_FTYPE_PDOUBLE_V2DF_UQI: - case VOID_FTYPE_PFLOAT_V16SF_UHI: - case VOID_FTYPE_PFLOAT_V8SF_UQI: - case VOID_FTYPE_PFLOAT_V4SF_UQI: - case VOID_FTYPE_PV32QI_V32HI_USI: - case VOID_FTYPE_PV16QI_V16HI_UHI: - case VOID_FTYPE_PUDI_V8HI_UQI: - nargs = 2; - klass = store; - /* Reserve memory operand for target. */ - memory = ARRAY_SIZE (args); - break; - case V4SF_FTYPE_PCV4SF_V4SF_UQI: - case V8SF_FTYPE_PCV8SF_V8SF_UQI: - case V16SF_FTYPE_PCV16SF_V16SF_UHI: - case V4SI_FTYPE_PCV4SI_V4SI_UQI: - case V8SI_FTYPE_PCV8SI_V8SI_UQI: - case V16SI_FTYPE_PCV16SI_V16SI_UHI: - case V2DF_FTYPE_PCV2DF_V2DF_UQI: - case V4DF_FTYPE_PCV4DF_V4DF_UQI: - case V8DF_FTYPE_PCV8DF_V8DF_UQI: - case V2DI_FTYPE_PCV2DI_V2DI_UQI: - case V4DI_FTYPE_PCV4DI_V4DI_UQI: - case V8DI_FTYPE_PCV8DI_V8DI_UQI: - case V64QI_FTYPE_PCV64QI_V64QI_UDI: - case V32HI_FTYPE_PCV32HI_V32HI_USI: - case V32QI_FTYPE_PCV32QI_V32QI_USI: - case V16QI_FTYPE_PCV16QI_V16QI_UHI: - case V16HI_FTYPE_PCV16HI_V16HI_UHI: - case V8HI_FTYPE_PCV8HI_V8HI_UQI: - switch (icode) - { - /* These builtins and instructions require the memory - to be properly aligned. 
*/ - case CODE_FOR_avx512f_loadv16sf_mask: - case CODE_FOR_avx512f_loadv16si_mask: - case CODE_FOR_avx512f_loadv8df_mask: - case CODE_FOR_avx512f_loadv8di_mask: - case CODE_FOR_avx512vl_loadv8sf_mask: - case CODE_FOR_avx512vl_loadv8si_mask: - case CODE_FOR_avx512vl_loadv4df_mask: - case CODE_FOR_avx512vl_loadv4di_mask: - case CODE_FOR_avx512vl_loadv4sf_mask: - case CODE_FOR_avx512vl_loadv4si_mask: - case CODE_FOR_avx512vl_loadv2df_mask: - case CODE_FOR_avx512vl_loadv2di_mask: - case CODE_FOR_avx512bw_loadv64qi_mask: - case CODE_FOR_avx512vl_loadv32qi_mask: - case CODE_FOR_avx512vl_loadv16qi_mask: - case CODE_FOR_avx512bw_loadv32hi_mask: - case CODE_FOR_avx512vl_loadv16hi_mask: - case CODE_FOR_avx512vl_loadv8hi_mask: - aligned_mem = true; - break; - default: - break; - } - /* FALLTHRU */ - case V64QI_FTYPE_PCCHAR_V64QI_UDI: - case V32QI_FTYPE_PCCHAR_V32QI_USI: - case V16QI_FTYPE_PCCHAR_V16QI_UHI: - case V32HI_FTYPE_PCSHORT_V32HI_USI: - case V16HI_FTYPE_PCSHORT_V16HI_UHI: - case V8HI_FTYPE_PCSHORT_V8HI_UQI: - case V16SI_FTYPE_PCINT_V16SI_UHI: - case V8SI_FTYPE_PCINT_V8SI_UQI: - case V4SI_FTYPE_PCINT_V4SI_UQI: - case V8DI_FTYPE_PCINT64_V8DI_UQI: - case V4DI_FTYPE_PCINT64_V4DI_UQI: - case V2DI_FTYPE_PCINT64_V2DI_UQI: - case V8DF_FTYPE_PCDOUBLE_V8DF_UQI: - case V4DF_FTYPE_PCDOUBLE_V4DF_UQI: - case V2DF_FTYPE_PCDOUBLE_V2DF_UQI: - case V16SF_FTYPE_PCFLOAT_V16SF_UHI: - case V8SF_FTYPE_PCFLOAT_V8SF_UQI: - case V4SF_FTYPE_PCFLOAT_V4SF_UQI: - nargs = 3; - klass = load; - memory = 0; - break; - case VOID_FTYPE_UINT_UINT_UINT: - case VOID_FTYPE_UINT64_UINT_UINT: - case UCHAR_FTYPE_UINT_UINT_UINT: - case UCHAR_FTYPE_UINT64_UINT_UINT: - nargs = 3; - klass = load; - memory = ARRAY_SIZE (args); - last_arg_constant = true; - break; - default: - gcc_unreachable (); - } - - gcc_assert (nargs <= ARRAY_SIZE (args)); - - if (klass == store) - { - arg = CALL_EXPR_ARG (exp, 0); - op = expand_normal (arg); - gcc_assert (target == 0); - if (memory) - { - op = ix86_zero_extend_to_Pmode (op); - target = gen_rtx_MEM (tmode, op); - /* target at this point has just BITS_PER_UNIT MEM_ALIGN - on it. Try to improve it using get_pointer_alignment, - and if the special builtin is one that requires strict - mode alignment, also from it's GET_MODE_ALIGNMENT. - Failure to do so could lead to ix86_legitimate_combined_insn - rejecting all changes to such insns. */ - unsigned int align = get_pointer_alignment (arg); - if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode)) - align = GET_MODE_ALIGNMENT (tmode); - if (MEM_ALIGN (target) < align) - set_mem_align (target, align); - } - else - target = force_reg (tmode, op); - arg_adjust = 1; - } - else - { - arg_adjust = 0; - if (optimize - || target == 0 - || !register_operand (target, tmode) - || GET_MODE (target) != tmode) - target = gen_reg_rtx (tmode); - } - - for (i = 0; i < nargs; i++) - { - machine_mode mode = insn_p->operand[i + 1].mode; - bool match; - - arg = CALL_EXPR_ARG (exp, i + arg_adjust); - op = expand_normal (arg); - match = insn_p->operand[i + 1].predicate (op, mode); - - if (last_arg_constant && (i + 1) == nargs) - { - if (!match) - { - if (icode == CODE_FOR_lwp_lwpvalsi3 - || icode == CODE_FOR_lwp_lwpinssi3 - || icode == CODE_FOR_lwp_lwpvaldi3 - || icode == CODE_FOR_lwp_lwpinsdi3) - error ("the last argument must be a 32-bit immediate"); - else - error ("the last argument must be an 8-bit immediate"); - return const0_rtx; - } - } - else - { - if (i == memory) - { - /* This must be the memory operand. 
*/ - op = ix86_zero_extend_to_Pmode (op); - op = gen_rtx_MEM (mode, op); - /* op at this point has just BITS_PER_UNIT MEM_ALIGN - on it. Try to improve it using get_pointer_alignment, - and if the special builtin is one that requires strict - mode alignment, also from it's GET_MODE_ALIGNMENT. - Failure to do so could lead to ix86_legitimate_combined_insn - rejecting all changes to such insns. */ - unsigned int align = get_pointer_alignment (arg); - if (aligned_mem && align < GET_MODE_ALIGNMENT (mode)) - align = GET_MODE_ALIGNMENT (mode); - if (MEM_ALIGN (op) < align) - set_mem_align (op, align); - } - else - { - /* This must be register. */ - if (VECTOR_MODE_P (mode)) - op = safe_vector_operand (op, mode); - - op = fixup_modeless_constant (op, mode); - - if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) - op = copy_to_mode_reg (mode, op); - else - { - op = copy_to_reg (op); - op = lowpart_subreg (mode, op, GET_MODE (op)); - } - } - } - - args[i].op = op; - args[i].mode = mode; - } - - switch (nargs) - { - case 0: - pat = GEN_FCN (icode) (target); - break; - case 1: - pat = GEN_FCN (icode) (target, args[0].op); - break; - case 2: - pat = GEN_FCN (icode) (target, args[0].op, args[1].op); - break; - case 3: - pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); - break; - default: - gcc_unreachable (); - } - - if (! pat) - return 0; - emit_insn (pat); - return klass == store ? 0 : target; -} - -/* Return the integer constant in ARG. Constrain it to be in the range - of the subparts of VEC_TYPE; issue an error if not. */ - -static int -get_element_number (tree vec_type, tree arg) -{ - unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; - - if (!tree_fits_uhwi_p (arg) - || (elt = tree_to_uhwi (arg), elt > max)) - { - error ("selector must be an integer constant in the range " - "[0, %wi]", max); - return 0; - } - - return elt; -} - -/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around - ix86_expand_vector_init. We DO have language-level syntax for this, in - the form of (type){ init-list }. Except that since we can't place emms - instructions from inside the compiler, we can't allow the use of MMX - registers unless the user explicitly asks for it. So we do *not* define - vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead - we have builtins invoked by mmintrin.h that gives us license to emit - these sorts of instructions. */ - -static rtx -ix86_expand_vec_init_builtin (tree type, tree exp, rtx target) -{ - machine_mode tmode = TYPE_MODE (type); - machine_mode inner_mode = GET_MODE_INNER (tmode); - int i, n_elt = GET_MODE_NUNITS (tmode); - rtvec v = rtvec_alloc (n_elt); - - gcc_assert (VECTOR_MODE_P (tmode)); - gcc_assert (call_expr_nargs (exp) == n_elt); - - for (i = 0; i < n_elt; ++i) - { - rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); - RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); - } - - if (!target || !register_operand (target, tmode)) - target = gen_reg_rtx (tmode); - - ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v)); - return target; -} - -/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around - ix86_expand_vector_extract. They would be redundant (for non-MMX) if we - had a language-level syntax for referencing vector elements. 
*/ - -static rtx -ix86_expand_vec_ext_builtin (tree exp, rtx target) -{ - machine_mode tmode, mode0; - tree arg0, arg1; - int elt; - rtx op0; - - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - - op0 = expand_normal (arg0); - elt = get_element_number (TREE_TYPE (arg0), arg1); - - tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); - mode0 = TYPE_MODE (TREE_TYPE (arg0)); - gcc_assert (VECTOR_MODE_P (mode0)); - - op0 = force_reg (mode0, op0); - - if (optimize || !target || !register_operand (target, tmode)) - target = gen_reg_rtx (tmode); - - ix86_expand_vector_extract (true, target, op0, elt); - - return target; -} - -/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around - ix86_expand_vector_set. They would be redundant (for non-MMX) if we had - a language-level syntax for referencing vector elements. */ - -static rtx -ix86_expand_vec_set_builtin (tree exp) -{ - machine_mode tmode, mode1; - tree arg0, arg1, arg2; - int elt; - rtx op0, op1, target; - - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - arg2 = CALL_EXPR_ARG (exp, 2); - - tmode = TYPE_MODE (TREE_TYPE (arg0)); - mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); - gcc_assert (VECTOR_MODE_P (tmode)); - - op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL); - op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL); - elt = get_element_number (TREE_TYPE (arg0), arg2); - - if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) - op1 = convert_modes (mode1, GET_MODE (op1), op1, true); - - op0 = force_reg (tmode, op0); - op1 = force_reg (mode1, op1); - - /* OP0 is the source of these builtin functions and shouldn't be - modified. Create a copy, use it and return it as target. */ - target = gen_reg_rtx (tmode); - emit_move_insn (target, op0); - ix86_expand_vector_set (true, target, op1, elt); - - return target; -} - -/* Expand an expression EXP that calls a built-in function, - with result going to TARGET if that's convenient - (and in mode MODE if that's convenient). - SUBTARGET may be used as the target for computing one of EXP's operands. - IGNORE is nonzero if the value is to be ignored. */ - -rtx -ix86_expand_builtin (tree exp, rtx target, rtx subtarget, - machine_mode mode, int ignore) -{ - size_t i; - enum insn_code icode, icode2; - tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); - tree arg0, arg1, arg2, arg3, arg4; - rtx op0, op1, op2, op3, op4, pat, pat2, insn; - machine_mode mode0, mode1, mode2, mode3, mode4; - unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); - - /* For CPU builtins that can be folded, fold first and expand the fold. */ - switch (fcode) - { - case IX86_BUILTIN_CPU_INIT: - { - /* Make it call __cpu_indicator_init in libgcc. */ - tree call_expr, fndecl, type; - type = build_function_type_list (integer_type_node, NULL_TREE); - fndecl = build_fn_decl ("__cpu_indicator_init", type); - call_expr = build_call_expr (fndecl, 0); - return expand_expr (call_expr, target, mode, EXPAND_NORMAL); - } - case IX86_BUILTIN_CPU_IS: - case IX86_BUILTIN_CPU_SUPPORTS: - { - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree fold_expr = fold_builtin_cpu (fndecl, &arg0); - gcc_assert (fold_expr != NULL_TREE); - return expand_expr (fold_expr, target, mode, EXPAND_NORMAL); - } - } - - HOST_WIDE_INT isa = ix86_isa_flags; - HOST_WIDE_INT isa2 = ix86_isa_flags2; - HOST_WIDE_INT bisa = ix86_builtins_isa[fcode].isa; - HOST_WIDE_INT bisa2 = ix86_builtins_isa[fcode].isa2; - /* The general case is we require all the ISAs specified in bisa{,2} - to be enabled. 
- The exceptions are: - OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A - OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 - OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4 - where for each such pair it is sufficient if either of the ISAs is - enabled, plus if it is ored with other options also those others. - OPTION_MASK_ISA_MMX in bisa is satisfied also if TARGET_MMX_WITH_SSE. */ - if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) - == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) - && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) != 0) - isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A); - if (((bisa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) - == (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) - && (isa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) != 0) - isa |= (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32); - if (((bisa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) - == (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) - && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0) - isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4); - if ((bisa & OPTION_MASK_ISA_MMX) && !TARGET_MMX && TARGET_MMX_WITH_SSE) - { - bisa &= ~OPTION_MASK_ISA_MMX; - bisa |= OPTION_MASK_ISA_SSE2; - } - if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2) - { - bool add_abi_p = bisa & OPTION_MASK_ISA_64BIT; - if (TARGET_ABI_X32) - bisa |= OPTION_MASK_ABI_X32; - else - bisa |= OPTION_MASK_ABI_64; - char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL, - (enum fpmath_unit) 0, - (enum prefer_vector_width) 0, - false, add_abi_p); - if (!opts) - error ("%qE needs unknown isa option", fndecl); - else - { - gcc_assert (opts != NULL); - error ("%qE needs isa option %s", fndecl, opts); - free (opts); - } - return expand_call (exp, target, ignore); - } - - switch (fcode) - { - case IX86_BUILTIN_MASKMOVQ: - case IX86_BUILTIN_MASKMOVDQU: - icode = (fcode == IX86_BUILTIN_MASKMOVQ - ? CODE_FOR_mmx_maskmovq - : CODE_FOR_sse2_maskmovdqu); - /* Note the arg order is different from the operand order. */ - arg1 = CALL_EXPR_ARG (exp, 0); - arg2 = CALL_EXPR_ARG (exp, 1); - arg0 = CALL_EXPR_ARG (exp, 2); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - op2 = expand_normal (arg2); - mode0 = insn_data[icode].operand[0].mode; - mode1 = insn_data[icode].operand[1].mode; - mode2 = insn_data[icode].operand[2].mode; - - op0 = ix86_zero_extend_to_Pmode (op0); - op0 = gen_rtx_MEM (mode1, op0); - - if (!insn_data[icode].operand[0].predicate (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if (!insn_data[icode].operand[1].predicate (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - if (!insn_data[icode].operand[2].predicate (op2, mode2)) - op2 = copy_to_mode_reg (mode2, op2); - pat = GEN_FCN (icode) (op0, op1, op2); - if (! 
pat) - return 0; - emit_insn (pat); - return 0; - - case IX86_BUILTIN_LDMXCSR: - op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); - target = assign_386_stack_local (SImode, SLOT_TEMP); - emit_move_insn (target, op0); - emit_insn (gen_sse_ldmxcsr (target)); - return 0; - - case IX86_BUILTIN_STMXCSR: - target = assign_386_stack_local (SImode, SLOT_TEMP); - emit_insn (gen_sse_stmxcsr (target)); - return copy_to_mode_reg (SImode, target); - - case IX86_BUILTIN_CLFLUSH: - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = expand_normal (arg0); - icode = CODE_FOR_sse2_clflush; - if (!insn_data[icode].operand[0].predicate (op0, Pmode)) - op0 = ix86_zero_extend_to_Pmode (op0); - - emit_insn (gen_sse2_clflush (op0)); - return 0; - - case IX86_BUILTIN_CLWB: - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = expand_normal (arg0); - icode = CODE_FOR_clwb; - if (!insn_data[icode].operand[0].predicate (op0, Pmode)) - op0 = ix86_zero_extend_to_Pmode (op0); - - emit_insn (gen_clwb (op0)); - return 0; - - case IX86_BUILTIN_CLFLUSHOPT: - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = expand_normal (arg0); - icode = CODE_FOR_clflushopt; - if (!insn_data[icode].operand[0].predicate (op0, Pmode)) - op0 = ix86_zero_extend_to_Pmode (op0); - - emit_insn (gen_clflushopt (op0)); - return 0; - - case IX86_BUILTIN_MONITOR: - case IX86_BUILTIN_MONITORX: - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - arg2 = CALL_EXPR_ARG (exp, 2); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - op2 = expand_normal (arg2); - if (!REG_P (op0)) - op0 = ix86_zero_extend_to_Pmode (op0); - if (!REG_P (op1)) - op1 = copy_to_mode_reg (SImode, op1); - if (!REG_P (op2)) - op2 = copy_to_mode_reg (SImode, op2); - - emit_insn (fcode == IX86_BUILTIN_MONITOR - ? gen_sse3_monitor (Pmode, op0, op1, op2) - : gen_monitorx (Pmode, op0, op1, op2)); - return 0; - - case IX86_BUILTIN_MWAIT: - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - if (!REG_P (op0)) - op0 = copy_to_mode_reg (SImode, op0); - if (!REG_P (op1)) - op1 = copy_to_mode_reg (SImode, op1); - emit_insn (gen_sse3_mwait (op0, op1)); - return 0; - - case IX86_BUILTIN_MWAITX: - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - arg2 = CALL_EXPR_ARG (exp, 2); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - op2 = expand_normal (arg2); - if (!REG_P (op0)) - op0 = copy_to_mode_reg (SImode, op0); - if (!REG_P (op1)) - op1 = copy_to_mode_reg (SImode, op1); - if (!REG_P (op2)) - op2 = copy_to_mode_reg (SImode, op2); - emit_insn (gen_mwaitx (op0, op1, op2)); - return 0; - - case IX86_BUILTIN_UMONITOR: - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = expand_normal (arg0); - - op0 = ix86_zero_extend_to_Pmode (op0); - emit_insn (gen_umonitor (Pmode, op0)); - return 0; - - case IX86_BUILTIN_UMWAIT: - case IX86_BUILTIN_TPAUSE: - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - - if (!REG_P (op0)) - op0 = copy_to_mode_reg (SImode, op0); - - op1 = force_reg (DImode, op1); - - if (TARGET_64BIT) - { - op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32), - NULL, 1, OPTAB_DIRECT); - switch (fcode) - { - case IX86_BUILTIN_UMWAIT: - icode = CODE_FOR_umwait_rex64; - break; - case IX86_BUILTIN_TPAUSE: - icode = CODE_FOR_tpause_rex64; - break; - default: - gcc_unreachable (); - } - - op2 = gen_lowpart (SImode, op2); - op1 = gen_lowpart (SImode, op1); - pat = GEN_FCN (icode) (op0, op1, op2); - } - else - { - switch (fcode) - { - case 
IX86_BUILTIN_UMWAIT: - icode = CODE_FOR_umwait; - break; - case IX86_BUILTIN_TPAUSE: - icode = CODE_FOR_tpause; - break; - default: - gcc_unreachable (); - } - pat = GEN_FCN (icode) (op0, op1); - } - - if (!pat) - return 0; - - emit_insn (pat); - - if (target == 0 - || !register_operand (target, QImode)) - target = gen_reg_rtx (QImode); - - pat = gen_rtx_EQ (QImode, gen_rtx_REG (CCCmode, FLAGS_REG), - const0_rtx); - emit_insn (gen_rtx_SET (target, pat)); - - return target; - - case IX86_BUILTIN_CLZERO: - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = expand_normal (arg0); - if (!REG_P (op0)) - op0 = ix86_zero_extend_to_Pmode (op0); - emit_insn (gen_clzero (Pmode, op0)); - return 0; - - case IX86_BUILTIN_CLDEMOTE: - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = expand_normal (arg0); - icode = CODE_FOR_cldemote; - if (!insn_data[icode].operand[0].predicate (op0, Pmode)) - op0 = ix86_zero_extend_to_Pmode (op0); - - emit_insn (gen_cldemote (op0)); - return 0; - - case IX86_BUILTIN_VEC_INIT_V2SI: - case IX86_BUILTIN_VEC_INIT_V4HI: - case IX86_BUILTIN_VEC_INIT_V8QI: - return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target); - - case IX86_BUILTIN_VEC_EXT_V2DF: - case IX86_BUILTIN_VEC_EXT_V2DI: - case IX86_BUILTIN_VEC_EXT_V4SF: - case IX86_BUILTIN_VEC_EXT_V4SI: - case IX86_BUILTIN_VEC_EXT_V8HI: - case IX86_BUILTIN_VEC_EXT_V2SI: - case IX86_BUILTIN_VEC_EXT_V4HI: - case IX86_BUILTIN_VEC_EXT_V16QI: - return ix86_expand_vec_ext_builtin (exp, target); - - case IX86_BUILTIN_VEC_SET_V2DI: - case IX86_BUILTIN_VEC_SET_V4SF: - case IX86_BUILTIN_VEC_SET_V4SI: - case IX86_BUILTIN_VEC_SET_V8HI: - case IX86_BUILTIN_VEC_SET_V4HI: - case IX86_BUILTIN_VEC_SET_V16QI: - return ix86_expand_vec_set_builtin (exp); - - case IX86_BUILTIN_NANQ: - case IX86_BUILTIN_NANSQ: - return expand_call (exp, target, ignore); - - case IX86_BUILTIN_RDPID: - - op0 = gen_reg_rtx (word_mode); - - if (TARGET_64BIT) - { - insn = gen_rdpid_rex64 (op0); - op0 = convert_to_mode (SImode, op0, 1); - } - else - insn = gen_rdpid (op0); - - emit_insn (insn); - - if (target == 0 - || !register_operand (target, SImode)) - target = gen_reg_rtx (SImode); - - emit_move_insn (target, op0); - return target; - - case IX86_BUILTIN_2INTERSECTD512: - case IX86_BUILTIN_2INTERSECTQ512: - case IX86_BUILTIN_2INTERSECTD256: - case IX86_BUILTIN_2INTERSECTQ256: - case IX86_BUILTIN_2INTERSECTD128: - case IX86_BUILTIN_2INTERSECTQ128: - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - arg2 = CALL_EXPR_ARG (exp, 2); - arg3 = CALL_EXPR_ARG (exp, 3); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - op2 = expand_normal (arg2); - op3 = expand_normal (arg3); - - if (!address_operand (op0, VOIDmode)) - { - op0 = convert_memory_address (Pmode, op0); - op0 = copy_addr_to_reg (op0); - } - if (!address_operand (op1, VOIDmode)) - { - op1 = convert_memory_address (Pmode, op1); - op1 = copy_addr_to_reg (op1); - } - - switch (fcode) - { - case IX86_BUILTIN_2INTERSECTD512: - mode4 = P2HImode; - icode = CODE_FOR_avx512vp2intersect_2intersectv16si; - break; - case IX86_BUILTIN_2INTERSECTQ512: - mode4 = P2QImode; - icode = CODE_FOR_avx512vp2intersect_2intersectv8di; - break; - case IX86_BUILTIN_2INTERSECTD256: - mode4 = P2QImode; - icode = CODE_FOR_avx512vp2intersect_2intersectv8si; - break; - case IX86_BUILTIN_2INTERSECTQ256: - mode4 = P2QImode; - icode = CODE_FOR_avx512vp2intersect_2intersectv4di; - break; - case IX86_BUILTIN_2INTERSECTD128: - mode4 = P2QImode; - icode = CODE_FOR_avx512vp2intersect_2intersectv4si; - break; - case IX86_BUILTIN_2INTERSECTQ128: 
- mode4 = P2QImode; - icode = CODE_FOR_avx512vp2intersect_2intersectv2di; - break; - default: - gcc_unreachable (); - } - - mode2 = insn_data[icode].operand[1].mode; - mode3 = insn_data[icode].operand[2].mode; - if (!insn_data[icode].operand[1].predicate (op2, mode2)) - op2 = copy_to_mode_reg (mode2, op2); - if (!insn_data[icode].operand[2].predicate (op3, mode3)) - op3 = copy_to_mode_reg (mode3, op3); - - op4 = gen_reg_rtx (mode4); - emit_insn (GEN_FCN (icode) (op4, op2, op3)); - mode0 = mode4 == P2HImode ? HImode : QImode; - emit_move_insn (gen_rtx_MEM (mode0, op0), - gen_lowpart (mode0, op4)); - emit_move_insn (gen_rtx_MEM (mode0, op1), - gen_highpart (mode0, op4)); - - return 0; - - case IX86_BUILTIN_RDPMC: - case IX86_BUILTIN_RDTSC: - case IX86_BUILTIN_RDTSCP: - case IX86_BUILTIN_XGETBV: - - op0 = gen_reg_rtx (DImode); - op1 = gen_reg_rtx (DImode); - - if (fcode == IX86_BUILTIN_RDPMC) - { - arg0 = CALL_EXPR_ARG (exp, 0); - op2 = expand_normal (arg0); - if (!register_operand (op2, SImode)) - op2 = copy_to_mode_reg (SImode, op2); - - insn = (TARGET_64BIT - ? gen_rdpmc_rex64 (op0, op1, op2) - : gen_rdpmc (op0, op2)); - emit_insn (insn); - } - else if (fcode == IX86_BUILTIN_XGETBV) - { - arg0 = CALL_EXPR_ARG (exp, 0); - op2 = expand_normal (arg0); - if (!register_operand (op2, SImode)) - op2 = copy_to_mode_reg (SImode, op2); - - insn = (TARGET_64BIT - ? gen_xgetbv_rex64 (op0, op1, op2) - : gen_xgetbv (op0, op2)); - emit_insn (insn); - } - else if (fcode == IX86_BUILTIN_RDTSC) - { - insn = (TARGET_64BIT - ? gen_rdtsc_rex64 (op0, op1) - : gen_rdtsc (op0)); - emit_insn (insn); - } - else - { - op2 = gen_reg_rtx (SImode); - - insn = (TARGET_64BIT - ? gen_rdtscp_rex64 (op0, op1, op2) - : gen_rdtscp (op0, op2)); - emit_insn (insn); - - arg0 = CALL_EXPR_ARG (exp, 0); - op4 = expand_normal (arg0); - if (!address_operand (op4, VOIDmode)) - { - op4 = convert_memory_address (Pmode, op4); - op4 = copy_addr_to_reg (op4); - } - emit_move_insn (gen_rtx_MEM (SImode, op4), op2); - } - - if (target == 0 - || !register_operand (target, DImode)) - target = gen_reg_rtx (DImode); - - if (TARGET_64BIT) - { - op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32), - op1, 1, OPTAB_DIRECT); - op0 = expand_simple_binop (DImode, IOR, op0, op1, - op0, 1, OPTAB_DIRECT); - } - - emit_move_insn (target, op0); - return target; - - case IX86_BUILTIN_ENQCMD: - case IX86_BUILTIN_ENQCMDS: - case IX86_BUILTIN_MOVDIR64B: - - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - - op0 = ix86_zero_extend_to_Pmode (op0); - if (!address_operand (op1, VOIDmode)) - { - op1 = convert_memory_address (Pmode, op1); - op1 = copy_addr_to_reg (op1); - } - op1 = gen_rtx_MEM (XImode, op1); - - if (fcode == IX86_BUILTIN_MOVDIR64B) - { - emit_insn (gen_movdir64b (Pmode, op0, op1)); - return 0; - } - else - { - rtx pat; - - target = gen_reg_rtx (SImode); - emit_move_insn (target, const0_rtx); - target = gen_rtx_SUBREG (QImode, target, 0); - - if (fcode == IX86_BUILTIN_ENQCMD) - pat = gen_enqcmd (UNSPECV_ENQCMD, Pmode, op0, op1); - else - pat = gen_enqcmd (UNSPECV_ENQCMDS, Pmode, op0, op1); - - emit_insn (pat); - - emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), - gen_rtx_fmt_ee (EQ, QImode, - SET_DEST (pat), - const0_rtx))); - - return SUBREG_REG (target); - } - - case IX86_BUILTIN_FXSAVE: - case IX86_BUILTIN_FXRSTOR: - case IX86_BUILTIN_FXSAVE64: - case IX86_BUILTIN_FXRSTOR64: - case IX86_BUILTIN_FNSTENV: - case IX86_BUILTIN_FLDENV: - mode0 = 
BLKmode; - switch (fcode) - { - case IX86_BUILTIN_FXSAVE: - icode = CODE_FOR_fxsave; - break; - case IX86_BUILTIN_FXRSTOR: - icode = CODE_FOR_fxrstor; - break; - case IX86_BUILTIN_FXSAVE64: - icode = CODE_FOR_fxsave64; - break; - case IX86_BUILTIN_FXRSTOR64: - icode = CODE_FOR_fxrstor64; - break; - case IX86_BUILTIN_FNSTENV: - icode = CODE_FOR_fnstenv; - break; - case IX86_BUILTIN_FLDENV: - icode = CODE_FOR_fldenv; - break; - default: - gcc_unreachable (); - } - - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = expand_normal (arg0); - - if (!address_operand (op0, VOIDmode)) - { - op0 = convert_memory_address (Pmode, op0); - op0 = copy_addr_to_reg (op0); - } - op0 = gen_rtx_MEM (mode0, op0); - - pat = GEN_FCN (icode) (op0); - if (pat) - emit_insn (pat); - return 0; - - case IX86_BUILTIN_XSETBV: - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - - if (!REG_P (op0)) - op0 = copy_to_mode_reg (SImode, op0); - - op1 = force_reg (DImode, op1); - - if (TARGET_64BIT) - { - op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32), - NULL, 1, OPTAB_DIRECT); - - icode = CODE_FOR_xsetbv_rex64; - - op2 = gen_lowpart (SImode, op2); - op1 = gen_lowpart (SImode, op1); - pat = GEN_FCN (icode) (op0, op1, op2); - } - else - { - icode = CODE_FOR_xsetbv; - - pat = GEN_FCN (icode) (op0, op1); - } - if (pat) - emit_insn (pat); - return 0; - - case IX86_BUILTIN_XSAVE: - case IX86_BUILTIN_XRSTOR: - case IX86_BUILTIN_XSAVE64: - case IX86_BUILTIN_XRSTOR64: - case IX86_BUILTIN_XSAVEOPT: - case IX86_BUILTIN_XSAVEOPT64: - case IX86_BUILTIN_XSAVES: - case IX86_BUILTIN_XRSTORS: - case IX86_BUILTIN_XSAVES64: - case IX86_BUILTIN_XRSTORS64: - case IX86_BUILTIN_XSAVEC: - case IX86_BUILTIN_XSAVEC64: - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - - if (!address_operand (op0, VOIDmode)) - { - op0 = convert_memory_address (Pmode, op0); - op0 = copy_addr_to_reg (op0); - } - op0 = gen_rtx_MEM (BLKmode, op0); - - op1 = force_reg (DImode, op1); - - if (TARGET_64BIT) - { - op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32), - NULL, 1, OPTAB_DIRECT); - switch (fcode) - { - case IX86_BUILTIN_XSAVE: - icode = CODE_FOR_xsave_rex64; - break; - case IX86_BUILTIN_XRSTOR: - icode = CODE_FOR_xrstor_rex64; - break; - case IX86_BUILTIN_XSAVE64: - icode = CODE_FOR_xsave64; - break; - case IX86_BUILTIN_XRSTOR64: - icode = CODE_FOR_xrstor64; - break; - case IX86_BUILTIN_XSAVEOPT: - icode = CODE_FOR_xsaveopt_rex64; - break; - case IX86_BUILTIN_XSAVEOPT64: - icode = CODE_FOR_xsaveopt64; - break; - case IX86_BUILTIN_XSAVES: - icode = CODE_FOR_xsaves_rex64; - break; - case IX86_BUILTIN_XRSTORS: - icode = CODE_FOR_xrstors_rex64; - break; - case IX86_BUILTIN_XSAVES64: - icode = CODE_FOR_xsaves64; - break; - case IX86_BUILTIN_XRSTORS64: - icode = CODE_FOR_xrstors64; - break; - case IX86_BUILTIN_XSAVEC: - icode = CODE_FOR_xsavec_rex64; - break; - case IX86_BUILTIN_XSAVEC64: - icode = CODE_FOR_xsavec64; - break; - default: - gcc_unreachable (); - } - - op2 = gen_lowpart (SImode, op2); - op1 = gen_lowpart (SImode, op1); - pat = GEN_FCN (icode) (op0, op1, op2); - } - else - { - switch (fcode) - { - case IX86_BUILTIN_XSAVE: - icode = CODE_FOR_xsave; - break; - case IX86_BUILTIN_XRSTOR: - icode = CODE_FOR_xrstor; - break; - case IX86_BUILTIN_XSAVEOPT: - icode = CODE_FOR_xsaveopt; - break; - case IX86_BUILTIN_XSAVES: - icode = CODE_FOR_xsaves; - break; - case IX86_BUILTIN_XRSTORS: - icode = 
CODE_FOR_xrstors; - break; - case IX86_BUILTIN_XSAVEC: - icode = CODE_FOR_xsavec; - break; - default: - gcc_unreachable (); - } - pat = GEN_FCN (icode) (op0, op1); - } - - if (pat) - emit_insn (pat); - return 0; - - case IX86_BUILTIN_LLWPCB: - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = expand_normal (arg0); - icode = CODE_FOR_lwp_llwpcb; - if (!insn_data[icode].operand[0].predicate (op0, Pmode)) - op0 = ix86_zero_extend_to_Pmode (op0); - emit_insn (gen_lwp_llwpcb (op0)); - return 0; - - case IX86_BUILTIN_SLWPCB: - icode = CODE_FOR_lwp_slwpcb; - if (!target - || !insn_data[icode].operand[0].predicate (target, Pmode)) - target = gen_reg_rtx (Pmode); - emit_insn (gen_lwp_slwpcb (target)); - return target; - - case IX86_BUILTIN_BEXTRI32: - case IX86_BUILTIN_BEXTRI64: - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - icode = (fcode == IX86_BUILTIN_BEXTRI32 - ? CODE_FOR_tbm_bextri_si - : CODE_FOR_tbm_bextri_di); - if (!CONST_INT_P (op1)) - { - error ("last argument must be an immediate"); - return const0_rtx; - } - else - { - unsigned char length = (INTVAL (op1) >> 8) & 0xFF; - unsigned char lsb_index = INTVAL (op1) & 0xFF; - op1 = GEN_INT (length); - op2 = GEN_INT (lsb_index); - - mode1 = insn_data[icode].operand[1].mode; - if (!insn_data[icode].operand[1].predicate (op0, mode1)) - op0 = copy_to_mode_reg (mode1, op0); - - mode0 = insn_data[icode].operand[0].mode; - if (target == 0 - || !register_operand (target, mode0)) - target = gen_reg_rtx (mode0); - - pat = GEN_FCN (icode) (target, op0, op1, op2); - if (pat) - emit_insn (pat); - return target; - } - - case IX86_BUILTIN_RDRAND16_STEP: - icode = CODE_FOR_rdrandhi_1; - mode0 = HImode; - goto rdrand_step; - - case IX86_BUILTIN_RDRAND32_STEP: - icode = CODE_FOR_rdrandsi_1; - mode0 = SImode; - goto rdrand_step; - - case IX86_BUILTIN_RDRAND64_STEP: - icode = CODE_FOR_rdranddi_1; - mode0 = DImode; - -rdrand_step: - arg0 = CALL_EXPR_ARG (exp, 0); - op1 = expand_normal (arg0); - if (!address_operand (op1, VOIDmode)) - { - op1 = convert_memory_address (Pmode, op1); - op1 = copy_addr_to_reg (op1); - } - - op0 = gen_reg_rtx (mode0); - emit_insn (GEN_FCN (icode) (op0)); - - emit_move_insn (gen_rtx_MEM (mode0, op1), op0); - - op1 = gen_reg_rtx (SImode); - emit_move_insn (op1, CONST1_RTX (SImode)); - - /* Emit SImode conditional move. 
*/ - if (mode0 == HImode) - { - if (TARGET_ZERO_EXTEND_WITH_AND - && optimize_function_for_speed_p (cfun)) - { - op2 = force_reg (SImode, const0_rtx); - - emit_insn (gen_movstricthi - (gen_lowpart (HImode, op2), op0)); - } - else - { - op2 = gen_reg_rtx (SImode); - - emit_insn (gen_zero_extendhisi2 (op2, op0)); - } - } - else if (mode0 == SImode) - op2 = op0; - else - op2 = gen_rtx_SUBREG (SImode, op0, 0); - - if (target == 0 - || !register_operand (target, SImode)) - target = gen_reg_rtx (SImode); - - pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG), - const0_rtx); - emit_insn (gen_rtx_SET (target, - gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1))); - return target; - - case IX86_BUILTIN_RDSEED16_STEP: - icode = CODE_FOR_rdseedhi_1; - mode0 = HImode; - goto rdseed_step; - - case IX86_BUILTIN_RDSEED32_STEP: - icode = CODE_FOR_rdseedsi_1; - mode0 = SImode; - goto rdseed_step; - - case IX86_BUILTIN_RDSEED64_STEP: - icode = CODE_FOR_rdseeddi_1; - mode0 = DImode; - -rdseed_step: - arg0 = CALL_EXPR_ARG (exp, 0); - op1 = expand_normal (arg0); - if (!address_operand (op1, VOIDmode)) - { - op1 = convert_memory_address (Pmode, op1); - op1 = copy_addr_to_reg (op1); - } - - op0 = gen_reg_rtx (mode0); - emit_insn (GEN_FCN (icode) (op0)); - - emit_move_insn (gen_rtx_MEM (mode0, op1), op0); - - op2 = gen_reg_rtx (QImode); - - pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG), - const0_rtx); - emit_insn (gen_rtx_SET (op2, pat)); - - if (target == 0 - || !register_operand (target, SImode)) - target = gen_reg_rtx (SImode); - - emit_insn (gen_zero_extendqisi2 (target, op2)); - return target; - - case IX86_BUILTIN_SBB32: - icode = CODE_FOR_subborrowsi; - icode2 = CODE_FOR_subborrowsi_0; - mode0 = SImode; - mode1 = DImode; - mode2 = CCmode; - goto handlecarry; - - case IX86_BUILTIN_SBB64: - icode = CODE_FOR_subborrowdi; - icode2 = CODE_FOR_subborrowdi_0; - mode0 = DImode; - mode1 = TImode; - mode2 = CCmode; - goto handlecarry; - - case IX86_BUILTIN_ADDCARRYX32: - icode = CODE_FOR_addcarrysi; - icode2 = CODE_FOR_addcarrysi_0; - mode0 = SImode; - mode1 = DImode; - mode2 = CCCmode; - goto handlecarry; - - case IX86_BUILTIN_ADDCARRYX64: - icode = CODE_FOR_addcarrydi; - icode2 = CODE_FOR_addcarrydi_0; - mode0 = DImode; - mode1 = TImode; - mode2 = CCCmode; - - handlecarry: - arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */ - arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */ - arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */ - arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */ - - op1 = expand_normal (arg0); - if (!integer_zerop (arg0)) - op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1)); - - op2 = expand_normal (arg1); - if (!register_operand (op2, mode0)) - op2 = copy_to_mode_reg (mode0, op2); - - op3 = expand_normal (arg2); - if (!register_operand (op3, mode0)) - op3 = copy_to_mode_reg (mode0, op3); - - op4 = expand_normal (arg3); - if (!address_operand (op4, VOIDmode)) - { - op4 = convert_memory_address (Pmode, op4); - op4 = copy_addr_to_reg (op4); - } - - op0 = gen_reg_rtx (mode0); - if (integer_zerop (arg0)) - { - /* If arg0 is 0, optimize right away into add or sub - instruction that sets CCCmode flags. */ - op1 = gen_rtx_REG (mode2, FLAGS_REG); - emit_insn (GEN_FCN (icode2) (op0, op2, op3)); - } - else - { - /* Generate CF from input operand. */ - emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx)); - - /* Generate instruction that consumes CF. 
*/ - op1 = gen_rtx_REG (CCCmode, FLAGS_REG); - pat = gen_rtx_LTU (mode1, op1, const0_rtx); - pat2 = gen_rtx_LTU (mode0, op1, const0_rtx); - emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat, pat2)); - } - - /* Return current CF value. */ - if (target == 0) - target = gen_reg_rtx (QImode); - - pat = gen_rtx_LTU (QImode, op1, const0_rtx); - emit_insn (gen_rtx_SET (target, pat)); - - /* Store the result. */ - emit_move_insn (gen_rtx_MEM (mode0, op4), op0); - - return target; - - case IX86_BUILTIN_READ_FLAGS: - emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG))); - - if (optimize - || target == NULL_RTX - || !nonimmediate_operand (target, word_mode) - || GET_MODE (target) != word_mode) - target = gen_reg_rtx (word_mode); - - emit_insn (gen_pop (target)); - return target; - - case IX86_BUILTIN_WRITE_FLAGS: - - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = expand_normal (arg0); - if (!general_no_elim_operand (op0, word_mode)) - op0 = copy_to_mode_reg (word_mode, op0); - - emit_insn (gen_push (op0)); - emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG))); - return 0; - - case IX86_BUILTIN_KTESTC8: - icode = CODE_FOR_ktestqi; - mode3 = CCCmode; - goto kortest; - - case IX86_BUILTIN_KTESTZ8: - icode = CODE_FOR_ktestqi; - mode3 = CCZmode; - goto kortest; - - case IX86_BUILTIN_KTESTC16: - icode = CODE_FOR_ktesthi; - mode3 = CCCmode; - goto kortest; - - case IX86_BUILTIN_KTESTZ16: - icode = CODE_FOR_ktesthi; - mode3 = CCZmode; - goto kortest; - - case IX86_BUILTIN_KTESTC32: - icode = CODE_FOR_ktestsi; - mode3 = CCCmode; - goto kortest; - - case IX86_BUILTIN_KTESTZ32: - icode = CODE_FOR_ktestsi; - mode3 = CCZmode; - goto kortest; - - case IX86_BUILTIN_KTESTC64: - icode = CODE_FOR_ktestdi; - mode3 = CCCmode; - goto kortest; - - case IX86_BUILTIN_KTESTZ64: - icode = CODE_FOR_ktestdi; - mode3 = CCZmode; - goto kortest; - - case IX86_BUILTIN_KORTESTC8: - icode = CODE_FOR_kortestqi; - mode3 = CCCmode; - goto kortest; - - case IX86_BUILTIN_KORTESTZ8: - icode = CODE_FOR_kortestqi; - mode3 = CCZmode; - goto kortest; - - case IX86_BUILTIN_KORTESTC16: - icode = CODE_FOR_kortesthi; - mode3 = CCCmode; - goto kortest; - - case IX86_BUILTIN_KORTESTZ16: - icode = CODE_FOR_kortesthi; - mode3 = CCZmode; - goto kortest; - - case IX86_BUILTIN_KORTESTC32: - icode = CODE_FOR_kortestsi; - mode3 = CCCmode; - goto kortest; - - case IX86_BUILTIN_KORTESTZ32: - icode = CODE_FOR_kortestsi; - mode3 = CCZmode; - goto kortest; - - case IX86_BUILTIN_KORTESTC64: - icode = CODE_FOR_kortestdi; - mode3 = CCCmode; - goto kortest; - - case IX86_BUILTIN_KORTESTZ64: - icode = CODE_FOR_kortestdi; - mode3 = CCZmode; - - kortest: - arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */ - arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */ - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - - mode0 = insn_data[icode].operand[0].mode; - mode1 = insn_data[icode].operand[1].mode; - - if (GET_MODE (op0) != VOIDmode) - op0 = force_reg (GET_MODE (op0), op0); - - op0 = gen_lowpart (mode0, op0); - - if (!insn_data[icode].operand[0].predicate (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - - if (GET_MODE (op1) != VOIDmode) - op1 = force_reg (GET_MODE (op1), op1); - - op1 = gen_lowpart (mode1, op1); - - if (!insn_data[icode].operand[1].predicate (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - - target = gen_reg_rtx (QImode); - - /* Emit kortest. */ - emit_insn (GEN_FCN (icode) (op0, op1)); - /* And use setcc to return result from flags. 
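The handlecarry expansion above backs the _addcarry_u32/_subborrow_u32 family: the incoming carry is materialized into CF, the add or subtract consumes it, the sum is stored through the pointer operand, and the resulting CF is returned. A hypothetical multi-limb addition using it, assuming <immintrin.h> provides the intrinsic (sketch only):

    #include <immintrin.h>

    /* r = a + b over four 32-bit limbs; returns the final carry-out.  */
    static unsigned char
    add_limbs (unsigned int r[4], const unsigned int a[4], const unsigned int b[4])
    {
      unsigned char c = 0;
      for (int i = 0; i < 4; i++)
        c = _addcarry_u32 (c, a[i], b[i], &r[i]);  /* Carry chains through CF.  */
      return c;
    }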
*/ - ix86_expand_setcc (target, EQ, - gen_rtx_REG (mode3, FLAGS_REG), const0_rtx); - return target; - - case IX86_BUILTIN_GATHERSIV2DF: - icode = CODE_FOR_avx2_gathersiv2df; - goto gather_gen; - case IX86_BUILTIN_GATHERSIV4DF: - icode = CODE_FOR_avx2_gathersiv4df; - goto gather_gen; - case IX86_BUILTIN_GATHERDIV2DF: - icode = CODE_FOR_avx2_gatherdiv2df; - goto gather_gen; - case IX86_BUILTIN_GATHERDIV4DF: - icode = CODE_FOR_avx2_gatherdiv4df; - goto gather_gen; - case IX86_BUILTIN_GATHERSIV4SF: - icode = CODE_FOR_avx2_gathersiv4sf; - goto gather_gen; - case IX86_BUILTIN_GATHERSIV8SF: - icode = CODE_FOR_avx2_gathersiv8sf; - goto gather_gen; - case IX86_BUILTIN_GATHERDIV4SF: - icode = CODE_FOR_avx2_gatherdiv4sf; - goto gather_gen; - case IX86_BUILTIN_GATHERDIV8SF: - icode = CODE_FOR_avx2_gatherdiv8sf; - goto gather_gen; - case IX86_BUILTIN_GATHERSIV2DI: - icode = CODE_FOR_avx2_gathersiv2di; - goto gather_gen; - case IX86_BUILTIN_GATHERSIV4DI: - icode = CODE_FOR_avx2_gathersiv4di; - goto gather_gen; - case IX86_BUILTIN_GATHERDIV2DI: - icode = CODE_FOR_avx2_gatherdiv2di; - goto gather_gen; - case IX86_BUILTIN_GATHERDIV4DI: - icode = CODE_FOR_avx2_gatherdiv4di; - goto gather_gen; - case IX86_BUILTIN_GATHERSIV4SI: - icode = CODE_FOR_avx2_gathersiv4si; - goto gather_gen; - case IX86_BUILTIN_GATHERSIV8SI: - icode = CODE_FOR_avx2_gathersiv8si; - goto gather_gen; - case IX86_BUILTIN_GATHERDIV4SI: - icode = CODE_FOR_avx2_gatherdiv4si; - goto gather_gen; - case IX86_BUILTIN_GATHERDIV8SI: - icode = CODE_FOR_avx2_gatherdiv8si; - goto gather_gen; - case IX86_BUILTIN_GATHERALTSIV4DF: - icode = CODE_FOR_avx2_gathersiv4df; - goto gather_gen; - case IX86_BUILTIN_GATHERALTDIV8SF: - icode = CODE_FOR_avx2_gatherdiv8sf; - goto gather_gen; - case IX86_BUILTIN_GATHERALTSIV4DI: - icode = CODE_FOR_avx2_gathersiv4di; - goto gather_gen; - case IX86_BUILTIN_GATHERALTDIV8SI: - icode = CODE_FOR_avx2_gatherdiv8si; - goto gather_gen; - case IX86_BUILTIN_GATHER3SIV16SF: - icode = CODE_FOR_avx512f_gathersiv16sf; - goto gather_gen; - case IX86_BUILTIN_GATHER3SIV8DF: - icode = CODE_FOR_avx512f_gathersiv8df; - goto gather_gen; - case IX86_BUILTIN_GATHER3DIV16SF: - icode = CODE_FOR_avx512f_gatherdiv16sf; - goto gather_gen; - case IX86_BUILTIN_GATHER3DIV8DF: - icode = CODE_FOR_avx512f_gatherdiv8df; - goto gather_gen; - case IX86_BUILTIN_GATHER3SIV16SI: - icode = CODE_FOR_avx512f_gathersiv16si; - goto gather_gen; - case IX86_BUILTIN_GATHER3SIV8DI: - icode = CODE_FOR_avx512f_gathersiv8di; - goto gather_gen; - case IX86_BUILTIN_GATHER3DIV16SI: - icode = CODE_FOR_avx512f_gatherdiv16si; - goto gather_gen; - case IX86_BUILTIN_GATHER3DIV8DI: - icode = CODE_FOR_avx512f_gatherdiv8di; - goto gather_gen; - case IX86_BUILTIN_GATHER3ALTSIV8DF: - icode = CODE_FOR_avx512f_gathersiv8df; - goto gather_gen; - case IX86_BUILTIN_GATHER3ALTDIV16SF: - icode = CODE_FOR_avx512f_gatherdiv16sf; - goto gather_gen; - case IX86_BUILTIN_GATHER3ALTSIV8DI: - icode = CODE_FOR_avx512f_gathersiv8di; - goto gather_gen; - case IX86_BUILTIN_GATHER3ALTDIV16SI: - icode = CODE_FOR_avx512f_gatherdiv16si; - goto gather_gen; - case IX86_BUILTIN_GATHER3SIV2DF: - icode = CODE_FOR_avx512vl_gathersiv2df; - goto gather_gen; - case IX86_BUILTIN_GATHER3SIV4DF: - icode = CODE_FOR_avx512vl_gathersiv4df; - goto gather_gen; - case IX86_BUILTIN_GATHER3DIV2DF: - icode = CODE_FOR_avx512vl_gatherdiv2df; - goto gather_gen; - case IX86_BUILTIN_GATHER3DIV4DF: - icode = CODE_FOR_avx512vl_gatherdiv4df; - goto gather_gen; - case IX86_BUILTIN_GATHER3SIV4SF: - icode = 
CODE_FOR_avx512vl_gathersiv4sf; - goto gather_gen; - case IX86_BUILTIN_GATHER3SIV8SF: - icode = CODE_FOR_avx512vl_gathersiv8sf; - goto gather_gen; - case IX86_BUILTIN_GATHER3DIV4SF: - icode = CODE_FOR_avx512vl_gatherdiv4sf; - goto gather_gen; - case IX86_BUILTIN_GATHER3DIV8SF: - icode = CODE_FOR_avx512vl_gatherdiv8sf; - goto gather_gen; - case IX86_BUILTIN_GATHER3SIV2DI: - icode = CODE_FOR_avx512vl_gathersiv2di; - goto gather_gen; - case IX86_BUILTIN_GATHER3SIV4DI: - icode = CODE_FOR_avx512vl_gathersiv4di; - goto gather_gen; - case IX86_BUILTIN_GATHER3DIV2DI: - icode = CODE_FOR_avx512vl_gatherdiv2di; - goto gather_gen; - case IX86_BUILTIN_GATHER3DIV4DI: - icode = CODE_FOR_avx512vl_gatherdiv4di; - goto gather_gen; - case IX86_BUILTIN_GATHER3SIV4SI: - icode = CODE_FOR_avx512vl_gathersiv4si; - goto gather_gen; - case IX86_BUILTIN_GATHER3SIV8SI: - icode = CODE_FOR_avx512vl_gathersiv8si; - goto gather_gen; - case IX86_BUILTIN_GATHER3DIV4SI: - icode = CODE_FOR_avx512vl_gatherdiv4si; - goto gather_gen; - case IX86_BUILTIN_GATHER3DIV8SI: - icode = CODE_FOR_avx512vl_gatherdiv8si; - goto gather_gen; - case IX86_BUILTIN_GATHER3ALTSIV4DF: - icode = CODE_FOR_avx512vl_gathersiv4df; - goto gather_gen; - case IX86_BUILTIN_GATHER3ALTDIV8SF: - icode = CODE_FOR_avx512vl_gatherdiv8sf; - goto gather_gen; - case IX86_BUILTIN_GATHER3ALTSIV4DI: - icode = CODE_FOR_avx512vl_gathersiv4di; - goto gather_gen; - case IX86_BUILTIN_GATHER3ALTDIV8SI: - icode = CODE_FOR_avx512vl_gatherdiv8si; - goto gather_gen; - case IX86_BUILTIN_SCATTERSIV16SF: - icode = CODE_FOR_avx512f_scattersiv16sf; - goto scatter_gen; - case IX86_BUILTIN_SCATTERSIV8DF: - icode = CODE_FOR_avx512f_scattersiv8df; - goto scatter_gen; - case IX86_BUILTIN_SCATTERDIV16SF: - icode = CODE_FOR_avx512f_scatterdiv16sf; - goto scatter_gen; - case IX86_BUILTIN_SCATTERDIV8DF: - icode = CODE_FOR_avx512f_scatterdiv8df; - goto scatter_gen; - case IX86_BUILTIN_SCATTERSIV16SI: - icode = CODE_FOR_avx512f_scattersiv16si; - goto scatter_gen; - case IX86_BUILTIN_SCATTERSIV8DI: - icode = CODE_FOR_avx512f_scattersiv8di; - goto scatter_gen; - case IX86_BUILTIN_SCATTERDIV16SI: - icode = CODE_FOR_avx512f_scatterdiv16si; - goto scatter_gen; - case IX86_BUILTIN_SCATTERDIV8DI: - icode = CODE_FOR_avx512f_scatterdiv8di; - goto scatter_gen; - case IX86_BUILTIN_SCATTERSIV8SF: - icode = CODE_FOR_avx512vl_scattersiv8sf; - goto scatter_gen; - case IX86_BUILTIN_SCATTERSIV4SF: - icode = CODE_FOR_avx512vl_scattersiv4sf; - goto scatter_gen; - case IX86_BUILTIN_SCATTERSIV4DF: - icode = CODE_FOR_avx512vl_scattersiv4df; - goto scatter_gen; - case IX86_BUILTIN_SCATTERSIV2DF: - icode = CODE_FOR_avx512vl_scattersiv2df; - goto scatter_gen; - case IX86_BUILTIN_SCATTERDIV8SF: - icode = CODE_FOR_avx512vl_scatterdiv8sf; - goto scatter_gen; - case IX86_BUILTIN_SCATTERDIV4SF: - icode = CODE_FOR_avx512vl_scatterdiv4sf; - goto scatter_gen; - case IX86_BUILTIN_SCATTERDIV4DF: - icode = CODE_FOR_avx512vl_scatterdiv4df; - goto scatter_gen; - case IX86_BUILTIN_SCATTERDIV2DF: - icode = CODE_FOR_avx512vl_scatterdiv2df; - goto scatter_gen; - case IX86_BUILTIN_SCATTERSIV8SI: - icode = CODE_FOR_avx512vl_scattersiv8si; - goto scatter_gen; - case IX86_BUILTIN_SCATTERSIV4SI: - icode = CODE_FOR_avx512vl_scattersiv4si; - goto scatter_gen; - case IX86_BUILTIN_SCATTERSIV4DI: - icode = CODE_FOR_avx512vl_scattersiv4di; - goto scatter_gen; - case IX86_BUILTIN_SCATTERSIV2DI: - icode = CODE_FOR_avx512vl_scattersiv2di; - goto scatter_gen; - case IX86_BUILTIN_SCATTERDIV8SI: - icode = CODE_FOR_avx512vl_scatterdiv8si; - goto 
scatter_gen; - case IX86_BUILTIN_SCATTERDIV4SI: - icode = CODE_FOR_avx512vl_scatterdiv4si; - goto scatter_gen; - case IX86_BUILTIN_SCATTERDIV4DI: - icode = CODE_FOR_avx512vl_scatterdiv4di; - goto scatter_gen; - case IX86_BUILTIN_SCATTERDIV2DI: - icode = CODE_FOR_avx512vl_scatterdiv2di; - goto scatter_gen; - case IX86_BUILTIN_GATHERPFDPD: - icode = CODE_FOR_avx512pf_gatherpfv8sidf; - goto vec_prefetch_gen; - case IX86_BUILTIN_SCATTERALTSIV8DF: - icode = CODE_FOR_avx512f_scattersiv8df; - goto scatter_gen; - case IX86_BUILTIN_SCATTERALTDIV16SF: - icode = CODE_FOR_avx512f_scatterdiv16sf; - goto scatter_gen; - case IX86_BUILTIN_SCATTERALTSIV8DI: - icode = CODE_FOR_avx512f_scattersiv8di; - goto scatter_gen; - case IX86_BUILTIN_SCATTERALTDIV16SI: - icode = CODE_FOR_avx512f_scatterdiv16si; - goto scatter_gen; - case IX86_BUILTIN_SCATTERALTSIV4DF: - icode = CODE_FOR_avx512vl_scattersiv4df; - goto scatter_gen; - case IX86_BUILTIN_SCATTERALTDIV8SF: - icode = CODE_FOR_avx512vl_scatterdiv8sf; - goto scatter_gen; - case IX86_BUILTIN_SCATTERALTSIV4DI: - icode = CODE_FOR_avx512vl_scattersiv4di; - goto scatter_gen; - case IX86_BUILTIN_SCATTERALTDIV8SI: - icode = CODE_FOR_avx512vl_scatterdiv8si; - goto scatter_gen; - case IX86_BUILTIN_SCATTERALTSIV2DF: - icode = CODE_FOR_avx512vl_scattersiv2df; - goto scatter_gen; - case IX86_BUILTIN_SCATTERALTDIV4SF: - icode = CODE_FOR_avx512vl_scatterdiv4sf; - goto scatter_gen; - case IX86_BUILTIN_SCATTERALTSIV2DI: - icode = CODE_FOR_avx512vl_scattersiv2di; - goto scatter_gen; - case IX86_BUILTIN_SCATTERALTDIV4SI: - icode = CODE_FOR_avx512vl_scatterdiv4si; - goto scatter_gen; - case IX86_BUILTIN_GATHERPFDPS: - icode = CODE_FOR_avx512pf_gatherpfv16sisf; - goto vec_prefetch_gen; - case IX86_BUILTIN_GATHERPFQPD: - icode = CODE_FOR_avx512pf_gatherpfv8didf; - goto vec_prefetch_gen; - case IX86_BUILTIN_GATHERPFQPS: - icode = CODE_FOR_avx512pf_gatherpfv8disf; - goto vec_prefetch_gen; - case IX86_BUILTIN_SCATTERPFDPD: - icode = CODE_FOR_avx512pf_scatterpfv8sidf; - goto vec_prefetch_gen; - case IX86_BUILTIN_SCATTERPFDPS: - icode = CODE_FOR_avx512pf_scatterpfv16sisf; - goto vec_prefetch_gen; - case IX86_BUILTIN_SCATTERPFQPD: - icode = CODE_FOR_avx512pf_scatterpfv8didf; - goto vec_prefetch_gen; - case IX86_BUILTIN_SCATTERPFQPS: - icode = CODE_FOR_avx512pf_scatterpfv8disf; - goto vec_prefetch_gen; - - gather_gen: - rtx half; - rtx (*gen) (rtx, rtx); - - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - arg2 = CALL_EXPR_ARG (exp, 2); - arg3 = CALL_EXPR_ARG (exp, 3); - arg4 = CALL_EXPR_ARG (exp, 4); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - op2 = expand_normal (arg2); - op3 = expand_normal (arg3); - op4 = expand_normal (arg4); - /* Note the arg order is different from the operand order. 
*/ - mode0 = insn_data[icode].operand[1].mode; - mode2 = insn_data[icode].operand[3].mode; - mode3 = insn_data[icode].operand[4].mode; - mode4 = insn_data[icode].operand[5].mode; - - if (target == NULL_RTX - || GET_MODE (target) != insn_data[icode].operand[0].mode - || !insn_data[icode].operand[0].predicate (target, - GET_MODE (target))) - subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode); - else - subtarget = target; - - switch (fcode) - { - case IX86_BUILTIN_GATHER3ALTSIV8DF: - case IX86_BUILTIN_GATHER3ALTSIV8DI: - half = gen_reg_rtx (V8SImode); - if (!nonimmediate_operand (op2, V16SImode)) - op2 = copy_to_mode_reg (V16SImode, op2); - emit_insn (gen_vec_extract_lo_v16si (half, op2)); - op2 = half; - break; - case IX86_BUILTIN_GATHER3ALTSIV4DF: - case IX86_BUILTIN_GATHER3ALTSIV4DI: - case IX86_BUILTIN_GATHERALTSIV4DF: - case IX86_BUILTIN_GATHERALTSIV4DI: - half = gen_reg_rtx (V4SImode); - if (!nonimmediate_operand (op2, V8SImode)) - op2 = copy_to_mode_reg (V8SImode, op2); - emit_insn (gen_vec_extract_lo_v8si (half, op2)); - op2 = half; - break; - case IX86_BUILTIN_GATHER3ALTDIV16SF: - case IX86_BUILTIN_GATHER3ALTDIV16SI: - half = gen_reg_rtx (mode0); - if (mode0 == V8SFmode) - gen = gen_vec_extract_lo_v16sf; - else - gen = gen_vec_extract_lo_v16si; - if (!nonimmediate_operand (op0, GET_MODE (op0))) - op0 = copy_to_mode_reg (GET_MODE (op0), op0); - emit_insn (gen (half, op0)); - op0 = half; - op3 = lowpart_subreg (QImode, op3, HImode); - break; - case IX86_BUILTIN_GATHER3ALTDIV8SF: - case IX86_BUILTIN_GATHER3ALTDIV8SI: - case IX86_BUILTIN_GATHERALTDIV8SF: - case IX86_BUILTIN_GATHERALTDIV8SI: - half = gen_reg_rtx (mode0); - if (mode0 == V4SFmode) - gen = gen_vec_extract_lo_v8sf; - else - gen = gen_vec_extract_lo_v8si; - if (!nonimmediate_operand (op0, GET_MODE (op0))) - op0 = copy_to_mode_reg (GET_MODE (op0), op0); - emit_insn (gen (half, op0)); - op0 = half; - if (VECTOR_MODE_P (GET_MODE (op3))) - { - half = gen_reg_rtx (mode0); - if (!nonimmediate_operand (op3, GET_MODE (op3))) - op3 = copy_to_mode_reg (GET_MODE (op3), op3); - emit_insn (gen (half, op3)); - op3 = half; - } - break; - default: - break; - } - - /* Force memory operand only with base register here. But we - don't want to do it on memory operand for other builtin - functions. */ - op1 = ix86_zero_extend_to_Pmode (op1); - - if (!insn_data[icode].operand[1].predicate (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if (!insn_data[icode].operand[2].predicate (op1, Pmode)) - op1 = copy_to_mode_reg (Pmode, op1); - if (!insn_data[icode].operand[3].predicate (op2, mode2)) - op2 = copy_to_mode_reg (mode2, op2); - - op3 = fixup_modeless_constant (op3, mode3); - - if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode) - { - if (!insn_data[icode].operand[4].predicate (op3, mode3)) - op3 = copy_to_mode_reg (mode3, op3); - } - else - { - op3 = copy_to_reg (op3); - op3 = lowpart_subreg (mode3, op3, GET_MODE (op3)); - } - if (!insn_data[icode].operand[5].predicate (op4, mode4)) - { - error ("the last argument must be scale 1, 2, 4, 8"); - return const0_rtx; - } - - /* Optimize. If mask is known to have all high bits set, - replace op0 with pc_rtx to signal that the instruction - overwrites the whole destination and doesn't use its - previous contents. 
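The gather_gen cases expand the AVX2/AVX-512 gather builtins behind intrinsics such as _mm256_i32gather_pd; the scale operand must be a literal 1, 2, 4 or 8, and an all-ones mask lets the expander mark the destination as fully overwritten (the pc_rtx handling just below). A hypothetical AVX2 use, assuming <immintrin.h> and -mavx2 (sketch only):

    #include <immintrin.h>

    /* Load src[idx[0..3]] into a __m256d; scale 8 == sizeof (double).  */
    static __m256d
    gather4 (const double *src, __m128i idx)
    {
      return _mm256_i32gather_pd (src, idx, 8);
    }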
*/ - if (optimize) - { - if (TREE_CODE (arg3) == INTEGER_CST) - { - if (integer_all_onesp (arg3)) - op0 = pc_rtx; - } - else if (TREE_CODE (arg3) == VECTOR_CST) - { - unsigned int negative = 0; - for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i) - { - tree cst = VECTOR_CST_ELT (arg3, i); - if (TREE_CODE (cst) == INTEGER_CST - && tree_int_cst_sign_bit (cst)) - negative++; - else if (TREE_CODE (cst) == REAL_CST - && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst))) - negative++; - } - if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3))) - op0 = pc_rtx; - } - else if (TREE_CODE (arg3) == SSA_NAME - && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE) - { - /* Recognize also when mask is like: - __v2df src = _mm_setzero_pd (); - __v2df mask = _mm_cmpeq_pd (src, src); - or - __v8sf src = _mm256_setzero_ps (); - __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ); - as that is a cheaper way to load all ones into - a register than having to load a constant from - memory. */ - gimple *def_stmt = SSA_NAME_DEF_STMT (arg3); - if (is_gimple_call (def_stmt)) - { - tree fndecl = gimple_call_fndecl (def_stmt); - if (fndecl - && fndecl_built_in_p (fndecl, BUILT_IN_MD)) - switch (DECL_MD_FUNCTION_CODE (fndecl)) - { - case IX86_BUILTIN_CMPPD: - case IX86_BUILTIN_CMPPS: - case IX86_BUILTIN_CMPPD256: - case IX86_BUILTIN_CMPPS256: - if (!integer_zerop (gimple_call_arg (def_stmt, 2))) - break; - /* FALLTHRU */ - case IX86_BUILTIN_CMPEQPD: - case IX86_BUILTIN_CMPEQPS: - if (initializer_zerop (gimple_call_arg (def_stmt, 0)) - && initializer_zerop (gimple_call_arg (def_stmt, - 1))) - op0 = pc_rtx; - break; - default: - break; - } - } - } - } - - pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4); - if (! pat) - return const0_rtx; - emit_insn (pat); - - switch (fcode) - { - case IX86_BUILTIN_GATHER3DIV16SF: - if (target == NULL_RTX) - target = gen_reg_rtx (V8SFmode); - emit_insn (gen_vec_extract_lo_v16sf (target, subtarget)); - break; - case IX86_BUILTIN_GATHER3DIV16SI: - if (target == NULL_RTX) - target = gen_reg_rtx (V8SImode); - emit_insn (gen_vec_extract_lo_v16si (target, subtarget)); - break; - case IX86_BUILTIN_GATHER3DIV8SF: - case IX86_BUILTIN_GATHERDIV8SF: - if (target == NULL_RTX) - target = gen_reg_rtx (V4SFmode); - emit_insn (gen_vec_extract_lo_v8sf (target, subtarget)); - break; - case IX86_BUILTIN_GATHER3DIV8SI: - case IX86_BUILTIN_GATHERDIV8SI: - if (target == NULL_RTX) - target = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_extract_lo_v8si (target, subtarget)); - break; - default: - target = subtarget; - break; - } - return target; - - scatter_gen: - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - arg2 = CALL_EXPR_ARG (exp, 2); - arg3 = CALL_EXPR_ARG (exp, 3); - arg4 = CALL_EXPR_ARG (exp, 4); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - op2 = expand_normal (arg2); - op3 = expand_normal (arg3); - op4 = expand_normal (arg4); - mode1 = insn_data[icode].operand[1].mode; - mode2 = insn_data[icode].operand[2].mode; - mode3 = insn_data[icode].operand[3].mode; - mode4 = insn_data[icode].operand[4].mode; - - /* Scatter instruction stores operand op3 to memory with - indices from op2 and scale from op4 under writemask op1. - If index operand op2 has more elements then source operand - op3 one need to use only its low half. And vice versa. 
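The scatter_gen path is the store-side counterpart: op3 is written to base + index * scale under write-mask op1. A hypothetical AVX-512F use, assuming <immintrin.h> and -mavx512f (sketch only):

    #include <immintrin.h>

    /* Store the sixteen floats of v at dst[idx[i]]; scale 4 == sizeof (float).  */
    static void
    scatter16 (float *dst, __m512i idx, __m512 v)
    {
      _mm512_i32scatter_ps (dst, idx, v, 4);
    }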
*/ - switch (fcode) - { - case IX86_BUILTIN_SCATTERALTSIV8DF: - case IX86_BUILTIN_SCATTERALTSIV8DI: - half = gen_reg_rtx (V8SImode); - if (!nonimmediate_operand (op2, V16SImode)) - op2 = copy_to_mode_reg (V16SImode, op2); - emit_insn (gen_vec_extract_lo_v16si (half, op2)); - op2 = half; - break; - case IX86_BUILTIN_SCATTERALTDIV16SF: - case IX86_BUILTIN_SCATTERALTDIV16SI: - half = gen_reg_rtx (mode3); - if (mode3 == V8SFmode) - gen = gen_vec_extract_lo_v16sf; - else - gen = gen_vec_extract_lo_v16si; - if (!nonimmediate_operand (op3, GET_MODE (op3))) - op3 = copy_to_mode_reg (GET_MODE (op3), op3); - emit_insn (gen (half, op3)); - op3 = half; - break; - case IX86_BUILTIN_SCATTERALTSIV4DF: - case IX86_BUILTIN_SCATTERALTSIV4DI: - half = gen_reg_rtx (V4SImode); - if (!nonimmediate_operand (op2, V8SImode)) - op2 = copy_to_mode_reg (V8SImode, op2); - emit_insn (gen_vec_extract_lo_v8si (half, op2)); - op2 = half; - break; - case IX86_BUILTIN_SCATTERALTDIV8SF: - case IX86_BUILTIN_SCATTERALTDIV8SI: - half = gen_reg_rtx (mode3); - if (mode3 == V4SFmode) - gen = gen_vec_extract_lo_v8sf; - else - gen = gen_vec_extract_lo_v8si; - if (!nonimmediate_operand (op3, GET_MODE (op3))) - op3 = copy_to_mode_reg (GET_MODE (op3), op3); - emit_insn (gen (half, op3)); - op3 = half; - break; - case IX86_BUILTIN_SCATTERALTSIV2DF: - case IX86_BUILTIN_SCATTERALTSIV2DI: - if (!nonimmediate_operand (op2, V4SImode)) - op2 = copy_to_mode_reg (V4SImode, op2); - break; - case IX86_BUILTIN_SCATTERALTDIV4SF: - case IX86_BUILTIN_SCATTERALTDIV4SI: - if (!nonimmediate_operand (op3, GET_MODE (op3))) - op3 = copy_to_mode_reg (GET_MODE (op3), op3); - break; - default: - break; - } - - /* Force memory operand only with base register here. But we - don't want to do it on memory operand for other builtin - functions. */ - op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1)); - - if (!insn_data[icode].operand[0].predicate (op0, Pmode)) - op0 = copy_to_mode_reg (Pmode, op0); - - op1 = fixup_modeless_constant (op1, mode1); - - if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode) - { - if (!insn_data[icode].operand[1].predicate (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - } - else - { - op1 = copy_to_reg (op1); - op1 = lowpart_subreg (mode1, op1, GET_MODE (op1)); - } - - if (!insn_data[icode].operand[2].predicate (op2, mode2)) - op2 = copy_to_mode_reg (mode2, op2); - - if (!insn_data[icode].operand[3].predicate (op3, mode3)) - op3 = copy_to_mode_reg (mode3, op3); - - if (!insn_data[icode].operand[4].predicate (op4, mode4)) - { - error ("the last argument must be scale 1, 2, 4, 8"); - return const0_rtx; - } - - pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); - if (! 
pat) - return const0_rtx; - - emit_insn (pat); - return 0; - - vec_prefetch_gen: - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - arg2 = CALL_EXPR_ARG (exp, 2); - arg3 = CALL_EXPR_ARG (exp, 3); - arg4 = CALL_EXPR_ARG (exp, 4); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - op2 = expand_normal (arg2); - op3 = expand_normal (arg3); - op4 = expand_normal (arg4); - mode0 = insn_data[icode].operand[0].mode; - mode1 = insn_data[icode].operand[1].mode; - mode3 = insn_data[icode].operand[3].mode; - mode4 = insn_data[icode].operand[4].mode; - - op0 = fixup_modeless_constant (op0, mode0); - - if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) - { - if (!insn_data[icode].operand[0].predicate (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - } - else - { - op0 = copy_to_reg (op0); - op0 = lowpart_subreg (mode0, op0, GET_MODE (op0)); - } - - if (!insn_data[icode].operand[1].predicate (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - - /* Force memory operand only with base register here. But we - don't want to do it on memory operand for other builtin - functions. */ - op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1)); - - if (!insn_data[icode].operand[2].predicate (op2, Pmode)) - op2 = copy_to_mode_reg (Pmode, op2); - - if (!insn_data[icode].operand[3].predicate (op3, mode3)) - { - error ("the forth argument must be scale 1, 2, 4, 8"); - return const0_rtx; - } - - if (!insn_data[icode].operand[4].predicate (op4, mode4)) - { - error ("incorrect hint operand"); - return const0_rtx; - } - - pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); - if (! pat) - return const0_rtx; - - emit_insn (pat); - - return 0; - - case IX86_BUILTIN_XABORT: - icode = CODE_FOR_xabort; - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = expand_normal (arg0); - mode0 = insn_data[icode].operand[0].mode; - if (!insn_data[icode].operand[0].predicate (op0, mode0)) - { - error ("the argument to % intrinsic must " - "be an 8-bit immediate"); - return const0_rtx; - } - emit_insn (gen_xabort (op0)); - return 0; - - case IX86_BUILTIN_RSTORSSP: - case IX86_BUILTIN_CLRSSBSY: - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = expand_normal (arg0); - icode = (fcode == IX86_BUILTIN_RSTORSSP - ? 
CODE_FOR_rstorssp - : CODE_FOR_clrssbsy); - if (!address_operand (op0, VOIDmode)) - { - op1 = convert_memory_address (Pmode, op0); - op0 = copy_addr_to_reg (op1); - } - emit_insn (GEN_FCN (icode) (gen_rtx_MEM (Pmode, op0))); - return 0; - - case IX86_BUILTIN_WRSSD: - case IX86_BUILTIN_WRSSQ: - case IX86_BUILTIN_WRUSSD: - case IX86_BUILTIN_WRUSSQ: - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = expand_normal (arg0); - arg1 = CALL_EXPR_ARG (exp, 1); - op1 = expand_normal (arg1); - switch (fcode) - { - case IX86_BUILTIN_WRSSD: - icode = CODE_FOR_wrsssi; - mode = SImode; - break; - case IX86_BUILTIN_WRSSQ: - icode = CODE_FOR_wrssdi; - mode = DImode; - break; - case IX86_BUILTIN_WRUSSD: - icode = CODE_FOR_wrusssi; - mode = SImode; - break; - case IX86_BUILTIN_WRUSSQ: - icode = CODE_FOR_wrussdi; - mode = DImode; - break; - } - op0 = force_reg (mode, op0); - if (!address_operand (op1, VOIDmode)) - { - op2 = convert_memory_address (Pmode, op1); - op1 = copy_addr_to_reg (op2); - } - emit_insn (GEN_FCN (icode) (op0, gen_rtx_MEM (mode, op1))); - return 0; - - default: - break; - } - - if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST - && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST) - { - i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST; - return ix86_expand_special_args_builtin (bdesc_special_args + i, exp, - target); - } - - if (fcode >= IX86_BUILTIN__BDESC_ARGS_FIRST - && fcode <= IX86_BUILTIN__BDESC_ARGS_LAST) - { - i = fcode - IX86_BUILTIN__BDESC_ARGS_FIRST; - rtx (*fcn) (rtx, rtx, rtx, rtx) = NULL; - rtx (*fcn_mask) (rtx, rtx, rtx, rtx, rtx); - rtx (*fcn_maskz) (rtx, rtx, rtx, rtx, rtx, rtx); - int masked = 1; - machine_mode mode, wide_mode, nar_mode; - - nar_mode = V4SFmode; - mode = V16SFmode; - wide_mode = V64SFmode; - fcn_mask = gen_avx5124fmaddps_4fmaddps_mask; - fcn_maskz = gen_avx5124fmaddps_4fmaddps_maskz; - - switch (fcode) - { - case IX86_BUILTIN_4FMAPS: - fcn = gen_avx5124fmaddps_4fmaddps; - masked = 0; - goto v4fma_expand; - - case IX86_BUILTIN_4DPWSSD: - nar_mode = V4SImode; - mode = V16SImode; - wide_mode = V64SImode; - fcn = gen_avx5124vnniw_vp4dpwssd; - masked = 0; - goto v4fma_expand; - - case IX86_BUILTIN_4DPWSSDS: - nar_mode = V4SImode; - mode = V16SImode; - wide_mode = V64SImode; - fcn = gen_avx5124vnniw_vp4dpwssds; - masked = 0; - goto v4fma_expand; - - case IX86_BUILTIN_4FNMAPS: - fcn = gen_avx5124fmaddps_4fnmaddps; - masked = 0; - goto v4fma_expand; - - case IX86_BUILTIN_4FNMAPS_MASK: - fcn_mask = gen_avx5124fmaddps_4fnmaddps_mask; - fcn_maskz = gen_avx5124fmaddps_4fnmaddps_maskz; - goto v4fma_expand; - - case IX86_BUILTIN_4DPWSSD_MASK: - nar_mode = V4SImode; - mode = V16SImode; - wide_mode = V64SImode; - fcn_mask = gen_avx5124vnniw_vp4dpwssd_mask; - fcn_maskz = gen_avx5124vnniw_vp4dpwssd_maskz; - goto v4fma_expand; - - case IX86_BUILTIN_4DPWSSDS_MASK: - nar_mode = V4SImode; - mode = V16SImode; - wide_mode = V64SImode; - fcn_mask = gen_avx5124vnniw_vp4dpwssds_mask; - fcn_maskz = gen_avx5124vnniw_vp4dpwssds_maskz; - goto v4fma_expand; - - case IX86_BUILTIN_4FMAPS_MASK: - { - tree args[4]; - rtx ops[4]; - rtx wide_reg; - rtx accum; - rtx addr; - rtx mem; - -v4fma_expand: - wide_reg = gen_reg_rtx (wide_mode); - for (i = 0; i < 4; i++) - { - args[i] = CALL_EXPR_ARG (exp, i); - ops[i] = expand_normal (args[i]); - - emit_move_insn (gen_rtx_SUBREG (mode, wide_reg, i * 64), - ops[i]); - } - - accum = expand_normal (CALL_EXPR_ARG (exp, 4)); - accum = force_reg (mode, accum); - - addr = expand_normal (CALL_EXPR_ARG (exp, 5)); - addr = force_reg (Pmode, addr); - - mem = gen_rtx_MEM 
(nar_mode, addr); - - target = gen_reg_rtx (mode); - - emit_move_insn (target, accum); - - if (! masked) - emit_insn (fcn (target, accum, wide_reg, mem)); - else - { - rtx merge, mask; - merge = expand_normal (CALL_EXPR_ARG (exp, 6)); - - mask = expand_normal (CALL_EXPR_ARG (exp, 7)); - - if (CONST_INT_P (mask)) - mask = fixup_modeless_constant (mask, HImode); - - mask = force_reg (HImode, mask); - - if (GET_MODE (mask) != HImode) - mask = gen_rtx_SUBREG (HImode, mask, 0); - - /* If merge is 0 then we're about to emit z-masked variant. */ - if (const0_operand (merge, mode)) - emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask)); - /* If merge is the same as accum then emit merge-masked variant. */ - else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4)) - { - merge = force_reg (mode, merge); - emit_insn (fcn_mask (target, wide_reg, mem, merge, mask)); - } - /* Merge with something unknown might happen if we z-mask w/ -O0. */ - else - { - target = gen_reg_rtx (mode); - emit_move_insn (target, merge); - emit_insn (fcn_mask (target, wide_reg, mem, target, mask)); - } - } - return target; - } - - case IX86_BUILTIN_4FNMASS: - fcn = gen_avx5124fmaddps_4fnmaddss; - masked = 0; - goto s4fma_expand; - - case IX86_BUILTIN_4FMASS: - fcn = gen_avx5124fmaddps_4fmaddss; - masked = 0; - goto s4fma_expand; - - case IX86_BUILTIN_4FNMASS_MASK: - fcn_mask = gen_avx5124fmaddps_4fnmaddss_mask; - fcn_maskz = gen_avx5124fmaddps_4fnmaddss_maskz; - goto s4fma_expand; - - case IX86_BUILTIN_4FMASS_MASK: - { - tree args[4]; - rtx ops[4]; - rtx wide_reg; - rtx accum; - rtx addr; - rtx mem; - - fcn_mask = gen_avx5124fmaddps_4fmaddss_mask; - fcn_maskz = gen_avx5124fmaddps_4fmaddss_maskz; - -s4fma_expand: - mode = V4SFmode; - wide_reg = gen_reg_rtx (V64SFmode); - for (i = 0; i < 4; i++) - { - rtx tmp; - args[i] = CALL_EXPR_ARG (exp, i); - ops[i] = expand_normal (args[i]); - - tmp = gen_reg_rtx (SFmode); - emit_move_insn (tmp, gen_rtx_SUBREG (SFmode, ops[i], 0)); - - emit_move_insn (gen_rtx_SUBREG (V16SFmode, wide_reg, i * 64), - gen_rtx_SUBREG (V16SFmode, tmp, 0)); - } - - accum = expand_normal (CALL_EXPR_ARG (exp, 4)); - accum = force_reg (V4SFmode, accum); - - addr = expand_normal (CALL_EXPR_ARG (exp, 5)); - addr = force_reg (Pmode, addr); - - mem = gen_rtx_MEM (V4SFmode, addr); - - target = gen_reg_rtx (V4SFmode); - - emit_move_insn (target, accum); - - if (! masked) - emit_insn (fcn (target, accum, wide_reg, mem)); - else - { - rtx merge, mask; - merge = expand_normal (CALL_EXPR_ARG (exp, 6)); - - mask = expand_normal (CALL_EXPR_ARG (exp, 7)); - - if (CONST_INT_P (mask)) - mask = fixup_modeless_constant (mask, QImode); - - mask = force_reg (QImode, mask); - - if (GET_MODE (mask) != QImode) - mask = gen_rtx_SUBREG (QImode, mask, 0); - - /* If merge is 0 then we're about to emit z-masked variant. */ - if (const0_operand (merge, mode)) - emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask)); - /* If merge is the same as accum then emit merge-masked - variant. */ - else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4)) - { - merge = force_reg (mode, merge); - emit_insn (fcn_mask (target, wide_reg, mem, merge, mask)); - } - /* Merge with something unknown might happen if we z-mask - w/ -O0. 
*/ - else - { - target = gen_reg_rtx (mode); - emit_move_insn (target, merge); - emit_insn (fcn_mask (target, wide_reg, mem, target, mask)); - } - } - return target; - } - case IX86_BUILTIN_RDPID: - return ix86_expand_special_args_builtin (bdesc_args + i, exp, - target); - case IX86_BUILTIN_FABSQ: - case IX86_BUILTIN_COPYSIGNQ: - if (!TARGET_SSE) - /* Emit a normal call if SSE isn't available. */ - return expand_call (exp, target, ignore); - /* FALLTHRU */ - default: - return ix86_expand_args_builtin (bdesc_args + i, exp, target); - } - } - - if (fcode >= IX86_BUILTIN__BDESC_COMI_FIRST - && fcode <= IX86_BUILTIN__BDESC_COMI_LAST) - { - i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST; - return ix86_expand_sse_comi (bdesc_comi + i, exp, target); - } - - if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST - && fcode <= IX86_BUILTIN__BDESC_ROUND_ARGS_LAST) - { - i = fcode - IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST; - return ix86_expand_round_builtin (bdesc_round_args + i, exp, target); - } - - if (fcode >= IX86_BUILTIN__BDESC_PCMPESTR_FIRST - && fcode <= IX86_BUILTIN__BDESC_PCMPESTR_LAST) - { - i = fcode - IX86_BUILTIN__BDESC_PCMPESTR_FIRST; - return ix86_expand_sse_pcmpestr (bdesc_pcmpestr + i, exp, target); - } - - if (fcode >= IX86_BUILTIN__BDESC_PCMPISTR_FIRST - && fcode <= IX86_BUILTIN__BDESC_PCMPISTR_LAST) - { - i = fcode - IX86_BUILTIN__BDESC_PCMPISTR_FIRST; - return ix86_expand_sse_pcmpistr (bdesc_pcmpistr + i, exp, target); - } - - if (fcode >= IX86_BUILTIN__BDESC_MULTI_ARG_FIRST - && fcode <= IX86_BUILTIN__BDESC_MULTI_ARG_LAST) - { - i = fcode - IX86_BUILTIN__BDESC_MULTI_ARG_FIRST; - const struct builtin_description *d = bdesc_multi_arg + i; - return ix86_expand_multi_arg_builtin (d->icode, exp, target, - (enum ix86_builtin_func_type) - d->flag, d->comparison); - } - - if (fcode >= IX86_BUILTIN__BDESC_CET_FIRST - && fcode <= IX86_BUILTIN__BDESC_CET_LAST) - { - i = fcode - IX86_BUILTIN__BDESC_CET_FIRST; - return ix86_expand_special_args_builtin (bdesc_cet + i, exp, - target); - } - - if (fcode >= IX86_BUILTIN__BDESC_CET_NORMAL_FIRST - && fcode <= IX86_BUILTIN__BDESC_CET_NORMAL_LAST) - { - i = fcode - IX86_BUILTIN__BDESC_CET_NORMAL_FIRST; - return ix86_expand_special_args_builtin (bdesc_cet_rdssp + i, exp, - target); - } - - gcc_unreachable (); -} - -/* A subroutine of ix86_expand_vector_init_duplicate. Tries to - fill target with val via vec_duplicate. */ - -static bool -ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val) -{ - bool ok; - rtx_insn *insn; - rtx dup; - - /* First attempt to recognize VAL as-is. */ - dup = gen_vec_duplicate (mode, val); - insn = emit_insn (gen_rtx_SET (target, dup)); - if (recog_memoized (insn) < 0) - { - rtx_insn *seq; - machine_mode innermode = GET_MODE_INNER (mode); - rtx reg; - - /* If that fails, force VAL into a register. */ - - start_sequence (); - reg = force_reg (innermode, val); - if (GET_MODE (reg) != innermode) - reg = gen_lowpart (innermode, reg); - SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg); - seq = get_insns (); - end_sequence (); - if (seq) - emit_insn_before (seq, insn); - - ok = recog_memoized (insn) >= 0; - gcc_assert (ok); - } - return true; -} - -/* Get a vector mode of the same size as the original but with elements - twice as wide. This is only guaranteed to apply to integral vectors. */ - -static machine_mode -get_mode_wider_vector (machine_mode o) -{ - /* ??? Rely on the ordering that genmodes.c gives to vectors. 
*/ - machine_mode n = GET_MODE_WIDER_MODE (o).require (); - gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2); - gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n)); - return n; -} - -static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d); -static bool expand_vec_perm_1 (struct expand_vec_perm_d *d); - -/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector - with all elements equal to VAR. Return true if successful. */ - -static bool -ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, - rtx target, rtx val) -{ - bool ok; - - switch (mode) - { - case E_V2SImode: - case E_V2SFmode: - if (!mmx_ok) - return false; - /* FALLTHRU */ - - case E_V4DFmode: - case E_V4DImode: - case E_V8SFmode: - case E_V8SImode: - case E_V2DFmode: - case E_V2DImode: - case E_V4SFmode: - case E_V4SImode: - case E_V16SImode: - case E_V8DImode: - case E_V16SFmode: - case E_V8DFmode: - return ix86_vector_duplicate_value (mode, target, val); - - case E_V4HImode: - if (!mmx_ok) - return false; - if (TARGET_SSE || TARGET_3DNOW_A) - { - rtx x; - - val = gen_lowpart (SImode, val); - x = gen_rtx_TRUNCATE (HImode, val); - x = gen_rtx_VEC_DUPLICATE (mode, x); - emit_insn (gen_rtx_SET (target, x)); - return true; - } - goto widen; - - case E_V8QImode: - if (!mmx_ok) - return false; - goto widen; - - case E_V8HImode: - if (TARGET_AVX2) - return ix86_vector_duplicate_value (mode, target, val); - - if (TARGET_SSE2) - { - struct expand_vec_perm_d dperm; - rtx tmp1, tmp2; - - permute: - memset (&dperm, 0, sizeof (dperm)); - dperm.target = target; - dperm.vmode = mode; - dperm.nelt = GET_MODE_NUNITS (mode); - dperm.op0 = dperm.op1 = gen_reg_rtx (mode); - dperm.one_operand_p = true; - - /* Extend to SImode using a paradoxical SUBREG. */ - tmp1 = gen_reg_rtx (SImode); - emit_move_insn (tmp1, gen_lowpart (SImode, val)); - - /* Insert the SImode value as low element of a V4SImode vector. */ - tmp2 = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1)); - emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2)); - - ok = (expand_vec_perm_1 (&dperm) - || expand_vec_perm_broadcast_1 (&dperm)); - gcc_assert (ok); - return ok; - } - goto widen; - - case E_V16QImode: - if (TARGET_AVX2) - return ix86_vector_duplicate_value (mode, target, val); - - if (TARGET_SSE2) - goto permute; - goto widen; - - widen: - /* Replicate the value once into the next wider mode and recurse. */ - { - machine_mode smode, wsmode, wvmode; - rtx x; - - smode = GET_MODE_INNER (mode); - wvmode = get_mode_wider_vector (mode); - wsmode = GET_MODE_INNER (wvmode); - - val = convert_modes (wsmode, smode, val, true); - x = expand_simple_binop (wsmode, ASHIFT, val, - GEN_INT (GET_MODE_BITSIZE (smode)), - NULL_RTX, 1, OPTAB_LIB_WIDEN); - val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN); - - x = gen_reg_rtx (wvmode); - ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val); - gcc_assert (ok); - emit_move_insn (target, gen_lowpart (GET_MODE (target), x)); - return ok; - } - - case E_V16HImode: - case E_V32QImode: - if (TARGET_AVX2) - return ix86_vector_duplicate_value (mode, target, val); - else - { - machine_mode hvmode = (mode == V16HImode ? 
V8HImode : V16QImode); - rtx x = gen_reg_rtx (hvmode); - - ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); - gcc_assert (ok); - - x = gen_rtx_VEC_CONCAT (mode, x, x); - emit_insn (gen_rtx_SET (target, x)); - } - return true; - - case E_V64QImode: - case E_V32HImode: - if (TARGET_AVX512BW) - return ix86_vector_duplicate_value (mode, target, val); - else - { - machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode); - rtx x = gen_reg_rtx (hvmode); - - ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); - gcc_assert (ok); - - x = gen_rtx_VEC_CONCAT (mode, x, x); - emit_insn (gen_rtx_SET (target, x)); - } - return true; - - default: - return false; - } -} - -/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector - whose ONE_VAR element is VAR, and other elements are zero. Return true - if successful. */ - -static bool -ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, - rtx target, rtx var, int one_var) -{ - machine_mode vsimode; - rtx new_target; - rtx x, tmp; - bool use_vector_set = false; - rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL; - - switch (mode) - { - case E_V2DImode: - /* For SSE4.1, we normally use vector set. But if the second - element is zero and inter-unit moves are OK, we use movq - instead. */ - use_vector_set = (TARGET_64BIT && TARGET_SSE4_1 - && !(TARGET_INTER_UNIT_MOVES_TO_VEC - && one_var == 0)); - break; - case E_V16QImode: - case E_V4SImode: - case E_V4SFmode: - use_vector_set = TARGET_SSE4_1; - break; - case E_V8HImode: - use_vector_set = TARGET_SSE2; - break; - case E_V8QImode: - use_vector_set = TARGET_MMX_WITH_SSE && TARGET_SSE4_1; - break; - case E_V4HImode: - use_vector_set = TARGET_SSE || TARGET_3DNOW_A; - break; - case E_V32QImode: - case E_V16HImode: - use_vector_set = TARGET_AVX; - break; - case E_V8SImode: - use_vector_set = TARGET_AVX; - gen_vec_set_0 = gen_vec_setv8si_0; - break; - case E_V8SFmode: - use_vector_set = TARGET_AVX; - gen_vec_set_0 = gen_vec_setv8sf_0; - break; - case E_V4DFmode: - use_vector_set = TARGET_AVX; - gen_vec_set_0 = gen_vec_setv4df_0; - break; - case E_V4DImode: - /* Use ix86_expand_vector_set in 64bit mode only. */ - use_vector_set = TARGET_AVX && TARGET_64BIT; - gen_vec_set_0 = gen_vec_setv4di_0; - break; - case E_V16SImode: - use_vector_set = TARGET_AVX512F && one_var == 0; - gen_vec_set_0 = gen_vec_setv16si_0; - break; - case E_V16SFmode: - use_vector_set = TARGET_AVX512F && one_var == 0; - gen_vec_set_0 = gen_vec_setv16sf_0; - break; - case E_V8DFmode: - use_vector_set = TARGET_AVX512F && one_var == 0; - gen_vec_set_0 = gen_vec_setv8df_0; - break; - case E_V8DImode: - /* Use ix86_expand_vector_set in 64bit mode only. 
*/ - use_vector_set = TARGET_AVX512F && TARGET_64BIT && one_var == 0; - gen_vec_set_0 = gen_vec_setv8di_0; - break; - default: - break; - } - - if (use_vector_set) - { - if (gen_vec_set_0 && one_var == 0) - { - var = force_reg (GET_MODE_INNER (mode), var); - emit_insn (gen_vec_set_0 (target, CONST0_RTX (mode), var)); - return true; - } - emit_insn (gen_rtx_SET (target, CONST0_RTX (mode))); - var = force_reg (GET_MODE_INNER (mode), var); - ix86_expand_vector_set (mmx_ok, target, var, one_var); - return true; - } - - switch (mode) - { - case E_V2SFmode: - case E_V2SImode: - if (!mmx_ok) - return false; - /* FALLTHRU */ - - case E_V2DFmode: - case E_V2DImode: - if (one_var != 0) - return false; - var = force_reg (GET_MODE_INNER (mode), var); - x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode))); - emit_insn (gen_rtx_SET (target, x)); - return true; - - case E_V4SFmode: - case E_V4SImode: - if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER) - new_target = gen_reg_rtx (mode); - else - new_target = target; - var = force_reg (GET_MODE_INNER (mode), var); - x = gen_rtx_VEC_DUPLICATE (mode, var); - x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx); - emit_insn (gen_rtx_SET (new_target, x)); - if (one_var != 0) - { - /* We need to shuffle the value to the correct position, so - create a new pseudo to store the intermediate result. */ - - /* With SSE2, we can use the integer shuffle insns. */ - if (mode != V4SFmode && TARGET_SSE2) - { - emit_insn (gen_sse2_pshufd_1 (new_target, new_target, - const1_rtx, - GEN_INT (one_var == 1 ? 0 : 1), - GEN_INT (one_var == 2 ? 0 : 1), - GEN_INT (one_var == 3 ? 0 : 1))); - if (target != new_target) - emit_move_insn (target, new_target); - return true; - } - - /* Otherwise convert the intermediate result to V4SFmode and - use the SSE1 shuffle instructions. */ - if (mode != V4SFmode) - { - tmp = gen_reg_rtx (V4SFmode); - emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target)); - } - else - tmp = new_target; - - emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp, - const1_rtx, - GEN_INT (one_var == 1 ? 0 : 1), - GEN_INT (one_var == 2 ? 0+4 : 1+4), - GEN_INT (one_var == 3 ? 0+4 : 1+4))); - - if (mode != V4SFmode) - emit_move_insn (target, gen_lowpart (V4SImode, tmp)); - else if (tmp != target) - emit_move_insn (target, tmp); - } - else if (target != new_target) - emit_move_insn (target, new_target); - return true; - - case E_V8HImode: - case E_V16QImode: - vsimode = V4SImode; - goto widen; - case E_V4HImode: - case E_V8QImode: - if (!mmx_ok) - return false; - vsimode = V2SImode; - goto widen; - widen: - if (one_var != 0) - return false; - - /* Zero extend the variable element to SImode and recurse. */ - var = convert_modes (SImode, GET_MODE_INNER (mode), var, true); - - x = gen_reg_rtx (vsimode); - if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x, - var, one_var)) - gcc_unreachable (); - - emit_move_insn (target, gen_lowpart (mode, x)); - return true; - - default: - return false; - } -} - -/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector - consisting of the values in VALS. It is known that all elements - except ONE_VAR are constants. Return true if successful. 
*/ - -static bool -ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode, - rtx target, rtx vals, int one_var) -{ - rtx var = XVECEXP (vals, 0, one_var); - machine_mode wmode; - rtx const_vec, x; - - const_vec = copy_rtx (vals); - XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode)); - const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0)); - - switch (mode) - { - case E_V2DFmode: - case E_V2DImode: - case E_V2SFmode: - case E_V2SImode: - /* For the two element vectors, it's just as easy to use - the general case. */ - return false; - - case E_V4DImode: - /* Use ix86_expand_vector_set in 64bit mode only. */ - if (!TARGET_64BIT) - return false; - /* FALLTHRU */ - case E_V4DFmode: - case E_V8SFmode: - case E_V8SImode: - case E_V16HImode: - case E_V32QImode: - case E_V4SFmode: - case E_V4SImode: - case E_V8HImode: - case E_V4HImode: - break; - - case E_V16QImode: - if (TARGET_SSE4_1) - break; - wmode = V8HImode; - goto widen; - case E_V8QImode: - if (TARGET_MMX_WITH_SSE && TARGET_SSE4_1) - break; - wmode = V4HImode; - goto widen; - widen: - /* There's no way to set one QImode entry easily. Combine - the variable value with its adjacent constant value, and - promote to an HImode set. */ - x = XVECEXP (vals, 0, one_var ^ 1); - if (one_var & 1) - { - var = convert_modes (HImode, QImode, var, true); - var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8), - NULL_RTX, 1, OPTAB_LIB_WIDEN); - x = GEN_INT (INTVAL (x) & 0xff); - } - else - { - var = convert_modes (HImode, QImode, var, true); - x = gen_int_mode (UINTVAL (x) << 8, HImode); - } - if (x != const0_rtx) - var = expand_simple_binop (HImode, IOR, var, x, var, - 1, OPTAB_LIB_WIDEN); - - x = gen_reg_rtx (wmode); - emit_move_insn (x, gen_lowpart (wmode, const_vec)); - ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1); - - emit_move_insn (target, gen_lowpart (mode, x)); - return true; - - default: - return false; - } - - emit_move_insn (target, const_vec); - ix86_expand_vector_set (mmx_ok, target, var, one_var); - return true; -} - -/* A subroutine of ix86_expand_vector_init_general. Use vector - concatenate to handle the most general case: all values variable, - and none identical. 
*/ - -static void -ix86_expand_vector_init_concat (machine_mode mode, - rtx target, rtx *ops, int n) -{ - machine_mode half_mode = VOIDmode; - rtx half[2]; - rtvec v; - int i, j; - - switch (n) - { - case 2: - switch (mode) - { - case E_V16SImode: - half_mode = V8SImode; - break; - case E_V16SFmode: - half_mode = V8SFmode; - break; - case E_V8DImode: - half_mode = V4DImode; - break; - case E_V8DFmode: - half_mode = V4DFmode; - break; - case E_V8SImode: - half_mode = V4SImode; - break; - case E_V8SFmode: - half_mode = V4SFmode; - break; - case E_V4DImode: - half_mode = V2DImode; - break; - case E_V4DFmode: - half_mode = V2DFmode; - break; - case E_V4SImode: - half_mode = V2SImode; - break; - case E_V4SFmode: - half_mode = V2SFmode; - break; - case E_V2DImode: - half_mode = DImode; - break; - case E_V2SImode: - half_mode = SImode; - break; - case E_V2DFmode: - half_mode = DFmode; - break; - case E_V2SFmode: - half_mode = SFmode; - break; - default: - gcc_unreachable (); - } - - if (!register_operand (ops[1], half_mode)) - ops[1] = force_reg (half_mode, ops[1]); - if (!register_operand (ops[0], half_mode)) - ops[0] = force_reg (half_mode, ops[0]); - emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0], - ops[1]))); - break; - - case 4: - switch (mode) - { - case E_V4DImode: - half_mode = V2DImode; - break; - case E_V4DFmode: - half_mode = V2DFmode; - break; - case E_V4SImode: - half_mode = V2SImode; - break; - case E_V4SFmode: - half_mode = V2SFmode; - break; - default: - gcc_unreachable (); - } - goto half; - - case 8: - switch (mode) - { - case E_V8DImode: - half_mode = V4DImode; - break; - case E_V8DFmode: - half_mode = V4DFmode; - break; - case E_V8SImode: - half_mode = V4SImode; - break; - case E_V8SFmode: - half_mode = V4SFmode; - break; - default: - gcc_unreachable (); - } - goto half; - - case 16: - switch (mode) - { - case E_V16SImode: - half_mode = V8SImode; - break; - case E_V16SFmode: - half_mode = V8SFmode; - break; - default: - gcc_unreachable (); - } - goto half; - -half: - /* FIXME: We process inputs backward to help RA. PR 36222. */ - i = n - 1; - for (j = 1; j != -1; j--) - { - half[j] = gen_reg_rtx (half_mode); - switch (n >> 1) - { - case 2: - v = gen_rtvec (2, ops[i-1], ops[i]); - i -= 2; - break; - case 4: - v = gen_rtvec (4, ops[i-3], ops[i-2], ops[i-1], ops[i]); - i -= 4; - break; - case 8: - v = gen_rtvec (8, ops[i-7], ops[i-6], ops[i-5], ops[i-4], - ops[i-3], ops[i-2], ops[i-1], ops[i]); - i -= 8; - break; - default: - gcc_unreachable (); - } - ix86_expand_vector_init (false, half[j], - gen_rtx_PARALLEL (half_mode, v)); - } - - ix86_expand_vector_init_concat (mode, target, half, 2); - break; - - default: - gcc_unreachable (); - } -} - -/* A subroutine of ix86_expand_vector_init_general. Use vector - interleave to handle the most general case: all values variable, - and none identical. 
*/ - -static void -ix86_expand_vector_init_interleave (machine_mode mode, - rtx target, rtx *ops, int n) -{ - machine_mode first_imode, second_imode, third_imode, inner_mode; - int i, j; - rtx op0, op1; - rtx (*gen_load_even) (rtx, rtx, rtx); - rtx (*gen_interleave_first_low) (rtx, rtx, rtx); - rtx (*gen_interleave_second_low) (rtx, rtx, rtx); - - switch (mode) - { - case E_V8HImode: - gen_load_even = gen_vec_setv8hi; - gen_interleave_first_low = gen_vec_interleave_lowv4si; - gen_interleave_second_low = gen_vec_interleave_lowv2di; - inner_mode = HImode; - first_imode = V4SImode; - second_imode = V2DImode; - third_imode = VOIDmode; - break; - case E_V16QImode: - gen_load_even = gen_vec_setv16qi; - gen_interleave_first_low = gen_vec_interleave_lowv8hi; - gen_interleave_second_low = gen_vec_interleave_lowv4si; - inner_mode = QImode; - first_imode = V8HImode; - second_imode = V4SImode; - third_imode = V2DImode; - break; - default: - gcc_unreachable (); - } - - for (i = 0; i < n; i++) - { - /* Extend the odd elment to SImode using a paradoxical SUBREG. */ - op0 = gen_reg_rtx (SImode); - emit_move_insn (op0, gen_lowpart (SImode, ops [i + i])); - - /* Insert the SImode value as low element of V4SImode vector. */ - op1 = gen_reg_rtx (V4SImode); - op0 = gen_rtx_VEC_MERGE (V4SImode, - gen_rtx_VEC_DUPLICATE (V4SImode, - op0), - CONST0_RTX (V4SImode), - const1_rtx); - emit_insn (gen_rtx_SET (op1, op0)); - - /* Cast the V4SImode vector back to a vector in orignal mode. */ - op0 = gen_reg_rtx (mode); - emit_move_insn (op0, gen_lowpart (mode, op1)); - - /* Load even elements into the second position. */ - emit_insn (gen_load_even (op0, - force_reg (inner_mode, - ops [i + i + 1]), - const1_rtx)); - - /* Cast vector to FIRST_IMODE vector. */ - ops[i] = gen_reg_rtx (first_imode); - emit_move_insn (ops[i], gen_lowpart (first_imode, op0)); - } - - /* Interleave low FIRST_IMODE vectors. */ - for (i = j = 0; i < n; i += 2, j++) - { - op0 = gen_reg_rtx (first_imode); - emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1])); - - /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */ - ops[j] = gen_reg_rtx (second_imode); - emit_move_insn (ops[j], gen_lowpart (second_imode, op0)); - } - - /* Interleave low SECOND_IMODE vectors. */ - switch (second_imode) - { - case E_V4SImode: - for (i = j = 0; i < n / 2; i += 2, j++) - { - op0 = gen_reg_rtx (second_imode); - emit_insn (gen_interleave_second_low (op0, ops[i], - ops[i + 1])); - - /* Cast the SECOND_IMODE vector to the THIRD_IMODE - vector. */ - ops[j] = gen_reg_rtx (third_imode); - emit_move_insn (ops[j], gen_lowpart (third_imode, op0)); - } - second_imode = V2DImode; - gen_interleave_second_low = gen_vec_interleave_lowv2di; - /* FALLTHRU */ - - case E_V2DImode: - op0 = gen_reg_rtx (second_imode); - emit_insn (gen_interleave_second_low (op0, ops[0], - ops[1])); - - /* Cast the SECOND_IMODE vector back to a vector on original - mode. */ - emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0))); - break; - - default: - gcc_unreachable (); - } -} - -/* A subroutine of ix86_expand_vector_init. Handle the most general case: - all values variable, and none identical. 
*/ - -static void -ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode, - rtx target, rtx vals) -{ - rtx ops[64], op0, op1, op2, op3, op4, op5; - machine_mode half_mode = VOIDmode; - machine_mode quarter_mode = VOIDmode; - int n, i; - - switch (mode) - { - case E_V2SFmode: - case E_V2SImode: - if (!mmx_ok && !TARGET_SSE) - break; - /* FALLTHRU */ - - case E_V16SImode: - case E_V16SFmode: - case E_V8DFmode: - case E_V8DImode: - case E_V8SFmode: - case E_V8SImode: - case E_V4DFmode: - case E_V4DImode: - case E_V4SFmode: - case E_V4SImode: - case E_V2DFmode: - case E_V2DImode: - n = GET_MODE_NUNITS (mode); - for (i = 0; i < n; i++) - ops[i] = XVECEXP (vals, 0, i); - ix86_expand_vector_init_concat (mode, target, ops, n); - return; - - case E_V2TImode: - for (i = 0; i < 2; i++) - ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i)); - op0 = gen_reg_rtx (V4DImode); - ix86_expand_vector_init_concat (V4DImode, op0, ops, 2); - emit_move_insn (target, gen_lowpart (GET_MODE (target), op0)); - return; - - case E_V4TImode: - for (i = 0; i < 4; i++) - ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i)); - ops[4] = gen_reg_rtx (V4DImode); - ix86_expand_vector_init_concat (V4DImode, ops[4], ops, 2); - ops[5] = gen_reg_rtx (V4DImode); - ix86_expand_vector_init_concat (V4DImode, ops[5], ops + 2, 2); - op0 = gen_reg_rtx (V8DImode); - ix86_expand_vector_init_concat (V8DImode, op0, ops + 4, 2); - emit_move_insn (target, gen_lowpart (GET_MODE (target), op0)); - return; - - case E_V32QImode: - half_mode = V16QImode; - goto half; - - case E_V16HImode: - half_mode = V8HImode; - goto half; - -half: - n = GET_MODE_NUNITS (mode); - for (i = 0; i < n; i++) - ops[i] = XVECEXP (vals, 0, i); - op0 = gen_reg_rtx (half_mode); - op1 = gen_reg_rtx (half_mode); - ix86_expand_vector_init_interleave (half_mode, op0, ops, - n >> 2); - ix86_expand_vector_init_interleave (half_mode, op1, - &ops [n >> 1], n >> 2); - emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1))); - return; - - case E_V64QImode: - quarter_mode = V16QImode; - half_mode = V32QImode; - goto quarter; - - case E_V32HImode: - quarter_mode = V8HImode; - half_mode = V16HImode; - goto quarter; - -quarter: - n = GET_MODE_NUNITS (mode); - for (i = 0; i < n; i++) - ops[i] = XVECEXP (vals, 0, i); - op0 = gen_reg_rtx (quarter_mode); - op1 = gen_reg_rtx (quarter_mode); - op2 = gen_reg_rtx (quarter_mode); - op3 = gen_reg_rtx (quarter_mode); - op4 = gen_reg_rtx (half_mode); - op5 = gen_reg_rtx (half_mode); - ix86_expand_vector_init_interleave (quarter_mode, op0, ops, - n >> 3); - ix86_expand_vector_init_interleave (quarter_mode, op1, - &ops [n >> 2], n >> 3); - ix86_expand_vector_init_interleave (quarter_mode, op2, - &ops [n >> 1], n >> 3); - ix86_expand_vector_init_interleave (quarter_mode, op3, - &ops [(n >> 1) | (n >> 2)], n >> 3); - emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1))); - emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3))); - emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5))); - return; - - case E_V16QImode: - if (!TARGET_SSE4_1) - break; - /* FALLTHRU */ - - case E_V8HImode: - if (!TARGET_SSE2) - break; - - /* Don't use ix86_expand_vector_init_interleave if we can't - move from GPR to SSE register directly. 
*/ - if (!TARGET_INTER_UNIT_MOVES_TO_VEC) - break; - - n = GET_MODE_NUNITS (mode); - for (i = 0; i < n; i++) - ops[i] = XVECEXP (vals, 0, i); - ix86_expand_vector_init_interleave (mode, target, ops, n >> 1); - return; - - case E_V4HImode: - case E_V8QImode: - break; - - default: - gcc_unreachable (); - } - - { - int i, j, n_elts, n_words, n_elt_per_word; - machine_mode inner_mode; - rtx words[4], shift; - - inner_mode = GET_MODE_INNER (mode); - n_elts = GET_MODE_NUNITS (mode); - n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; - n_elt_per_word = n_elts / n_words; - shift = GEN_INT (GET_MODE_BITSIZE (inner_mode)); - - for (i = 0; i < n_words; ++i) - { - rtx word = NULL_RTX; - - for (j = 0; j < n_elt_per_word; ++j) - { - rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1); - elt = convert_modes (word_mode, inner_mode, elt, true); - - if (j == 0) - word = elt; - else - { - word = expand_simple_binop (word_mode, ASHIFT, word, shift, - word, 1, OPTAB_LIB_WIDEN); - word = expand_simple_binop (word_mode, IOR, word, elt, - word, 1, OPTAB_LIB_WIDEN); - } - } - - words[i] = word; - } - - if (n_words == 1) - emit_move_insn (target, gen_lowpart (mode, words[0])); - else if (n_words == 2) - { - rtx tmp = gen_reg_rtx (mode); - emit_clobber (tmp); - emit_move_insn (gen_lowpart (word_mode, tmp), words[0]); - emit_move_insn (gen_highpart (word_mode, tmp), words[1]); - emit_move_insn (target, tmp); - } - else if (n_words == 4) - { - rtx tmp = gen_reg_rtx (V4SImode); - gcc_assert (word_mode == SImode); - vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words)); - ix86_expand_vector_init_general (false, V4SImode, tmp, vals); - emit_move_insn (target, gen_lowpart (mode, tmp)); - } - else - gcc_unreachable (); - } -} - -/* Initialize vector TARGET via VALS. Suppress the use of MMX - instructions unless MMX_OK is true. */ - -void -ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) -{ - machine_mode mode = GET_MODE (target); - machine_mode inner_mode = GET_MODE_INNER (mode); - int n_elts = GET_MODE_NUNITS (mode); - int n_var = 0, one_var = -1; - bool all_same = true, all_const_zero = true; - int i; - rtx x; - - /* Handle first initialization from vector elts. */ - if (n_elts != XVECLEN (vals, 0)) - { - rtx subtarget = target; - x = XVECEXP (vals, 0, 0); - gcc_assert (GET_MODE_INNER (GET_MODE (x)) == inner_mode); - if (GET_MODE_NUNITS (GET_MODE (x)) * 2 == n_elts) - { - rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) }; - if (inner_mode == QImode || inner_mode == HImode) - { - unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode); - mode = mode_for_vector (SImode, n_bits / 4).require (); - inner_mode = mode_for_vector (SImode, n_bits / 8).require (); - ops[0] = gen_lowpart (inner_mode, ops[0]); - ops[1] = gen_lowpart (inner_mode, ops[1]); - subtarget = gen_reg_rtx (mode); - } - ix86_expand_vector_init_concat (mode, subtarget, ops, 2); - if (subtarget != target) - emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget)); - return; - } - gcc_unreachable (); - } - - for (i = 0; i < n_elts; ++i) - { - x = XVECEXP (vals, 0, i); - if (!(CONST_SCALAR_INT_P (x) - || CONST_DOUBLE_P (x) - || CONST_FIXED_P (x))) - n_var++, one_var = i; - else if (x != CONST0_RTX (inner_mode)) - all_const_zero = false; - if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) - all_same = false; - } - - /* Constants are best loaded from the constant pool. 
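The cases ix86_expand_vector_init distinguishes here (all elements constant, all elements identical, a single variable element) map directly onto how vector constructors look at the source level; a small illustration using GNU C vector extensions (sketch only):

    typedef int v4si __attribute__ ((vector_size (16)));

    v4si all_const (void)   { return (v4si) { 1, 2, 3, 4 }; }  /* n_var == 0: constant pool.  */
    v4si broadcast (int x)  { return (v4si) { x, x, x, x }; }  /* all_same: duplicate path.  */
    v4si single_var (int x) { return (v4si) { x, 0, 0, 0 }; }  /* n_var == 1: one_nonzero.  */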
*/ - if (n_var == 0) - { - emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); - return; - } - - /* If all values are identical, broadcast the value. */ - if (all_same - && ix86_expand_vector_init_duplicate (mmx_ok, mode, target, - XVECEXP (vals, 0, 0))) - return; - - /* Values where only one field is non-constant are best loaded from - the pool and overwritten via move later. */ - if (n_var == 1) - { - if (all_const_zero - && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target, - XVECEXP (vals, 0, one_var), - one_var)) - return; - - if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var)) - return; - } - - ix86_expand_vector_init_general (mmx_ok, mode, target, vals); -} - -void -ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) -{ - machine_mode mode = GET_MODE (target); - machine_mode inner_mode = GET_MODE_INNER (mode); - machine_mode half_mode; - bool use_vec_merge = false; - rtx tmp; - static rtx (*gen_extract[6][2]) (rtx, rtx) - = { - { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi }, - { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi }, - { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si }, - { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di }, - { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf }, - { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df } - }; - static rtx (*gen_insert[6][2]) (rtx, rtx, rtx) - = { - { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi }, - { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi }, - { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si }, - { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di }, - { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf }, - { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df } - }; - int i, j, n; - machine_mode mmode = VOIDmode; - rtx (*gen_blendm) (rtx, rtx, rtx, rtx); - - switch (mode) - { - case E_V2SImode: - use_vec_merge = TARGET_MMX_WITH_SSE && TARGET_SSE4_1; - if (use_vec_merge) - break; - /* FALLTHRU */ - - case E_V2SFmode: - if (mmx_ok) - { - tmp = gen_reg_rtx (GET_MODE_INNER (mode)); - ix86_expand_vector_extract (true, tmp, target, 1 - elt); - if (elt == 0) - tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); - else - tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); - emit_insn (gen_rtx_SET (target, tmp)); - return; - } - break; - - case E_V2DImode: - use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT; - if (use_vec_merge) - break; - - tmp = gen_reg_rtx (GET_MODE_INNER (mode)); - ix86_expand_vector_extract (false, tmp, target, 1 - elt); - if (elt == 0) - tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); - else - tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); - emit_insn (gen_rtx_SET (target, tmp)); - return; - - case E_V2DFmode: - /* NB: For ELT == 0, use standard scalar operation patterns which - preserve the rest of the vector for combiner: - - (vec_merge:V2DF - (vec_duplicate:V2DF (reg:DF)) - (reg:V2DF) - (const_int 1)) - */ - if (elt == 0) - goto do_vec_merge; - - { - rtx op0, op1; - - /* For the two element vectors, we implement a VEC_CONCAT with - the extraction of the other element. 
*/ - - tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt))); - tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp); - - if (elt == 0) - op0 = val, op1 = tmp; - else - op0 = tmp, op1 = val; - - tmp = gen_rtx_VEC_CONCAT (mode, op0, op1); - emit_insn (gen_rtx_SET (target, tmp)); - } - return; - - case E_V4SFmode: - use_vec_merge = TARGET_SSE4_1; - if (use_vec_merge) - break; - - switch (elt) - { - case 0: - use_vec_merge = true; - break; - - case 1: - /* tmp = target = A B C D */ - tmp = copy_to_reg (target); - /* target = A A B B */ - emit_insn (gen_vec_interleave_lowv4sf (target, target, target)); - /* target = X A B B */ - ix86_expand_vector_set (false, target, val, 0); - /* target = A X C D */ - emit_insn (gen_sse_shufps_v4sf (target, target, tmp, - const1_rtx, const0_rtx, - GEN_INT (2+4), GEN_INT (3+4))); - return; - - case 2: - /* tmp = target = A B C D */ - tmp = copy_to_reg (target); - /* tmp = X B C D */ - ix86_expand_vector_set (false, tmp, val, 0); - /* target = A B X D */ - emit_insn (gen_sse_shufps_v4sf (target, target, tmp, - const0_rtx, const1_rtx, - GEN_INT (0+4), GEN_INT (3+4))); - return; - - case 3: - /* tmp = target = A B C D */ - tmp = copy_to_reg (target); - /* tmp = X B C D */ - ix86_expand_vector_set (false, tmp, val, 0); - /* target = A B X D */ - emit_insn (gen_sse_shufps_v4sf (target, target, tmp, - const0_rtx, const1_rtx, - GEN_INT (2+4), GEN_INT (0+4))); - return; - - default: - gcc_unreachable (); - } - break; - - case E_V4SImode: - use_vec_merge = TARGET_SSE4_1; - if (use_vec_merge) - break; - - /* Element 0 handled by vec_merge below. */ - if (elt == 0) - { - use_vec_merge = true; - break; - } - - if (TARGET_SSE2) - { - /* With SSE2, use integer shuffles to swap element 0 and ELT, - store into element 0, then shuffle them back. */ - - rtx order[4]; - - order[0] = GEN_INT (elt); - order[1] = const1_rtx; - order[2] = const2_rtx; - order[3] = GEN_INT (3); - order[elt] = const0_rtx; - - emit_insn (gen_sse2_pshufd_1 (target, target, order[0], - order[1], order[2], order[3])); - - ix86_expand_vector_set (false, target, val, 0); - - emit_insn (gen_sse2_pshufd_1 (target, target, order[0], - order[1], order[2], order[3])); - } - else - { - /* For SSE1, we have to reuse the V4SF code. */ - rtx t = gen_reg_rtx (V4SFmode); - emit_move_insn (t, gen_lowpart (V4SFmode, target)); - ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt); - emit_move_insn (target, gen_lowpart (mode, t)); - } - return; - - case E_V8HImode: - use_vec_merge = TARGET_SSE2; - break; - case E_V4HImode: - use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); - break; - - case E_V16QImode: - use_vec_merge = TARGET_SSE4_1; - break; - - case E_V8QImode: - use_vec_merge = TARGET_MMX_WITH_SSE && TARGET_SSE4_1; - break; - - case E_V32QImode: - half_mode = V16QImode; - j = 0; - n = 16; - goto half; - - case E_V16HImode: - half_mode = V8HImode; - j = 1; - n = 8; - goto half; - - case E_V8SImode: - half_mode = V4SImode; - j = 2; - n = 4; - goto half; - - case E_V4DImode: - half_mode = V2DImode; - j = 3; - n = 2; - goto half; - - case E_V8SFmode: - half_mode = V4SFmode; - j = 4; - n = 4; - goto half; - - case E_V4DFmode: - half_mode = V2DFmode; - j = 5; - n = 2; - goto half; - -half: - /* Compute offset. */ - i = elt / n; - elt %= n; - - gcc_assert (i <= 1); - - /* Extract the half. */ - tmp = gen_reg_rtx (half_mode); - emit_insn (gen_extract[j][i] (tmp, target)); - - /* Put val in tmp at elt. */ - ix86_expand_vector_set (false, tmp, val, elt); - - /* Put it back. 
*/ - emit_insn (gen_insert[j][i] (target, target, tmp)); - return; - - case E_V8DFmode: - if (TARGET_AVX512F) - { - mmode = QImode; - gen_blendm = gen_avx512f_blendmv8df; - } - break; - - case E_V8DImode: - if (TARGET_AVX512F) - { - mmode = QImode; - gen_blendm = gen_avx512f_blendmv8di; - } - break; - - case E_V16SFmode: - if (TARGET_AVX512F) - { - mmode = HImode; - gen_blendm = gen_avx512f_blendmv16sf; - } - break; - - case E_V16SImode: - if (TARGET_AVX512F) - { - mmode = HImode; - gen_blendm = gen_avx512f_blendmv16si; - } - break; - - case E_V32HImode: - if (TARGET_AVX512BW) - { - mmode = SImode; - gen_blendm = gen_avx512bw_blendmv32hi; - } - else if (TARGET_AVX512F) - { - half_mode = E_V8HImode; - n = 8; - goto quarter; - } - break; - - case E_V64QImode: - if (TARGET_AVX512BW) - { - mmode = DImode; - gen_blendm = gen_avx512bw_blendmv64qi; - } - else if (TARGET_AVX512F) - { - half_mode = E_V16QImode; - n = 16; - goto quarter; - } - break; - -quarter: - /* Compute offset. */ - i = elt / n; - elt %= n; - - gcc_assert (i <= 3); - - { - /* Extract the quarter. */ - tmp = gen_reg_rtx (V4SImode); - rtx tmp2 = gen_lowpart (V16SImode, target); - rtx mask = gen_reg_rtx (QImode); - - emit_move_insn (mask, constm1_rtx); - emit_insn (gen_avx512f_vextracti32x4_mask (tmp, tmp2, GEN_INT (i), - tmp, mask)); - - tmp2 = gen_reg_rtx (half_mode); - emit_move_insn (tmp2, gen_lowpart (half_mode, tmp)); - tmp = tmp2; - - /* Put val in tmp at elt. */ - ix86_expand_vector_set (false, tmp, val, elt); - - /* Put it back. */ - tmp2 = gen_reg_rtx (V16SImode); - rtx tmp3 = gen_lowpart (V16SImode, target); - mask = gen_reg_rtx (HImode); - emit_move_insn (mask, constm1_rtx); - tmp = gen_lowpart (V4SImode, tmp); - emit_insn (gen_avx512f_vinserti32x4_mask (tmp2, tmp3, tmp, GEN_INT (i), - tmp3, mask)); - emit_move_insn (target, gen_lowpart (mode, tmp2)); - } - return; - - default: - break; - } - - if (mmode != VOIDmode) - { - tmp = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val))); - /* The avx512*_blendm expanders have different operand order - from VEC_MERGE. In VEC_MERGE, the first input operand is used for - elements where the mask is set and second input operand otherwise, - in {sse,avx}*_*blend* the first input operand is used for elements - where the mask is clear and second input operand otherwise. */ - emit_insn (gen_blendm (target, target, tmp, - force_reg (mmode, - gen_int_mode (HOST_WIDE_INT_1U << elt, - mmode)))); - } - else if (use_vec_merge) - { -do_vec_merge: - tmp[...] [diff truncated at 524288 bytes]