public inbox for gcc-cvs@sourceware.org
* [gcc r13-3346] Remove accidental commits
From: Jeff Law @ 2022-10-17 23:36 UTC
To: gcc-cvs
https://gcc.gnu.org/g:f6e93b7b48195037d6c545104c952b97e05ad381
commit r13-3346-gf6e93b7b48195037d6c545104c952b97e05ad381
Author: Jeff Law <jlaw@ventanamicro.com>
Date: Mon Oct 17 17:33:52 2022 -0600
Remove accidental commits
gcc/
* config/i386/cet.c: Remove accidental commit.
* config/i386/driver-mingw32.c: Likewise.
* config/i386/i386-builtins.c: Likewise.
* config/i386/i386-d.c: Likewise.
* config/i386/i386-expand.c: Likewise.
* config/i386/i386-features.c: Likewise.
* config/i386/i386-options.c: Likewise.
* config/i386/t-cet: Likewise.
* config/i386/x86-tune-sched-atom.c: Likewise.
* config/i386/x86-tune-sched-bd.c: Likewise.
* config/i386/x86-tune-sched-core.c: Likewise.
* config/i386/x86-tune-sched.c: Likewise.
Diff:
---
gcc/config/i386/cet.c | 76 -
gcc/config/i386/driver-mingw32.c | 28 -
gcc/config/i386/i386-builtins.c | 2546 ----
gcc/config/i386/i386-d.c | 44 -
gcc/config/i386/i386-expand.c | 20310 --------------------------------
gcc/config/i386/i386-features.c | 2884 -----
gcc/config/i386/i386-options.c | 3799 ------
gcc/config/i386/t-cet | 21 -
gcc/config/i386/x86-tune-sched-atom.c | 246 -
gcc/config/i386/x86-tune-sched-bd.c | 824 --
gcc/config/i386/x86-tune-sched-core.c | 257 -
gcc/config/i386/x86-tune-sched.c | 636 -
12 files changed, 31671 deletions(-)
diff --git a/gcc/config/i386/cet.c b/gcc/config/i386/cet.c
deleted file mode 100644
index 5450ac307d5..00000000000
--- a/gcc/config/i386/cet.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/* Functions for CET/x86.
- Copyright (C) 2017-2020 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-#include "tm.h"
-#include "output.h"
-#include "linux-common.h"
-
-void
-file_end_indicate_exec_stack_and_cet (void)
-{
- file_end_indicate_exec_stack ();
-
- if (flag_cf_protection == CF_NONE)
- return;
-
- unsigned int feature_1 = 0;
-
- if (flag_cf_protection & CF_BRANCH)
- /* GNU_PROPERTY_X86_FEATURE_1_IBT. */
- feature_1 |= 0x1;
-
- if (flag_cf_protection & CF_RETURN)
- /* GNU_PROPERTY_X86_FEATURE_1_SHSTK. */
- feature_1 |= 0x2;
-
- if (feature_1)
- {
- int p2align = ptr_mode == SImode ? 2 : 3;
-
- /* Generate GNU_PROPERTY_X86_FEATURE_1_XXX. */
- switch_to_section (get_section (".note.gnu.property",
- SECTION_NOTYPE, NULL));
-
- ASM_OUTPUT_ALIGN (asm_out_file, p2align);
- /* name length. */
- fprintf (asm_out_file, ASM_LONG " 1f - 0f\n");
- /* data length. */
- fprintf (asm_out_file, ASM_LONG " 4f - 1f\n");
- /* note type: NT_GNU_PROPERTY_TYPE_0. */
- fprintf (asm_out_file, ASM_LONG " 5\n");
- fprintf (asm_out_file, "0:\n");
- /* vendor name: "GNU". */
- fprintf (asm_out_file, STRING_ASM_OP " \"GNU\"\n");
- fprintf (asm_out_file, "1:\n");
- ASM_OUTPUT_ALIGN (asm_out_file, p2align);
- /* pr_type: GNU_PROPERTY_X86_FEATURE_1_AND. */
- fprintf (asm_out_file, ASM_LONG " 0xc0000002\n");
- /* pr_datasz. */
- fprintf (asm_out_file, ASM_LONG " 3f - 2f\n");
- fprintf (asm_out_file, "2:\n");
- /* GNU_PROPERTY_X86_FEATURE_1_XXX. */
- fprintf (asm_out_file, ASM_LONG " 0x%x\n", feature_1);
- fprintf (asm_out_file, "3:\n");
- ASM_OUTPUT_ALIGN (asm_out_file, p2align);
- fprintf (asm_out_file, "4:\n");
- }
-}
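
For reference, with -fcf-protection=full on a 64-bit ELF target (so p2align
is 3), and assuming ASM_LONG expands to ".long" and STRING_ASM_OP to
".string", the deleted function emits a note shaped like this sketch (not
verbatim compiler output):

	.section	.note.gnu.property
	.p2align	3
	.long	1f - 0f		/* name length */
	.long	4f - 1f		/* data length */
	.long	5		/* note type: NT_GNU_PROPERTY_TYPE_0 */
0:
	.string	"GNU"		/* vendor name */
1:
	.p2align	3
	.long	0xc0000002	/* pr_type: GNU_PROPERTY_X86_FEATURE_1_AND */
	.long	3f - 2f		/* pr_datasz */
2:
	.long	0x3		/* feature_1: IBT (0x1) | SHSTK (0x2) */
3:
	.p2align	3
4:
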
diff --git a/gcc/config/i386/driver-mingw32.c b/gcc/config/i386/driver-mingw32.c
deleted file mode 100644
index d0517e6759d..00000000000
--- a/gcc/config/i386/driver-mingw32.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/* Host OS specific configuration for the gcc driver.
- Copyright (C) 2017-2020 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-#define IN_TARGET_CODE 1
-
-#include "config.h"
-
-/* When defined, force the use (if non null) or not (otherwise) of CLI
- globbing. */
-#ifdef MINGW_DOWILDCARD
-int _dowildcard = MINGW_DOWILDCARD;
-#endif
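
_dowildcard is a MinGW CRT convention: a non-zero value makes the startup
code expand command-line wildcards into argv before main runs.  A user
program can set the same flag directly; a minimal sketch (the -1/0 values
follow the usual enable/disable convention):

#include <stdio.h>

int _dowildcard = -1;	/* non-zero: the CRT expands e.g. *.c in argv */

int
main (int argc, char **argv)
{
  for (int i = 0; i < argc; i++)
    printf ("argv[%d] = %s\n", i, argv[i]);
  return 0;
}
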
diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c
deleted file mode 100644
index be3ed0158f2..00000000000
--- a/gcc/config/i386/i386-builtins.c
+++ /dev/null
@@ -1,2546 +0,0 @@
-/* Copyright (C) 1988-2020 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-#define IN_TARGET_CODE 1
-
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-#include "backend.h"
-#include "rtl.h"
-#include "tree.h"
-#include "memmodel.h"
-#include "gimple.h"
-#include "cfghooks.h"
-#include "cfgloop.h"
-#include "df.h"
-#include "tm_p.h"
-#include "stringpool.h"
-#include "expmed.h"
-#include "optabs.h"
-#include "regs.h"
-#include "emit-rtl.h"
-#include "recog.h"
-#include "cgraph.h"
-#include "diagnostic.h"
-#include "cfgbuild.h"
-#include "alias.h"
-#include "fold-const.h"
-#include "attribs.h"
-#include "calls.h"
-#include "stor-layout.h"
-#include "varasm.h"
-#include "output.h"
-#include "insn-attr.h"
-#include "flags.h"
-#include "except.h"
-#include "explow.h"
-#include "expr.h"
-#include "cfgrtl.h"
-#include "common/common-target.h"
-#include "langhooks.h"
-#include "reload.h"
-#include "gimplify.h"
-#include "dwarf2.h"
-#include "tm-constrs.h"
-#include "cselib.h"
-#include "sched-int.h"
-#include "opts.h"
-#include "tree-pass.h"
-#include "context.h"
-#include "pass_manager.h"
-#include "target-globals.h"
-#include "gimple-iterator.h"
-#include "tree-vectorizer.h"
-#include "shrink-wrap.h"
-#include "builtins.h"
-#include "rtl-iter.h"
-#include "tree-iterator.h"
-#include "dbgcnt.h"
-#include "case-cfn-macros.h"
-#include "dojump.h"
-#include "fold-const-call.h"
-#include "tree-vrp.h"
-#include "tree-ssanames.h"
-#include "selftest.h"
-#include "selftest-rtl.h"
-#include "print-rtl.h"
-#include "intl.h"
-#include "ifcvt.h"
-#include "symbol-summary.h"
-#include "ipa-prop.h"
-#include "ipa-fnsummary.h"
-#include "wide-int-bitmask.h"
-#include "tree-vector-builder.h"
-#include "debug.h"
-#include "dwarf2out.h"
-#include "i386-builtins.h"
-
-#undef BDESC
-#undef BDESC_FIRST
-#undef BDESC_END
-
-/* Macros for verification of enum ix86_builtins order. */
-#define BDESC_VERIFY(x, y, z) \
- gcc_checking_assert ((x) == (enum ix86_builtins) ((y) + (z)))
-#define BDESC_VERIFYS(x, y, z) \
- STATIC_ASSERT ((x) == (enum ix86_builtins) ((y) + (z)))
-
-BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
- IX86_BUILTIN__BDESC_COMI_LAST, 1);
-BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
- IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1);
-BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
- IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1);
-BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST,
- IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1);
-BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
- IX86_BUILTIN__BDESC_ARGS_LAST, 1);
-BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
- IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, 1);
-BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_FIRST,
- IX86_BUILTIN__BDESC_MULTI_ARG_LAST, 1);
-BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
- IX86_BUILTIN__BDESC_CET_LAST, 1);
-BDESC_VERIFYS (IX86_BUILTIN_MAX,
- IX86_BUILTIN__BDESC_CET_NORMAL_LAST, 1);
-
-
-/* Table for the ix86 builtin non-function types. */
-static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
-
-/* Retrieve an element from the above table, building some of
- the types lazily. */
-
-static tree
-ix86_get_builtin_type (enum ix86_builtin_type tcode)
-{
- unsigned int index;
- tree type, itype;
-
- gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
-
- type = ix86_builtin_type_tab[(int) tcode];
- if (type != NULL)
- return type;
-
- gcc_assert (tcode > IX86_BT_LAST_PRIM);
- if (tcode <= IX86_BT_LAST_VECT)
- {
- machine_mode mode;
-
- index = tcode - IX86_BT_LAST_PRIM - 1;
- itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
- mode = ix86_builtin_type_vect_mode[index];
-
- type = build_vector_type_for_mode (itype, mode);
- }
- else
- {
- int quals;
-
- index = tcode - IX86_BT_LAST_VECT - 1;
- if (tcode <= IX86_BT_LAST_PTR)
- quals = TYPE_UNQUALIFIED;
- else
- quals = TYPE_QUAL_CONST;
-
- itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
- if (quals != TYPE_UNQUALIFIED)
- itype = build_qualified_type (itype, quals);
-
- type = build_pointer_type (itype);
- }
-
- ix86_builtin_type_tab[(int) tcode] = type;
- return type;
-}
-
-/* Table for the ix86 builtin function types. */
-static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
-
-/* Retrieve an element from the above table, building some of
- the types lazily. */
-
-static tree
-ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
-{
- tree type;
-
- gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
-
- type = ix86_builtin_func_type_tab[(int) tcode];
- if (type != NULL)
- return type;
-
- if (tcode <= IX86_BT_LAST_FUNC)
- {
- unsigned start = ix86_builtin_func_start[(int) tcode];
- unsigned after = ix86_builtin_func_start[(int) tcode + 1];
- tree rtype, atype, args = void_list_node;
- unsigned i;
-
- rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
- for (i = after - 1; i > start; --i)
- {
- atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
- args = tree_cons (NULL, atype, args);
- }
-
- type = build_function_type (rtype, args);
- }
- else
- {
- unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
- enum ix86_builtin_func_type icode;
-
- icode = ix86_builtin_func_alias_base[index];
- type = ix86_get_builtin_func_type (icode);
- }
-
- ix86_builtin_func_type_tab[(int) tcode] = type;
- return type;
-}
-
-/* Table for the ix86 builtin decls. */
-static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
-
-struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
-
-tree get_ix86_builtin (enum ix86_builtins c)
-{
- return ix86_builtins[c];
-}
-
-/* Bits that can still enable any inclusion of a builtin. */
-HOST_WIDE_INT deferred_isa_values = 0;
-HOST_WIDE_INT deferred_isa_values2 = 0;
-
-/* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the
- MASK and MASK2 of which isa_flags and ix86_isa_flags2 to use in the
- ix86_builtins_isa array. Stores the function decl in the ix86_builtins
- array. Returns the function decl or NULL_TREE, if the builtin was not
- added.
-
- If the front end has a special hook for builtin functions, delay adding
- builtin functions that aren't in the current ISA until the ISA is changed
- with function specific optimization. Doing so can save about 300K for the
- default compiler. When the builtin is expanded, check at that time whether
- it is valid.
-
- If the front end doesn't have a special hook, record all builtins, even
- those that aren't in the current ISA, in case the user uses function
- specific options for a different ISA, so that we don't get scope errors
- if a builtin is added in the middle of a function scope. */
-
-static inline tree
-def_builtin (HOST_WIDE_INT mask, HOST_WIDE_INT mask2,
- const char *name,
- enum ix86_builtin_func_type tcode,
- enum ix86_builtins code)
-{
- tree decl = NULL_TREE;
-
- /* An instruction may be 64bit only regardless of ISAs. */
- if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
- {
- ix86_builtins_isa[(int) code].isa = mask;
- ix86_builtins_isa[(int) code].isa2 = mask2;
-
- mask &= ~OPTION_MASK_ISA_64BIT;
-
- /* Filter out the masks most often ored together with others. */
- if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
- && mask != OPTION_MASK_ISA_AVX512VL)
- mask &= ~OPTION_MASK_ISA_AVX512VL;
- if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512BW)
- && mask != OPTION_MASK_ISA_AVX512BW)
- mask &= ~OPTION_MASK_ISA_AVX512BW;
-
- if (((mask2 == 0 || (mask2 & ix86_isa_flags2) != 0)
- && (mask == 0 || (mask & ix86_isa_flags) != 0))
- || ((mask & OPTION_MASK_ISA_MMX) != 0 && TARGET_MMX_WITH_SSE)
- || (lang_hooks.builtin_function
- == lang_hooks.builtin_function_ext_scope))
- {
- tree type = ix86_get_builtin_func_type (tcode);
- decl = add_builtin_function (name, type, code, BUILT_IN_MD,
- NULL, NULL_TREE);
- ix86_builtins[(int) code] = decl;
- ix86_builtins_isa[(int) code].set_and_not_built_p = false;
- }
- else
- {
- /* Only MASK and MASK2 where set_and_not_built_p == true can
- potentially include a builtin. */
- deferred_isa_values |= mask;
- deferred_isa_values2 |= mask2;
- ix86_builtins[(int) code] = NULL_TREE;
- ix86_builtins_isa[(int) code].tcode = tcode;
- ix86_builtins_isa[(int) code].name = name;
- ix86_builtins_isa[(int) code].const_p = false;
- ix86_builtins_isa[(int) code].pure_p = false;
- ix86_builtins_isa[(int) code].set_and_not_built_p = true;
- }
- }
-
- return decl;
-}
-
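The deferral path above is what makes per-function ISA switching usable: a
builtin outside the command-line ISA is recorded in ix86_builtins_isa but
only registered once the target changes.  Roughly, it is what lets a sketch
like the following compile without -mavx2 (using the documented AVX2 gather
intrinsic, which GCC implements on top of one of these builtins):

#include <immintrin.h>

__attribute__ ((target ("avx2")))
__m256d
gather4 (const double *base, __m128i idx, __m256d src, __m256d mask)
{
  /* Gathers four doubles from base[idx[i]]; the scale is in bytes.  */
  return _mm256_mask_i32gather_pd (src, base, idx, mask, 8);
}
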
-/* Like def_builtin, but also marks the function decl "const". */
-
-static inline tree
-def_builtin_const (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, const char *name,
- enum ix86_builtin_func_type tcode, enum ix86_builtins code)
-{
- tree decl = def_builtin (mask, mask2, name, tcode, code);
- if (decl)
- TREE_READONLY (decl) = 1;
- else
- ix86_builtins_isa[(int) code].const_p = true;
-
- return decl;
-}
-
-/* Like def_builtin, but also marks the function decl "pure". */
-
-static inline tree
-def_builtin_pure (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, const char *name,
- enum ix86_builtin_func_type tcode, enum ix86_builtins code)
-{
- tree decl = def_builtin (mask, mask2, name, tcode, code);
- if (decl)
- DECL_PURE_P (decl) = 1;
- else
- ix86_builtins_isa[(int) code].pure_p = true;
-
- return decl;
-}
-
-/* Add any new builtin functions for a given ISA that may not have been
- declared. This saves a bit of space compared to adding all of the
- declarations to the tree, even if we didn't use them. */
-
-void
-ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2)
-{
- isa &= ~OPTION_MASK_ISA_64BIT;
-
- if ((isa & deferred_isa_values) == 0
- && (isa2 & deferred_isa_values2) == 0
- && ((deferred_isa_values & OPTION_MASK_ISA_MMX) == 0
- || !(TARGET_64BIT && (isa & OPTION_MASK_ISA_SSE2) != 0)))
- return;
-
- /* Bits in ISA value can be removed from potential isa values. */
- deferred_isa_values &= ~isa;
- deferred_isa_values2 &= ~isa2;
- if (TARGET_64BIT && (isa & OPTION_MASK_ISA_SSE2) != 0)
- deferred_isa_values &= ~OPTION_MASK_ISA_MMX;
-
- int i;
- tree saved_current_target_pragma = current_target_pragma;
- current_target_pragma = NULL_TREE;
-
- for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
- {
- if (((ix86_builtins_isa[i].isa & isa) != 0
- || (ix86_builtins_isa[i].isa2 & isa2) != 0
- || ((ix86_builtins_isa[i].isa & OPTION_MASK_ISA_MMX) != 0
- && TARGET_64BIT
- && (isa & OPTION_MASK_ISA_SSE2) != 0))
- && ix86_builtins_isa[i].set_and_not_built_p)
- {
- tree decl, type;
-
- /* Don't define the builtin again. */
- ix86_builtins_isa[i].set_and_not_built_p = false;
-
- type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
- decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
- type, i, BUILT_IN_MD, NULL,
- NULL_TREE);
-
- ix86_builtins[i] = decl;
- if (ix86_builtins_isa[i].const_p)
- TREE_READONLY (decl) = 1;
- }
- }
-
- current_target_pragma = saved_current_target_pragma;
-}
-\f
-/* TM vector builtins. */
-
-/* Reuse the existing x86-specific `struct builtin_description' because
- we're lazy. Add casts to make them fit. */
-static const struct builtin_description bdesc_tm[] =
-{
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
-
- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
-
- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
-
- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
-};
-
-/* Initialize the transactional memory vector load/store builtins. */
-
-static void
-ix86_init_tm_builtins (void)
-{
- enum ix86_builtin_func_type ftype;
- const struct builtin_description *d;
- size_t i;
- tree decl;
- tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
- tree attrs_log, attrs_type_log;
-
- if (!flag_tm)
- return;
-
- /* If there are no builtins defined, we must be compiling in a
- language without trans-mem support. */
- if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
- return;
-
- /* Use whatever attributes a normal TM load has. */
- decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
- attrs_load = DECL_ATTRIBUTES (decl);
- attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
- /* Use whatever attributes a normal TM store has. */
- decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
- attrs_store = DECL_ATTRIBUTES (decl);
- attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
- /* Use whatever attributes a normal TM log has. */
- decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
- attrs_log = DECL_ATTRIBUTES (decl);
- attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
-
- for (i = 0, d = bdesc_tm;
- i < ARRAY_SIZE (bdesc_tm);
- i++, d++)
- {
- if ((d->mask & ix86_isa_flags) != 0
- || ((d->mask & OPTION_MASK_ISA_MMX) != 0 && TARGET_MMX_WITH_SSE)
- || (lang_hooks.builtin_function
- == lang_hooks.builtin_function_ext_scope))
- {
- tree type, attrs, attrs_type;
- enum built_in_function code = (enum built_in_function) d->code;
-
- ftype = (enum ix86_builtin_func_type) d->flag;
- type = ix86_get_builtin_func_type (ftype);
-
- if (BUILTIN_TM_LOAD_P (code))
- {
- attrs = attrs_load;
- attrs_type = attrs_type_load;
- }
- else if (BUILTIN_TM_STORE_P (code))
- {
- attrs = attrs_store;
- attrs_type = attrs_type_store;
- }
- else
- {
- attrs = attrs_log;
- attrs_type = attrs_type_log;
- }
- decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
- /* The builtin without the prefix for
- calling it directly. */
- d->name + strlen ("__builtin_"),
- attrs);
- /* add_builtin_function() will set the DECL_ATTRIBUTES, now
- set the TYPE_ATTRIBUTES. */
- decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
-
- set_builtin_decl (code, decl, false);
- }
- }
-}
-
-/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
- in the current target ISA to allow the user to compile particular modules
- with different target specific options that differ from the command line
- options. */
-static void
-ix86_init_mmx_sse_builtins (void)
-{
- const struct builtin_description * d;
- enum ix86_builtin_func_type ftype;
- size_t i;
-
- /* Add all special builtins with variable number of operands. */
- for (i = 0, d = bdesc_special_args;
- i < ARRAY_SIZE (bdesc_special_args);
- i++, d++)
- {
- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, i);
- if (d->name == 0)
- continue;
-
- ftype = (enum ix86_builtin_func_type) d->flag;
- def_builtin (d->mask, d->mask2, d->name, ftype, d->code);
- }
- BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST,
- IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
- ARRAY_SIZE (bdesc_special_args) - 1);
-
- /* Add all builtins with variable number of operands. */
- for (i = 0, d = bdesc_args;
- i < ARRAY_SIZE (bdesc_args);
- i++, d++)
- {
- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS_FIRST, i);
- if (d->name == 0)
- continue;
-
- ftype = (enum ix86_builtin_func_type) d->flag;
- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code);
- }
- BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_LAST,
- IX86_BUILTIN__BDESC_ARGS_FIRST,
- ARRAY_SIZE (bdesc_args) - 1);
-
- /* Add all builtins with rounding. */
- for (i = 0, d = bdesc_round_args;
- i < ARRAY_SIZE (bdesc_round_args);
- i++, d++)
- {
- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, i);
- if (d->name == 0)
- continue;
-
- ftype = (enum ix86_builtin_func_type) d->flag;
- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code);
- }
- BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_LAST,
- IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
- ARRAY_SIZE (bdesc_round_args) - 1);
-
- /* pcmpestr[im] insns. */
- for (i = 0, d = bdesc_pcmpestr;
- i < ARRAY_SIZE (bdesc_pcmpestr);
- i++, d++)
- {
- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPESTR_FIRST, i);
- if (d->code == IX86_BUILTIN_PCMPESTRM128)
- ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
- else
- ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code);
- }
- BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_LAST,
- IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
- ARRAY_SIZE (bdesc_pcmpestr) - 1);
-
- /* pcmpistr[im] insns. */
- for (i = 0, d = bdesc_pcmpistr;
- i < ARRAY_SIZE (bdesc_pcmpistr);
- i++, d++)
- {
- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPISTR_FIRST, i);
- if (d->code == IX86_BUILTIN_PCMPISTRM128)
- ftype = V16QI_FTYPE_V16QI_V16QI_INT;
- else
- ftype = INT_FTYPE_V16QI_V16QI_INT;
- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code);
- }
- BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_LAST,
- IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
- ARRAY_SIZE (bdesc_pcmpistr) - 1);
-
- /* comi/ucomi insns. */
- for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
- {
- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_COMI_FIRST, i);
- if (d->mask == OPTION_MASK_ISA_SSE2)
- ftype = INT_FTYPE_V2DF_V2DF;
- else
- ftype = INT_FTYPE_V4SF_V4SF;
- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code);
- }
- BDESC_VERIFYS (IX86_BUILTIN__BDESC_COMI_LAST,
- IX86_BUILTIN__BDESC_COMI_FIRST,
- ARRAY_SIZE (bdesc_comi) - 1);
-
- /* SSE */
- def_builtin (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_ldmxcsr",
- VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
- def_builtin_pure (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_stmxcsr",
- UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
-
- /* SSE or 3DNow!A */
- def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
- /* As it uses V4HImode, we have to require -mmmx too. */
- | OPTION_MASK_ISA_MMX, 0,
- "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
- IX86_BUILTIN_MASKMOVQ);
-
- /* SSE2 */
- def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_maskmovdqu",
- VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
-
- def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_clflush",
- VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
- x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_mfence",
- VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
-
- /* SSE3. */
- def_builtin (OPTION_MASK_ISA_SSE3, 0, "__builtin_ia32_monitor",
- VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
- def_builtin (OPTION_MASK_ISA_SSE3, 0, "__builtin_ia32_mwait",
- VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
-
- /* AES */
- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0,
- "__builtin_ia32_aesenc128",
- V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0,
- "__builtin_ia32_aesenclast128",
- V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0,
- "__builtin_ia32_aesdec128",
- V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0,
- "__builtin_ia32_aesdeclast128",
- V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0,
- "__builtin_ia32_aesimc128",
- V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0,
- "__builtin_ia32_aeskeygenassist128",
- V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
-
- /* PCLMUL */
- def_builtin_const (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2, 0,
- "__builtin_ia32_pclmulqdq128",
- V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
-
- /* RDRND */
- def_builtin (OPTION_MASK_ISA_RDRND, 0, "__builtin_ia32_rdrand16_step",
- INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
- def_builtin (OPTION_MASK_ISA_RDRND, 0, "__builtin_ia32_rdrand32_step",
- INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
- def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, 0,
- "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
- IX86_BUILTIN_RDRAND64_STEP);
-
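The *_step builtins mirror the RDRAND instruction's carry-flag protocol:
they return 1 and store a random value on success, 0 if the hardware had no
entropy ready.  A usage sketch (the retry policy is the caller's choice):

static unsigned int
next_random (void)
{
  unsigned int value;
  /* 0 means no entropy was ready; spin until the step succeeds.  */
  while (!__builtin_ia32_rdrand32_step (&value))
    ;	/* retry */
  return value;
}
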
- /* AVX2 */
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv2df",
- V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
- IX86_BUILTIN_GATHERSIV2DF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4df",
- V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
- IX86_BUILTIN_GATHERSIV4DF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv2df",
- V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
- IX86_BUILTIN_GATHERDIV2DF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4df",
- V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
- IX86_BUILTIN_GATHERDIV4DF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4sf",
- V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
- IX86_BUILTIN_GATHERSIV4SF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv8sf",
- V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
- IX86_BUILTIN_GATHERSIV8SF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4sf",
- V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
- IX86_BUILTIN_GATHERDIV4SF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4sf256",
- V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
- IX86_BUILTIN_GATHERDIV8SF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv2di",
- V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
- IX86_BUILTIN_GATHERSIV2DI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4di",
- V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
- IX86_BUILTIN_GATHERSIV4DI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv2di",
- V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
- IX86_BUILTIN_GATHERDIV2DI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4di",
- V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
- IX86_BUILTIN_GATHERDIV4DI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4si",
- V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
- IX86_BUILTIN_GATHERSIV4SI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv8si",
- V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
- IX86_BUILTIN_GATHERSIV8SI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4si",
- V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
- IX86_BUILTIN_GATHERDIV4SI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4si256",
- V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
- IX86_BUILTIN_GATHERDIV8SI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltsiv4df ",
- V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
- IX86_BUILTIN_GATHERALTSIV4DF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltdiv8sf ",
- V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
- IX86_BUILTIN_GATHERALTDIV8SF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltsiv4di ",
- V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
- IX86_BUILTIN_GATHERALTSIV4DI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltdiv8si ",
- V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
- IX86_BUILTIN_GATHERALTDIV8SI);
-
- /* AVX512F */
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv16sf",
- V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT,
- IX86_BUILTIN_GATHER3SIV16SF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv8df",
- V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT,
- IX86_BUILTIN_GATHER3SIV8DF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv16sf",
- V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT,
- IX86_BUILTIN_GATHER3DIV16SF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv8df",
- V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT,
- IX86_BUILTIN_GATHER3DIV8DF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv16si",
- V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT,
- IX86_BUILTIN_GATHER3SIV16SI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv8di",
- V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT,
- IX86_BUILTIN_GATHER3SIV8DI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv16si",
- V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT,
- IX86_BUILTIN_GATHER3DIV16SI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv8di",
- V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT,
- IX86_BUILTIN_GATHER3DIV8DI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altsiv8df ",
- V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
- IX86_BUILTIN_GATHER3ALTSIV8DF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altdiv16sf ",
- V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
- IX86_BUILTIN_GATHER3ALTDIV16SF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altsiv8di ",
- V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
- IX86_BUILTIN_GATHER3ALTSIV8DI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altdiv16si ",
- V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
- IX86_BUILTIN_GATHER3ALTDIV16SI);
-
- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv16sf",
- VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT,
- IX86_BUILTIN_SCATTERSIV16SF);
-
- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv8df",
- VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT,
- IX86_BUILTIN_SCATTERSIV8DF);
-
- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv16sf",
- VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT,
- IX86_BUILTIN_SCATTERDIV16SF);
-
- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv8df",
- VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT,
- IX86_BUILTIN_SCATTERDIV8DF);
-
- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv16si",
- VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT,
- IX86_BUILTIN_SCATTERSIV16SI);
-
- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv8di",
- VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT,
- IX86_BUILTIN_SCATTERSIV8DI);
-
- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv16si",
- VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT,
- IX86_BUILTIN_SCATTERDIV16SI);
-
- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv8di",
- VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT,
- IX86_BUILTIN_SCATTERDIV8DI);
-
- /* AVX512VL */
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv2df",
- V2DF_FTYPE_V2DF_PCVOID_V4SI_QI_INT,
- IX86_BUILTIN_GATHER3SIV2DF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4df",
- V4DF_FTYPE_V4DF_PCVOID_V4SI_QI_INT,
- IX86_BUILTIN_GATHER3SIV4DF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div2df",
- V2DF_FTYPE_V2DF_PCVOID_V2DI_QI_INT,
- IX86_BUILTIN_GATHER3DIV2DF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4df",
- V4DF_FTYPE_V4DF_PCVOID_V4DI_QI_INT,
- IX86_BUILTIN_GATHER3DIV4DF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4sf",
- V4SF_FTYPE_V4SF_PCVOID_V4SI_QI_INT,
- IX86_BUILTIN_GATHER3SIV4SF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv8sf",
- V8SF_FTYPE_V8SF_PCVOID_V8SI_QI_INT,
- IX86_BUILTIN_GATHER3SIV8SF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4sf",
- V4SF_FTYPE_V4SF_PCVOID_V2DI_QI_INT,
- IX86_BUILTIN_GATHER3DIV4SF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div8sf",
- V4SF_FTYPE_V4SF_PCVOID_V4DI_QI_INT,
- IX86_BUILTIN_GATHER3DIV8SF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv2di",
- V2DI_FTYPE_V2DI_PCVOID_V4SI_QI_INT,
- IX86_BUILTIN_GATHER3SIV2DI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4di",
- V4DI_FTYPE_V4DI_PCVOID_V4SI_QI_INT,
- IX86_BUILTIN_GATHER3SIV4DI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div2di",
- V2DI_FTYPE_V2DI_PCVOID_V2DI_QI_INT,
- IX86_BUILTIN_GATHER3DIV2DI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4di",
- V4DI_FTYPE_V4DI_PCVOID_V4DI_QI_INT,
- IX86_BUILTIN_GATHER3DIV4DI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4si",
- V4SI_FTYPE_V4SI_PCVOID_V4SI_QI_INT,
- IX86_BUILTIN_GATHER3SIV4SI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv8si",
- V8SI_FTYPE_V8SI_PCVOID_V8SI_QI_INT,
- IX86_BUILTIN_GATHER3SIV8SI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4si",
- V4SI_FTYPE_V4SI_PCVOID_V2DI_QI_INT,
- IX86_BUILTIN_GATHER3DIV4SI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div8si",
- V4SI_FTYPE_V4SI_PCVOID_V4DI_QI_INT,
- IX86_BUILTIN_GATHER3DIV8SI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altsiv4df ",
- V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
- IX86_BUILTIN_GATHER3ALTSIV4DF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altdiv8sf ",
- V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
- IX86_BUILTIN_GATHER3ALTDIV8SF);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altsiv4di ",
- V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
- IX86_BUILTIN_GATHER3ALTSIV4DI);
-
- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altdiv8si ",
- V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
- IX86_BUILTIN_GATHER3ALTDIV8SI);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv8sf",
- VOID_FTYPE_PVOID_QI_V8SI_V8SF_INT,
- IX86_BUILTIN_SCATTERSIV8SF);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4sf",
- VOID_FTYPE_PVOID_QI_V4SI_V4SF_INT,
- IX86_BUILTIN_SCATTERSIV4SF);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4df",
- VOID_FTYPE_PVOID_QI_V4SI_V4DF_INT,
- IX86_BUILTIN_SCATTERSIV4DF);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv2df",
- VOID_FTYPE_PVOID_QI_V4SI_V2DF_INT,
- IX86_BUILTIN_SCATTERSIV2DF);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv8sf",
- VOID_FTYPE_PVOID_QI_V4DI_V4SF_INT,
- IX86_BUILTIN_SCATTERDIV8SF);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4sf",
- VOID_FTYPE_PVOID_QI_V2DI_V4SF_INT,
- IX86_BUILTIN_SCATTERDIV4SF);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4df",
- VOID_FTYPE_PVOID_QI_V4DI_V4DF_INT,
- IX86_BUILTIN_SCATTERDIV4DF);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv2df",
- VOID_FTYPE_PVOID_QI_V2DI_V2DF_INT,
- IX86_BUILTIN_SCATTERDIV2DF);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv8si",
- VOID_FTYPE_PVOID_QI_V8SI_V8SI_INT,
- IX86_BUILTIN_SCATTERSIV8SI);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4si",
- VOID_FTYPE_PVOID_QI_V4SI_V4SI_INT,
- IX86_BUILTIN_SCATTERSIV4SI);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4di",
- VOID_FTYPE_PVOID_QI_V4SI_V4DI_INT,
- IX86_BUILTIN_SCATTERSIV4DI);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv2di",
- VOID_FTYPE_PVOID_QI_V4SI_V2DI_INT,
- IX86_BUILTIN_SCATTERSIV2DI);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv8si",
- VOID_FTYPE_PVOID_QI_V4DI_V4SI_INT,
- IX86_BUILTIN_SCATTERDIV8SI);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4si",
- VOID_FTYPE_PVOID_QI_V2DI_V4SI_INT,
- IX86_BUILTIN_SCATTERDIV4SI);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4di",
- VOID_FTYPE_PVOID_QI_V4DI_V4DI_INT,
- IX86_BUILTIN_SCATTERDIV4DI);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv2di",
- VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT,
- IX86_BUILTIN_SCATTERDIV2DI);
-
- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltsiv8df ",
- VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
- IX86_BUILTIN_SCATTERALTSIV8DF);
-
- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltdiv16sf ",
- VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
- IX86_BUILTIN_SCATTERALTDIV16SF);
-
- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltsiv8di ",
- VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
- IX86_BUILTIN_SCATTERALTSIV8DI);
-
- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltdiv16si ",
- VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
- IX86_BUILTIN_SCATTERALTDIV16SI);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv4df ",
- VOID_FTYPE_PDOUBLE_QI_V8SI_V4DF_INT,
- IX86_BUILTIN_SCATTERALTSIV4DF);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv8sf ",
- VOID_FTYPE_PFLOAT_QI_V4DI_V8SF_INT,
- IX86_BUILTIN_SCATTERALTDIV8SF);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv4di ",
- VOID_FTYPE_PLONGLONG_QI_V8SI_V4DI_INT,
- IX86_BUILTIN_SCATTERALTSIV4DI);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv8si ",
- VOID_FTYPE_PINT_QI_V4DI_V8SI_INT,
- IX86_BUILTIN_SCATTERALTDIV8SI);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv2df ",
- VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
- IX86_BUILTIN_SCATTERALTSIV2DF);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv4sf ",
- VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
- IX86_BUILTIN_SCATTERALTDIV4SF);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv2di ",
- VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
- IX86_BUILTIN_SCATTERALTSIV2DI);
-
- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv4si ",
- VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
- IX86_BUILTIN_SCATTERALTDIV4SI);
-
- /* AVX512PF */
- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfdpd",
- VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
- IX86_BUILTIN_GATHERPFDPD);
- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfdps",
- VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
- IX86_BUILTIN_GATHERPFDPS);
- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfqpd",
- VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
- IX86_BUILTIN_GATHERPFQPD);
- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfqps",
- VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
- IX86_BUILTIN_GATHERPFQPS);
- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfdpd",
- VOID_FTYPE_QI_V8SI_PCVOID_INT_INT,
- IX86_BUILTIN_SCATTERPFDPD);
- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfdps",
- VOID_FTYPE_HI_V16SI_PCVOID_INT_INT,
- IX86_BUILTIN_SCATTERPFDPS);
- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfqpd",
- VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
- IX86_BUILTIN_SCATTERPFQPD);
- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfqps",
- VOID_FTYPE_QI_V8DI_PCVOID_INT_INT,
- IX86_BUILTIN_SCATTERPFQPS);
-
- /* SHA */
- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1msg1",
- V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1msg2",
- V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1nexte",
- V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1rnds4",
- V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256msg1",
- V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256msg2",
- V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256rnds2",
- V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
-
- /* RTM. */
- def_builtin (OPTION_MASK_ISA_RTM, 0, "__builtin_ia32_xabort",
- VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
-
- /* MMX access to the vec_init patterns. */
- def_builtin_const (OPTION_MASK_ISA_MMX, 0,
- "__builtin_ia32_vec_init_v2si",
- V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
-
- def_builtin_const (OPTION_MASK_ISA_MMX, 0,
- "__builtin_ia32_vec_init_v4hi",
- V4HI_FTYPE_HI_HI_HI_HI,
- IX86_BUILTIN_VEC_INIT_V4HI);
-
- def_builtin_const (OPTION_MASK_ISA_MMX, 0,
- "__builtin_ia32_vec_init_v8qi",
- V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
- IX86_BUILTIN_VEC_INIT_V8QI);
-
- /* Access to the vec_extract patterns. */
- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v2df",
- DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v2di",
- DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
- def_builtin_const (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_vec_ext_v4sf",
- FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v4si",
- SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v8hi",
- HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
-
- def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
- /* As it uses V4HImode, we have to require -mmmx too. */
- | OPTION_MASK_ISA_MMX, 0,
- "__builtin_ia32_vec_ext_v4hi",
- HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
-
- def_builtin_const (OPTION_MASK_ISA_MMX, 0,
- "__builtin_ia32_vec_ext_v2si",
- SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
-
- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v16qi",
- QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
-
- /* Access to the vec_set patterns. */
- def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, 0,
- "__builtin_ia32_vec_set_v2di",
- V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
-
- def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v4sf",
- V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
-
- def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v4si",
- V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
-
- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_set_v8hi",
- V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
-
- def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
- /* As it uses V4HImode, we have to require -mmmx too. */
- | OPTION_MASK_ISA_MMX, 0,
- "__builtin_ia32_vec_set_v4hi",
- V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
-
- def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v16qi",
- V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
-
- /* RDSEED */
- def_builtin (OPTION_MASK_ISA_RDSEED, 0, "__builtin_ia32_rdseed_hi_step",
- INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
- def_builtin (OPTION_MASK_ISA_RDSEED, 0, "__builtin_ia32_rdseed_si_step",
- INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
- def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT, 0,
- "__builtin_ia32_rdseed_di_step",
- INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
-
- /* ADCX */
- def_builtin (0, 0, "__builtin_ia32_addcarryx_u32",
- UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
- def_builtin (OPTION_MASK_ISA_64BIT, 0,
- "__builtin_ia32_addcarryx_u64",
- UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
- IX86_BUILTIN_ADDCARRYX64);
-
- /* SBB */
- def_builtin (0, 0, "__builtin_ia32_sbb_u32",
- UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
- def_builtin (OPTION_MASK_ISA_64BIT, 0,
- "__builtin_ia32_sbb_u64",
- UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
- IX86_BUILTIN_SBB64);
-
- /* Read/write FLAGS. */
- if (TARGET_64BIT)
- {
- def_builtin (OPTION_MASK_ISA_64BIT, 0, "__builtin_ia32_readeflags_u64",
- UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
- def_builtin (OPTION_MASK_ISA_64BIT, 0, "__builtin_ia32_writeeflags_u64",
- VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
- }
- else
- {
- def_builtin (0, 0, "__builtin_ia32_readeflags_u32",
- UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
- def_builtin (0, 0, "__builtin_ia32_writeeflags_u32",
- VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
- }
-
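The flags builtins read and write the whole EFLAGS/RFLAGS register.  A
64-bit sketch, for illustration only, that tests the carry flag:

static int
carry_flag_set (void)
{
  /* Bit 0 of EFLAGS/RFLAGS is CF.  */
  return (__builtin_ia32_readeflags_u64 () & 1) != 0;
}
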
- /* CLFLUSHOPT. */
- def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, 0, "__builtin_ia32_clflushopt",
- VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
-
- /* CLWB. */
- def_builtin (OPTION_MASK_ISA_CLWB, 0, "__builtin_ia32_clwb",
- VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
-
- /* MONITORX and MWAITX. */
- def_builtin (0, OPTION_MASK_ISA2_MWAITX, "__builtin_ia32_monitorx",
- VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
- def_builtin (0, OPTION_MASK_ISA2_MWAITX, "__builtin_ia32_mwaitx",
- VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
-
- /* CLZERO. */
- def_builtin (0, OPTION_MASK_ISA2_CLZERO, "__builtin_ia32_clzero",
- VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
-
- /* WAITPKG. */
- def_builtin (0, OPTION_MASK_ISA2_WAITPKG, "__builtin_ia32_umonitor",
- VOID_FTYPE_PVOID, IX86_BUILTIN_UMONITOR);
- def_builtin (0, OPTION_MASK_ISA2_WAITPKG, "__builtin_ia32_umwait",
- UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_UMWAIT);
- def_builtin (0, OPTION_MASK_ISA2_WAITPKG, "__builtin_ia32_tpause",
- UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_TPAUSE);
-
- /* CLDEMOTE. */
- def_builtin (0, OPTION_MASK_ISA2_CLDEMOTE, "__builtin_ia32_cldemote",
- VOID_FTYPE_PCVOID, IX86_BUILTIN_CLDEMOTE);
-
- /* Add FMA4 multi-argument instructions. */
- for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
- {
- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, i);
- if (d->name == 0)
- continue;
-
- ftype = (enum ix86_builtin_func_type) d->flag;
- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code);
- }
- BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_LAST,
- IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
- ARRAY_SIZE (bdesc_multi_arg) - 1);
-
- /* Add CET intrinsics. */
- for (i = 0, d = bdesc_cet; i < ARRAY_SIZE (bdesc_cet); i++, d++)
- {
- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_FIRST, i);
- if (d->name == 0)
- continue;
-
- ftype = (enum ix86_builtin_func_type) d->flag;
- def_builtin (d->mask, d->mask2, d->name, ftype, d->code);
- }
- BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_LAST,
- IX86_BUILTIN__BDESC_CET_FIRST,
- ARRAY_SIZE (bdesc_cet) - 1);
-
- for (i = 0, d = bdesc_cet_rdssp;
- i < ARRAY_SIZE (bdesc_cet_rdssp);
- i++, d++)
- {
- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, i);
- if (d->name == 0)
- continue;
-
- ftype = (enum ix86_builtin_func_type) d->flag;
- def_builtin (d->mask, d->mask2, d->name, ftype, d->code);
- }
- BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_LAST,
- IX86_BUILTIN__BDESC_CET_NORMAL_FIRST,
- ARRAY_SIZE (bdesc_cet_rdssp) - 1);
-}
-
-#undef BDESC_VERIFY
-#undef BDESC_VERIFYS
-
-/* Make builtins to detect cpu type and features supported. NAME is
- the builtin name, CODE is the builtin code, and FTYPE is the function
- type of the builtin. */
-
-static void
-make_cpu_type_builtin (const char* name, int code,
- enum ix86_builtin_func_type ftype, bool is_const)
-{
- tree decl;
- tree type;
-
- type = ix86_get_builtin_func_type (ftype);
- decl = add_builtin_function (name, type, code, BUILT_IN_MD,
- NULL, NULL_TREE);
- gcc_assert (decl != NULL_TREE);
- ix86_builtins[(int) code] = decl;
- TREE_READONLY (decl) = is_const;
-}
-
-/* Make builtins to get CPU type and features supported. The created
- builtins are:
-
- __builtin_cpu_init (), to detect cpu type and features,
- __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
- __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
- */
-
-static void
-ix86_init_platform_type_builtins (void)
-{
- make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
- INT_FTYPE_VOID, false);
- make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
- INT_FTYPE_PCCHAR, true);
- make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
- INT_FTYPE_PCCHAR, true);
-}
-
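These are the documented, user-facing CPU detection builtins.  A
runtime-dispatch sketch (impl_avx2 and impl_generic are hypothetical
implementations, not part of the deleted file):

extern void impl_avx2 (void), impl_generic (void);
void (*impl) (void);

void
select_impl (void)
{
  __builtin_cpu_init ();	/* initialize the CPU feature data */
  if (__builtin_cpu_supports ("avx2"))
    impl = impl_avx2;
  else
    impl = impl_generic;
}
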
-/* Internal method for ix86_init_builtins. */
-
-static void
-ix86_init_builtins_va_builtins_abi (void)
-{
- tree ms_va_ref, sysv_va_ref;
- tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
- tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
- tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
- tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
-
- if (!TARGET_64BIT)
- return;
- fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
- fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
- ms_va_ref = build_reference_type (ms_va_list_type_node);
- sysv_va_ref = build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
-
- fnvoid_va_end_ms = build_function_type_list (void_type_node, ms_va_ref,
- NULL_TREE);
- fnvoid_va_start_ms
- = build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
- fnvoid_va_end_sysv
- = build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
- fnvoid_va_start_sysv
- = build_varargs_function_type_list (void_type_node, sysv_va_ref,
- NULL_TREE);
- fnvoid_va_copy_ms
- = build_function_type_list (void_type_node, ms_va_ref,
- ms_va_list_type_node, NULL_TREE);
- fnvoid_va_copy_sysv
- = build_function_type_list (void_type_node, sysv_va_ref,
- sysv_va_ref, NULL_TREE);
-
- add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
- BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
- add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
- BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
- add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
- BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
- add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
- BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
- add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
- BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
- add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
- BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
-}
-
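These give 64-bit code explicit access to both calling conventions'
varargs handling.  A sketch of an ms_abi variadic function using the
matching builtins (it relies on the __builtin_ms_va_list type GCC also
provides on x86-64):

__attribute__ ((ms_abi))
int
sum_ints (int n, ...)
{
  __builtin_ms_va_list ap;
  int s = 0;
  __builtin_ms_va_start (ap, n);
  while (n-- > 0)
    s += __builtin_va_arg (ap, int);
  __builtin_ms_va_end (ap);
  return s;
}
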
-static void
-ix86_init_builtin_types (void)
-{
- tree float80_type_node, const_string_type_node;
-
- /* The __float80 type. */
- float80_type_node = long_double_type_node;
- if (TYPE_MODE (float80_type_node) != XFmode)
- {
- if (float64x_type_node != NULL_TREE
- && TYPE_MODE (float64x_type_node) == XFmode)
- float80_type_node = float64x_type_node;
- else
- {
- /* The __float80 type. */
- float80_type_node = make_node (REAL_TYPE);
-
- TYPE_PRECISION (float80_type_node) = 80;
- layout_type (float80_type_node);
- }
- }
- lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
-
- /* The __float128 type. The node has already been created as
- _Float128, so we only need to register the __float128 name for
- it. */
- lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
-
- const_string_type_node
- = build_pointer_type (build_qualified_type
- (char_type_node, TYPE_QUAL_CONST));
-
- /* This macro is built by i386-builtin-types.awk. */
- DEFINE_BUILTIN_PRIMITIVE_TYPES;
-}
-
-void
-ix86_init_builtins (void)
-{
- tree ftype, decl;
-
- ix86_init_builtin_types ();
-
- /* Builtins to get CPU type and features. */
- ix86_init_platform_type_builtins ();
-
- /* TFmode support builtins. */
- def_builtin_const (0, 0, "__builtin_infq",
- FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
- def_builtin_const (0, 0, "__builtin_huge_valq",
- FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
-
- ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_CONST_STRING);
- decl = add_builtin_function ("__builtin_nanq", ftype, IX86_BUILTIN_NANQ,
- BUILT_IN_MD, "nanq", NULL_TREE);
- TREE_READONLY (decl) = 1;
- ix86_builtins[(int) IX86_BUILTIN_NANQ] = decl;
-
- decl = add_builtin_function ("__builtin_nansq", ftype, IX86_BUILTIN_NANSQ,
- BUILT_IN_MD, "nansq", NULL_TREE);
- TREE_READONLY (decl) = 1;
- ix86_builtins[(int) IX86_BUILTIN_NANSQ] = decl;
-
- /* We will expand them to a normal call if SSE isn't available since
- they are used by libgcc. */
- ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
- decl = add_builtin_function ("__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ,
- BUILT_IN_MD, "__fabstf2", NULL_TREE);
- TREE_READONLY (decl) = 1;
- ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
-
- ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
- decl = add_builtin_function ("__builtin_copysignq", ftype,
- IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
- "__copysigntf3", NULL_TREE);
- TREE_READONLY (decl) = 1;
- ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
-
- ix86_init_tm_builtins ();
- ix86_init_mmx_sse_builtins ();
-
- if (TARGET_LP64)
- ix86_init_builtins_va_builtins_abi ();
-
-#ifdef SUBTARGET_INIT_BUILTINS
- SUBTARGET_INIT_BUILTINS;
-#endif
-}
-
-/* Return the ix86 builtin for CODE. */
-
-tree
-ix86_builtin_decl (unsigned code, bool)
-{
- if (code >= IX86_BUILTIN_MAX)
- return error_mark_node;
-
- return ix86_builtins[code];
-}
-
-/* This returns the target-specific builtin with code CODE if
- current_function_decl has visibility on this builtin, which is checked
- using isa flags. Returns NULL_TREE otherwise. */
-
-static tree
-ix86_get_builtin (enum ix86_builtins code)
-{
- struct cl_target_option *opts;
- tree target_tree = NULL_TREE;
-
- /* Determine the isa flags of current_function_decl. */
-
- if (current_function_decl)
- target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
-
- if (target_tree == NULL)
- target_tree = target_option_default_node;
-
- opts = TREE_TARGET_OPTION (target_tree);
-
- if ((ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
- || (ix86_builtins_isa[(int) code].isa2 & opts->x_ix86_isa_flags2))
- return ix86_builtin_decl (code, true);
- else
- return NULL_TREE;
-}
-
-/* Vectorization library interface and handlers. */
-tree (*ix86_veclib_handler) (combined_fn, tree, tree);
-
-/* Returns a function decl for a vectorized version of the combined function
- with combined_fn code FN and the result vector type TYPE, or NULL_TREE
- if it is not available. */
-
-tree
-ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
- tree type_in)
-{
- machine_mode in_mode, out_mode;
- int in_n, out_n;
-
- if (TREE_CODE (type_out) != VECTOR_TYPE
- || TREE_CODE (type_in) != VECTOR_TYPE)
- return NULL_TREE;
-
- out_mode = TYPE_MODE (TREE_TYPE (type_out));
- out_n = TYPE_VECTOR_SUBPARTS (type_out);
- in_mode = TYPE_MODE (TREE_TYPE (type_in));
- in_n = TYPE_VECTOR_SUBPARTS (type_in);
-
- switch (fn)
- {
- CASE_CFN_EXP2:
- if (out_mode == SFmode && in_mode == SFmode)
- {
- if (out_n == 16 && in_n == 16)
- return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
- }
- break;
-
- CASE_CFN_IFLOOR:
- CASE_CFN_LFLOOR:
- CASE_CFN_LLFLOOR:
- /* The round insn does not trap on denormals. */
- if (flag_trapping_math || !TARGET_SSE4_1)
- break;
-
- if (out_mode == SImode && in_mode == DFmode)
- {
- if (out_n == 4 && in_n == 2)
- return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
- else if (out_n == 8 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
- else if (out_n == 16 && in_n == 8)
- return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
- }
- if (out_mode == SImode && in_mode == SFmode)
- {
- if (out_n == 4 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
- else if (out_n == 8 && in_n == 8)
- return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
- else if (out_n == 16 && in_n == 16)
- return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512);
- }
- break;
-
- CASE_CFN_ICEIL:
- CASE_CFN_LCEIL:
- CASE_CFN_LLCEIL:
- /* The round insn does not trap on denormals. */
- if (flag_trapping_math || !TARGET_SSE4_1)
- break;
-
- if (out_mode == SImode && in_mode == DFmode)
- {
- if (out_n == 4 && in_n == 2)
- return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
- else if (out_n == 8 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
- else if (out_n == 16 && in_n == 8)
- return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
- }
- if (out_mode == SImode && in_mode == SFmode)
- {
- if (out_n == 4 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
- else if (out_n == 8 && in_n == 8)
- return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
- else if (out_n == 16 && in_n == 16)
- return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512);
- }
- break;
-
- CASE_CFN_IRINT:
- CASE_CFN_LRINT:
- CASE_CFN_LLRINT:
- if (out_mode == SImode && in_mode == DFmode)
- {
- if (out_n == 4 && in_n == 2)
- return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
- else if (out_n == 8 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
- else if (out_n == 16 && in_n == 8)
- return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512);
- }
- if (out_mode == SImode && in_mode == SFmode)
- {
- if (out_n == 4 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
- else if (out_n == 8 && in_n == 8)
- return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
- else if (out_n == 16 && in_n == 16)
- return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512);
- }
- break;
-
- CASE_CFN_IROUND:
- CASE_CFN_LROUND:
- CASE_CFN_LLROUND:
- /* The round insn does not trap on denormals. */
- if (flag_trapping_math || !TARGET_SSE4_1)
- break;
-
- if (out_mode == SImode && in_mode == DFmode)
- {
- if (out_n == 4 && in_n == 2)
- return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
- else if (out_n == 8 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
- else if (out_n == 16 && in_n == 8)
- return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
- }
- if (out_mode == SImode && in_mode == SFmode)
- {
- if (out_n == 4 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
- else if (out_n == 8 && in_n == 8)
- return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
- else if (out_n == 16 && in_n == 16)
- return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512);
- }
- break;
-
- CASE_CFN_FLOOR:
- /* The round insn does not trap on denormals. */
- if (flag_trapping_math || !TARGET_SSE4_1)
- break;
-
- if (out_mode == DFmode && in_mode == DFmode)
- {
- if (out_n == 2 && in_n == 2)
- return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
- else if (out_n == 4 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
- else if (out_n == 8 && in_n == 8)
- return ix86_get_builtin (IX86_BUILTIN_FLOORPD512);
- }
- if (out_mode == SFmode && in_mode == SFmode)
- {
- if (out_n == 4 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
- else if (out_n == 8 && in_n == 8)
- return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
- else if (out_n == 16 && in_n == 16)
- return ix86_get_builtin (IX86_BUILTIN_FLOORPS512);
- }
- break;
-
- CASE_CFN_CEIL:
- /* The round insn does not trap on denormals. */
- if (flag_trapping_math || !TARGET_SSE4_1)
- break;
-
- if (out_mode == DFmode && in_mode == DFmode)
- {
- if (out_n == 2 && in_n == 2)
- return ix86_get_builtin (IX86_BUILTIN_CEILPD);
- else if (out_n == 4 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
- else if (out_n == 8 && in_n == 8)
- return ix86_get_builtin (IX86_BUILTIN_CEILPD512);
- }
- if (out_mode == SFmode && in_mode == SFmode)
- {
- if (out_n == 4 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_CEILPS);
- else if (out_n == 8 && in_n == 8)
- return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
- else if (out_n == 16 && in_n == 16)
- return ix86_get_builtin (IX86_BUILTIN_CEILPS512);
- }
- break;
-
- CASE_CFN_TRUNC:
- /* The round insn does not trap on denormals. */
- if (flag_trapping_math || !TARGET_SSE4_1)
- break;
-
- if (out_mode == DFmode && in_mode == DFmode)
- {
- if (out_n == 2 && in_n == 2)
- return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
- else if (out_n == 4 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
- else if (out_n == 8 && in_n == 8)
- return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512);
- }
- if (out_mode == SFmode && in_mode == SFmode)
- {
- if (out_n == 4 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
- else if (out_n == 8 && in_n == 8)
- return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
- else if (out_n == 16 && in_n == 16)
- return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512);
- }
- break;
-
- CASE_CFN_FMA:
- if (out_mode == DFmode && in_mode == DFmode)
- {
- if (out_n == 2 && in_n == 2)
- return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
- if (out_n == 4 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
- }
- if (out_mode == SFmode && in_mode == SFmode)
- {
- if (out_n == 4 && in_n == 4)
- return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
- if (out_n == 8 && in_n == 8)
- return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
- }
- break;
-
- default:
- break;
- }
-
- /* Dispatch to a handler for a vectorization library. */
- if (ix86_veclib_handler)
- return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
-
- return NULL_TREE;
-}
-
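-/* Illustrative sketch (not part of the original sources): the kind of
-   loop the hook above serves.  Compiled with, say, -O3 -mavx
-   -fno-trapping-math, the vectorizer queries the hook for CFN_FLOOR
-   with V4DFmode in and out and receives IX86_BUILTIN_FLOORPD256.  */
-
-#include <math.h>
-
-void
-floor_all (double *a, const double *b, int n)
-{
-  for (int i = 0; i < n; i++)
-    a[i] = floor (b[i]);
-}
-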
-/* Returns a decl of a function that implements gather load with
- memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
- Return NULL_TREE if it is not available. */
-
-tree
-ix86_vectorize_builtin_gather (const_tree mem_vectype,
- const_tree index_type, int scale)
-{
- bool si;
- enum ix86_builtins code;
-
- if (! TARGET_AVX2 || !TARGET_USE_GATHER)
- return NULL_TREE;
-
- if ((TREE_CODE (index_type) != INTEGER_TYPE
- && !POINTER_TYPE_P (index_type))
- || (TYPE_MODE (index_type) != SImode
- && TYPE_MODE (index_type) != DImode))
- return NULL_TREE;
-
- if (TYPE_PRECISION (index_type) > POINTER_SIZE)
- return NULL_TREE;
-
- /* The v*gather* insns sign-extend the index to pointer mode. */
- if (TYPE_PRECISION (index_type) < POINTER_SIZE
- && TYPE_UNSIGNED (index_type))
- return NULL_TREE;
-
- if (scale <= 0
- || scale > 8
- || (scale & (scale - 1)) != 0)
- return NULL_TREE;
-
- si = TYPE_MODE (index_type) == SImode;
- switch (TYPE_MODE (mem_vectype))
- {
- case E_V2DFmode:
- if (TARGET_AVX512VL)
- code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
- else
- code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
- break;
- case E_V4DFmode:
- if (TARGET_AVX512VL)
- code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
- else
- code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
- break;
- case E_V2DImode:
- if (TARGET_AVX512VL)
- code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
- else
- code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
- break;
- case E_V4DImode:
- if (TARGET_AVX512VL)
- code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
- else
- code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
- break;
- case E_V4SFmode:
- if (TARGET_AVX512VL)
- code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
- else
- code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
- break;
- case E_V8SFmode:
- if (TARGET_AVX512VL)
- code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
- else
- code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
- break;
- case E_V4SImode:
- if (TARGET_AVX512VL)
- code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
- else
- code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
- break;
- case E_V8SImode:
- if (TARGET_AVX512VL)
- code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
- else
- code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
- break;
- case E_V8DFmode:
- if (TARGET_AVX512F)
- code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
- else
- return NULL_TREE;
- break;
- case E_V8DImode:
- if (TARGET_AVX512F)
- code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
- else
- return NULL_TREE;
- break;
- case E_V16SFmode:
- if (TARGET_AVX512F)
- code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
- else
- return NULL_TREE;
- break;
- case E_V16SImode:
- if (TARGET_AVX512F)
- code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
- else
- return NULL_TREE;
- break;
- default:
- return NULL_TREE;
- }
-
- return ix86_get_builtin (code);
-}
-
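-/* Illustrative sketch (not part of the original sources): an indexed
-   load of the shape the hook above handles.  With -O3 -mavx2 the
-   vectorizer may request a gather decl here (for an SImode index and
-   V8SFmode data, IX86_BUILTIN_GATHERSIV8SF) instead of scalarizing
-   the loads.  */
-
-void
-gather_f (float *out, const float *tbl, const int *idx, int n)
-{
-  for (int i = 0; i < n; i++)
-    out[i] = tbl[idx[i]];
-}
-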
-/* Returns the decl of a target-specific builtin that implements the
- reciprocal of the function FNDECL, or NULL_TREE if not available. */
-
-tree
-ix86_builtin_reciprocal (tree fndecl)
-{
- enum ix86_builtins fn_code
- = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
- switch (fn_code)
- {
- /* Vectorized version of sqrt to rsqrt conversion. */
- case IX86_BUILTIN_SQRTPS_NR:
- return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
-
- case IX86_BUILTIN_SQRTPS_NR256:
- return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
-
- default:
- return NULL_TREE;
- }
-}
-
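-/* Illustrative sketch (not part of the original sources): under
-   -O3 -mavx -ffast-math (or -mrecip) a vectorized sqrtf loop like
-   this one can be rewritten through the hook above to use RSQRTPS
-   plus a Newton-Raphson step, trading a little precision for
-   throughput.  */
-
-#include <math.h>
-
-void
-sqrt_all (float *a, const float *b, int n)
-{
-  for (int i = 0; i < n; i++)
-    a[i] = sqrtf (b[i]);
-}
-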
-/* Priority of i386 features, greater value is higher priority. This is
- used to decide the order in which function dispatch must happen. For
- instance, a version specialized for SSE4.2 should be checked for dispatch
- before a version for SSE3, as SSE4.2 implies SSE3. */
-enum feature_priority
-{
- P_ZERO = 0,
- P_MMX,
- P_SSE,
- P_SSE2,
- P_SSE3,
- P_SSSE3,
- P_PROC_SSSE3,
- P_SSE4_A,
- P_PROC_SSE4_A,
- P_SSE4_1,
- P_SSE4_2,
- P_PROC_SSE4_2,
- P_POPCNT,
- P_AES,
- P_PCLMUL,
- P_AVX,
- P_PROC_AVX,
- P_BMI,
- P_PROC_BMI,
- P_FMA4,
- P_XOP,
- P_PROC_XOP,
- P_FMA,
- P_PROC_FMA,
- P_BMI2,
- P_AVX2,
- P_PROC_AVX2,
- P_AVX512F,
- P_PROC_AVX512F
-};
-
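-/* Illustrative sketch (not part of the original sources): these
-   priorities order dispatch for function multiversioning.  For a
-   clone set like the one below, the AVX2 clone (P_AVX2) is tested
-   before the SSE4.2 clone (P_SSE4_2), which is tested before the
-   default.  */
-
-__attribute__ ((target_clones ("avx2", "sse4.2", "default")))
-int
-dot (const int *a, const int *b, int n)
-{
-  int s = 0;
-  for (int i = 0; i < n; i++)
-    s += a[i] * b[i];
-  return s;
-}
-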
-/* This is the order of bit-fields in __processor_features in cpuinfo.c */
-enum processor_features
-{
- F_CMOV = 0,
- F_MMX,
- F_POPCNT,
- F_SSE,
- F_SSE2,
- F_SSE3,
- F_SSSE3,
- F_SSE4_1,
- F_SSE4_2,
- F_AVX,
- F_AVX2,
- F_SSE4_A,
- F_FMA4,
- F_XOP,
- F_FMA,
- F_AVX512F,
- F_BMI,
- F_BMI2,
- F_AES,
- F_PCLMUL,
- F_AVX512VL,
- F_AVX512BW,
- F_AVX512DQ,
- F_AVX512CD,
- F_AVX512ER,
- F_AVX512PF,
- F_AVX512VBMI,
- F_AVX512IFMA,
- F_AVX5124VNNIW,
- F_AVX5124FMAPS,
- F_AVX512VPOPCNTDQ,
- F_AVX512VBMI2,
- F_GFNI,
- F_VPCLMULQDQ,
- F_AVX512VNNI,
- F_AVX512BITALG,
- F_AVX512BF16,
- F_AVX512VP2INTERSECT,
- F_MAX
-};
-
-/* These are the values for vendor types and cpu types and subtypes
- in cpuinfo.c. Cpu types and subtypes must have the corresponding
- start value subtracted before use. */
-enum processor_model
-{
- M_INTEL = 1,
- M_AMD,
- M_CPU_TYPE_START,
- M_INTEL_BONNELL,
- M_INTEL_CORE2,
- M_INTEL_COREI7,
- M_AMDFAM10H,
- M_AMDFAM15H,
- M_INTEL_SILVERMONT,
- M_INTEL_KNL,
- M_AMD_BTVER1,
- M_AMD_BTVER2,
- M_AMDFAM17H,
- M_INTEL_KNM,
- M_INTEL_GOLDMONT,
- M_INTEL_GOLDMONT_PLUS,
- M_INTEL_TREMONT,
- M_CPU_SUBTYPE_START,
- M_INTEL_COREI7_NEHALEM,
- M_INTEL_COREI7_WESTMERE,
- M_INTEL_COREI7_SANDYBRIDGE,
- M_AMDFAM10H_BARCELONA,
- M_AMDFAM10H_SHANGHAI,
- M_AMDFAM10H_ISTANBUL,
- M_AMDFAM15H_BDVER1,
- M_AMDFAM15H_BDVER2,
- M_AMDFAM15H_BDVER3,
- M_AMDFAM15H_BDVER4,
- M_AMDFAM17H_ZNVER1,
- M_INTEL_COREI7_IVYBRIDGE,
- M_INTEL_COREI7_HASWELL,
- M_INTEL_COREI7_BROADWELL,
- M_INTEL_COREI7_SKYLAKE,
- M_INTEL_COREI7_SKYLAKE_AVX512,
- M_INTEL_COREI7_CANNONLAKE,
- M_INTEL_COREI7_ICELAKE_CLIENT,
- M_INTEL_COREI7_ICELAKE_SERVER,
- M_AMDFAM17H_ZNVER2,
- M_INTEL_COREI7_CASCADELAKE,
- M_INTEL_COREI7_TIGERLAKE,
- M_INTEL_COREI7_COOPERLAKE
-};
-
-struct _arch_names_table
-{
- const char *const name;
- const enum processor_model model;
-};
-
-static const _arch_names_table arch_names_table[] =
-{
- {"amd", M_AMD},
- {"intel", M_INTEL},
- {"atom", M_INTEL_BONNELL},
- {"slm", M_INTEL_SILVERMONT},
- {"core2", M_INTEL_CORE2},
- {"corei7", M_INTEL_COREI7},
- {"nehalem", M_INTEL_COREI7_NEHALEM},
- {"westmere", M_INTEL_COREI7_WESTMERE},
- {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
- {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
- {"haswell", M_INTEL_COREI7_HASWELL},
- {"broadwell", M_INTEL_COREI7_BROADWELL},
- {"skylake", M_INTEL_COREI7_SKYLAKE},
- {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
- {"cannonlake", M_INTEL_COREI7_CANNONLAKE},
- {"icelake-client", M_INTEL_COREI7_ICELAKE_CLIENT},
- {"icelake-server", M_INTEL_COREI7_ICELAKE_SERVER},
- {"cascadelake", M_INTEL_COREI7_CASCADELAKE},
- {"tigerlake", M_INTEL_COREI7_TIGERLAKE},
- {"cooperlake", M_INTEL_COREI7_COOPERLAKE},
- {"bonnell", M_INTEL_BONNELL},
- {"silvermont", M_INTEL_SILVERMONT},
- {"goldmont", M_INTEL_GOLDMONT},
- {"goldmont-plus", M_INTEL_GOLDMONT_PLUS},
- {"tremont", M_INTEL_TREMONT},
- {"knl", M_INTEL_KNL},
- {"knm", M_INTEL_KNM},
- {"amdfam10h", M_AMDFAM10H},
- {"barcelona", M_AMDFAM10H_BARCELONA},
- {"shanghai", M_AMDFAM10H_SHANGHAI},
- {"istanbul", M_AMDFAM10H_ISTANBUL},
- {"btver1", M_AMD_BTVER1},
- {"amdfam15h", M_AMDFAM15H},
- {"bdver1", M_AMDFAM15H_BDVER1},
- {"bdver2", M_AMDFAM15H_BDVER2},
- {"bdver3", M_AMDFAM15H_BDVER3},
- {"bdver4", M_AMDFAM15H_BDVER4},
- {"btver2", M_AMD_BTVER2},
- {"amdfam17h", M_AMDFAM17H},
- {"znver1", M_AMDFAM17H_ZNVER1},
- {"znver2", M_AMDFAM17H_ZNVER2},
-};
-
-/* These are the target attribute strings for which a dispatcher is
- available, from fold_builtin_cpu. */
-struct _isa_names_table
-{
- const char *const name;
- const enum processor_features feature;
- const enum feature_priority priority;
-};
-
-static const _isa_names_table isa_names_table[] =
-{
- {"cmov", F_CMOV, P_ZERO},
- {"mmx", F_MMX, P_MMX},
- {"popcnt", F_POPCNT, P_POPCNT},
- {"sse", F_SSE, P_SSE},
- {"sse2", F_SSE2, P_SSE2},
- {"sse3", F_SSE3, P_SSE3},
- {"ssse3", F_SSSE3, P_SSSE3},
- {"sse4a", F_SSE4_A, P_SSE4_A},
- {"sse4.1", F_SSE4_1, P_SSE4_1},
- {"sse4.2", F_SSE4_2, P_SSE4_2},
- {"avx", F_AVX, P_AVX},
- {"fma4", F_FMA4, P_FMA4},
- {"xop", F_XOP, P_XOP},
- {"fma", F_FMA, P_FMA},
- {"avx2", F_AVX2, P_AVX2},
- {"avx512f", F_AVX512F, P_AVX512F},
- {"bmi", F_BMI, P_BMI},
- {"bmi2", F_BMI2, P_BMI2},
- {"aes", F_AES, P_AES},
- {"pclmul", F_PCLMUL, P_PCLMUL},
- {"avx512vl",F_AVX512VL, P_ZERO},
- {"avx512bw",F_AVX512BW, P_ZERO},
- {"avx512dq",F_AVX512DQ, P_ZERO},
- {"avx512cd",F_AVX512CD, P_ZERO},
- {"avx512er",F_AVX512ER, P_ZERO},
- {"avx512pf",F_AVX512PF, P_ZERO},
- {"avx512vbmi",F_AVX512VBMI, P_ZERO},
- {"avx512ifma",F_AVX512IFMA, P_ZERO},
- {"avx5124vnniw",F_AVX5124VNNIW, P_ZERO},
- {"avx5124fmaps",F_AVX5124FMAPS, P_ZERO},
- {"avx512vpopcntdq",F_AVX512VPOPCNTDQ, P_ZERO},
- {"avx512vbmi2", F_AVX512VBMI2, P_ZERO},
- {"gfni", F_GFNI, P_ZERO},
- {"vpclmulqdq", F_VPCLMULQDQ, P_ZERO},
- {"avx512vnni", F_AVX512VNNI, P_ZERO},
- {"avx512bitalg", F_AVX512BITALG, P_ZERO},
- {"avx512bf16", F_AVX512BF16, P_ZERO},
- {"avx512vp2intersect",F_AVX512VP2INTERSECT, P_ZERO}
-};
-
-/* This parses the attribute arguments to target in DECL and determines
- the right builtin to use to match the platform specification.
- It returns the priority value for this version decl. If PREDICATE_LIST
- is not NULL, it stores the list of cpu features that need to be checked
- before dispatching this function. */
-
-unsigned int
-get_builtin_code_for_version (tree decl, tree *predicate_list)
-{
- tree attrs;
- struct cl_target_option cur_target;
- tree target_node;
- struct cl_target_option *new_target;
- const char *arg_str = NULL;
- const char *attrs_str = NULL;
- char *tok_str = NULL;
- char *token;
-
- enum feature_priority priority = P_ZERO;
-
- static unsigned int NUM_FEATURES
- = sizeof (isa_names_table) / sizeof (_isa_names_table);
-
- unsigned int i;
-
- tree predicate_chain = NULL_TREE;
- tree predicate_decl, predicate_arg;
-
- attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
- gcc_assert (attrs != NULL);
-
- attrs = TREE_VALUE (TREE_VALUE (attrs));
-
- gcc_assert (TREE_CODE (attrs) == STRING_CST);
- attrs_str = TREE_STRING_POINTER (attrs);
-
- /* Return priority zero for default function. */
- if (strcmp (attrs_str, "default") == 0)
- return 0;
-
- /* Handle arch= if specified. For priority, set it to be 1 more than
- the best instruction set the processor can handle. For instance, if
- there is a version for atom and a version for ssse3 (the highest ISA
- priority for atom), the atom version must be checked for dispatch
- before the ssse3 version. */
- if (strstr (attrs_str, "arch=") != NULL)
- {
- cl_target_option_save (&cur_target, &global_options);
- target_node
- = ix86_valid_target_attribute_tree (decl, attrs, &global_options,
- &global_options_set, 0);
-
- gcc_assert (target_node);
- if (target_node == error_mark_node)
- return 0;
- new_target = TREE_TARGET_OPTION (target_node);
- gcc_assert (new_target);
-
- if (new_target->arch_specified && new_target->arch > 0)
- {
- switch (new_target->arch)
- {
- case PROCESSOR_CORE2:
- arg_str = "core2";
- priority = P_PROC_SSSE3;
- break;
- case PROCESSOR_NEHALEM:
- if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_PCLMUL)
- {
- arg_str = "westmere";
- priority = P_PCLMUL;
- }
- else
- {
- /* We translate "arch=corei7" and "arch=nehalem" to
- "corei7" so that it will be mapped to M_INTEL_COREI7
- as cpu type to cover all M_INTEL_COREI7_XXXs. */
- arg_str = "corei7";
- priority = P_PROC_SSE4_2;
- }
- break;
- case PROCESSOR_SANDYBRIDGE:
- if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
- arg_str = "ivybridge";
- else
- arg_str = "sandybridge";
- priority = P_PROC_AVX;
- break;
- case PROCESSOR_HASWELL:
- if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
- arg_str = "broadwell";
- else
- arg_str = "haswell";
- priority = P_PROC_AVX2;
- break;
- case PROCESSOR_SKYLAKE:
- arg_str = "skylake";
- priority = P_PROC_AVX2;
- break;
- case PROCESSOR_SKYLAKE_AVX512:
- arg_str = "skylake-avx512";
- priority = P_PROC_AVX512F;
- break;
- case PROCESSOR_CANNONLAKE:
- arg_str = "cannonlake";
- priority = P_PROC_AVX512F;
- break;
- case PROCESSOR_ICELAKE_CLIENT:
- arg_str = "icelake-client";
- priority = P_PROC_AVX512F;
- break;
- case PROCESSOR_ICELAKE_SERVER:
- arg_str = "icelake-server";
- priority = P_PROC_AVX512F;
- break;
- case PROCESSOR_CASCADELAKE:
- arg_str = "cascadelake";
- priority = P_PROC_AVX512F;
- break;
- case PROCESSOR_TIGERLAKE:
- arg_str = "tigerlake";
- priority = P_PROC_AVX512F;
- break;
- case PROCESSOR_COOPERLAKE:
- arg_str = "cooperlake";
- priority = P_PROC_AVX512F;
- break;
- case PROCESSOR_BONNELL:
- arg_str = "bonnell";
- priority = P_PROC_SSSE3;
- break;
- case PROCESSOR_KNL:
- arg_str = "knl";
- priority = P_PROC_AVX512F;
- break;
- case PROCESSOR_KNM:
- arg_str = "knm";
- priority = P_PROC_AVX512F;
- break;
- case PROCESSOR_SILVERMONT:
- arg_str = "silvermont";
- priority = P_PROC_SSE4_2;
- break;
- case PROCESSOR_GOLDMONT:
- arg_str = "goldmont";
- priority = P_PROC_SSE4_2;
- break;
- case PROCESSOR_GOLDMONT_PLUS:
- arg_str = "goldmont-plus";
- priority = P_PROC_SSE4_2;
- break;
- case PROCESSOR_TREMONT:
- arg_str = "tremont";
- priority = P_PROC_SSE4_2;
- break;
- case PROCESSOR_AMDFAM10:
- arg_str = "amdfam10h";
- priority = P_PROC_SSE4_A;
- break;
- case PROCESSOR_BTVER1:
- arg_str = "btver1";
- priority = P_PROC_SSE4_A;
- break;
- case PROCESSOR_BTVER2:
- arg_str = "btver2";
- priority = P_PROC_BMI;
- break;
- case PROCESSOR_BDVER1:
- arg_str = "bdver1";
- priority = P_PROC_XOP;
- break;
- case PROCESSOR_BDVER2:
- arg_str = "bdver2";
- priority = P_PROC_FMA;
- break;
- case PROCESSOR_BDVER3:
- arg_str = "bdver3";
- priority = P_PROC_FMA;
- break;
- case PROCESSOR_BDVER4:
- arg_str = "bdver4";
- priority = P_PROC_AVX2;
- break;
- case PROCESSOR_ZNVER1:
- arg_str = "znver1";
- priority = P_PROC_AVX2;
- break;
- case PROCESSOR_ZNVER2:
- arg_str = "znver2";
- priority = P_PROC_AVX2;
- break;
- }
- }
-
- cl_target_option_restore (&global_options, &cur_target);
-
- if (predicate_list && arg_str == NULL)
- {
- error_at (DECL_SOURCE_LOCATION (decl),
- "no dispatcher found for the versioning attributes");
- return 0;
- }
-
- if (predicate_list)
- {
- predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
- /* For a C string literal the length includes the trailing NULL. */
- predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
- predicate_chain = tree_cons (predicate_decl, predicate_arg,
- predicate_chain);
- }
- }
-
- /* Process feature name. */
- tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
- strcpy (tok_str, attrs_str);
- token = strtok (tok_str, ",");
- predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
-
- while (token != NULL)
- {
- /* Do not process "arch=" */
- if (strncmp (token, "arch=", 5) == 0)
- {
- token = strtok (NULL, ",");
- continue;
- }
- for (i = 0; i < NUM_FEATURES; ++i)
- {
- if (strcmp (token, isa_names_table[i].name) == 0)
- {
- if (predicate_list)
- {
- predicate_arg = build_string_literal (
- strlen (isa_names_table[i].name) + 1,
- isa_names_table[i].name);
- predicate_chain = tree_cons (predicate_decl, predicate_arg,
- predicate_chain);
- }
- /* Find the maximum priority feature. */
- if (isa_names_table[i].priority > priority)
- priority = isa_names_table[i].priority;
-
- break;
- }
- }
- if (predicate_list && priority == P_ZERO)
- {
- error_at (DECL_SOURCE_LOCATION (decl),
- "ISA %qs is not supported in %<target%> attribute, "
- "use %<arch=%> syntax", token);
- return 0;
- }
- token = strtok (NULL, ",");
- }
- free (tok_str);
-
- if (predicate_list && predicate_chain == NULL_TREE)
- {
- error_at (DECL_SOURCE_LOCATION (decl),
- "no dispatcher found for the versioning attributes: %s",
- attrs_str);
- return 0;
- }
- else if (predicate_list)
- {
- predicate_chain = nreverse (predicate_chain);
- *predicate_list = predicate_chain;
- }
-
- return priority;
-}
-
-/* This builds the processor_model struct type defined in
- libgcc/config/i386/cpuinfo.c. */
-
-static tree
-build_processor_model_struct (void)
-{
- const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
- "__cpu_features"};
- tree field = NULL_TREE, field_chain = NULL_TREE;
- int i;
- tree type = make_node (RECORD_TYPE);
-
- /* The first 3 fields are unsigned int. */
- for (i = 0; i < 3; ++i)
- {
- field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
- get_identifier (field_name[i]), unsigned_type_node);
- if (field_chain != NULL_TREE)
- DECL_CHAIN (field) = field_chain;
- field_chain = field;
- }
-
- /* The last field is an array of unsigned integers of size one. */
- field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
- get_identifier (field_name[3]),
- build_array_type (unsigned_type_node,
- build_index_type (size_one_node)));
- if (field_chain != NULL_TREE)
- DECL_CHAIN (field) = field_chain;
- field_chain = field;
-
- finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
- return type;
-}
-
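-/* Illustrative sketch (not part of the original sources): the layout
-   built above is meant to match the declaration on the libgcc side,
-   roughly:
-
-     struct __processor_model
-     {
-       unsigned int __cpu_vendor;
-       unsigned int __cpu_type;
-       unsigned int __cpu_subtype;
-       unsigned int __cpu_features[1];
-     };  */
-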
-/* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
-
-static tree
-make_var_decl (tree type, const char *name)
-{
- tree new_decl;
-
- new_decl = build_decl (UNKNOWN_LOCATION,
- VAR_DECL,
- get_identifier(name),
- type);
-
- DECL_EXTERNAL (new_decl) = 1;
- TREE_STATIC (new_decl) = 1;
- TREE_PUBLIC (new_decl) = 1;
- DECL_INITIAL (new_decl) = 0;
- DECL_ARTIFICIAL (new_decl) = 0;
- DECL_PRESERVE_P (new_decl) = 1;
-
- make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
- assemble_variable (new_decl, 0, 0, 0);
-
- return new_decl;
-}
-
-/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call; fold
- it into a test against the data defined in libgcc/config/i386/cpuinfo.c. */
-
-tree
-fold_builtin_cpu (tree fndecl, tree *args)
-{
- unsigned int i;
- enum ix86_builtins fn_code
- = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
- tree param_string_cst = NULL;
-
- tree __processor_model_type = build_processor_model_struct ();
- tree __cpu_model_var = make_var_decl (__processor_model_type,
- "__cpu_model");
-
- varpool_node::add (__cpu_model_var);
-
- gcc_assert ((args != NULL) && (*args != NULL));
-
- param_string_cst = *args;
- while (param_string_cst
- && TREE_CODE (param_string_cst) != STRING_CST)
- {
- /* *args must be an expr that can contain other EXPRs leading to a
- STRING_CST. */
- if (!EXPR_P (param_string_cst))
- {
- error ("parameter to builtin must be a string constant or literal");
- return integer_zero_node;
- }
- param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
- }
-
- gcc_assert (param_string_cst);
-
- if (fn_code == IX86_BUILTIN_CPU_IS)
- {
- tree ref;
- tree field;
- tree final;
-
- unsigned int field_val = 0;
- unsigned int NUM_ARCH_NAMES
- = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
-
- for (i = 0; i < NUM_ARCH_NAMES; i++)
- if (strcmp (arch_names_table[i].name,
- TREE_STRING_POINTER (param_string_cst)) == 0)
- break;
-
- if (i == NUM_ARCH_NAMES)
- {
- error ("parameter to builtin not valid: %s",
- TREE_STRING_POINTER (param_string_cst));
- return integer_zero_node;
- }
-
- field = TYPE_FIELDS (__processor_model_type);
- field_val = arch_names_table[i].model;
-
- /* CPU types are stored in the next field. */
- if (field_val > M_CPU_TYPE_START
- && field_val < M_CPU_SUBTYPE_START)
- {
- field = DECL_CHAIN (field);
- field_val -= M_CPU_TYPE_START;
- }
-
- /* CPU subtypes are stored in the next field. */
- if (field_val > M_CPU_SUBTYPE_START)
- {
- field = DECL_CHAIN (DECL_CHAIN (field));
- field_val -= M_CPU_SUBTYPE_START;
- }
-
- /* Get the appropriate field in __cpu_model. */
- ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
- field, NULL_TREE);
-
- /* Check the value. */
- final = build2 (EQ_EXPR, unsigned_type_node, ref,
- build_int_cstu (unsigned_type_node, field_val));
- return build1 (CONVERT_EXPR, integer_type_node, final);
- }
- else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
- {
- tree ref;
- tree array_elt;
- tree field;
- tree final;
-
- unsigned int field_val = 0;
- unsigned int NUM_ISA_NAMES
- = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
-
- for (i = 0; i < NUM_ISA_NAMES; i++)
- if (strcmp (isa_names_table[i].name,
- TREE_STRING_POINTER (param_string_cst)) == 0)
- break;
-
- if (i == NUM_ISA_NAMES)
- {
- error ("parameter to builtin not valid: %s",
- TREE_STRING_POINTER (param_string_cst));
- return integer_zero_node;
- }
-
- if (isa_names_table[i].feature >= 32)
- {
- tree __cpu_features2_var = make_var_decl (unsigned_type_node,
- "__cpu_features2");
-
- varpool_node::add (__cpu_features2_var);
- field_val = (1U << (isa_names_table[i].feature - 32));
- /* Return __cpu_features2 & field_val */
- final = build2 (BIT_AND_EXPR, unsigned_type_node,
- __cpu_features2_var,
- build_int_cstu (unsigned_type_node, field_val));
- return build1 (CONVERT_EXPR, integer_type_node, final);
- }
-
- field = TYPE_FIELDS (__processor_model_type);
- /* Get the last field, which is __cpu_features. */
- while (DECL_CHAIN (field))
- field = DECL_CHAIN (field);
-
- /* Get the appropriate field: __cpu_model.__cpu_features */
- ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
- field, NULL_TREE);
-
- /* Access the 0th element of __cpu_features array. */
- array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
- integer_zero_node, NULL_TREE, NULL_TREE);
-
- field_val = (1U << isa_names_table[i].feature);
- /* Return __cpu_model.__cpu_features[0] & field_val */
- final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
- build_int_cstu (unsigned_type_node, field_val));
- return build1 (CONVERT_EXPR, integer_type_node, final);
- }
- gcc_unreachable ();
-}
-
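-/* Illustrative sketch (not part of the original sources): the
-   user-level view of the folding above.  Both builtins reduce to
-   loads from __cpu_model / __cpu_features2, so after
-   __builtin_cpu_init () they are cheap inline tests.  */
-
-static int
-pick_impl (void)
-{
-  __builtin_cpu_init ();
-  if (__builtin_cpu_is ("haswell"))
-    return 2;
-  if (__builtin_cpu_supports ("avx2"))
-    return 1;
-  return 0;
-}
-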
-#include "gt-i386-builtins.h"
diff --git a/gcc/config/i386/i386-d.c b/gcc/config/i386/i386-d.c
deleted file mode 100644
index 56fec11846e..00000000000
--- a/gcc/config/i386/i386-d.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/* Subroutines for the D front end on the x86 architecture.
- Copyright (C) 2017-2020 Free Software Foundation, Inc.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-#include "tm.h"
-#include "d/d-target.h"
-#include "d/d-target-def.h"
-
-/* Implement TARGET_D_CPU_VERSIONS for x86 targets. */
-
-void
-ix86_d_target_versions (void)
-{
- if (TARGET_64BIT)
- {
- d_add_builtin_version ("X86_64");
-
- if (TARGET_X32)
- d_add_builtin_version ("D_X32");
- }
- else
- d_add_builtin_version ("X86");
-
- if (TARGET_80387)
- d_add_builtin_version ("D_HardFloat");
- else
- d_add_builtin_version ("D_SoftFloat");
-}
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
deleted file mode 100644
index 270585decb2..00000000000
--- a/gcc/config/i386/i386-expand.c
+++ /dev/null
@@ -1,20310 +0,0 @@
-/* Copyright (C) 1988-2020 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-#define IN_TARGET_CODE 1
-
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-#include "backend.h"
-#include "rtl.h"
-#include "tree.h"
-#include "memmodel.h"
-#include "gimple.h"
-#include "cfghooks.h"
-#include "cfgloop.h"
-#include "df.h"
-#include "tm_p.h"
-#include "stringpool.h"
-#include "expmed.h"
-#include "optabs.h"
-#include "regs.h"
-#include "emit-rtl.h"
-#include "recog.h"
-#include "cgraph.h"
-#include "diagnostic.h"
-#include "cfgbuild.h"
-#include "alias.h"
-#include "fold-const.h"
-#include "attribs.h"
-#include "calls.h"
-#include "stor-layout.h"
-#include "varasm.h"
-#include "output.h"
-#include "insn-attr.h"
-#include "flags.h"
-#include "except.h"
-#include "explow.h"
-#include "expr.h"
-#include "cfgrtl.h"
-#include "common/common-target.h"
-#include "langhooks.h"
-#include "reload.h"
-#include "gimplify.h"
-#include "dwarf2.h"
-#include "tm-constrs.h"
-#include "cselib.h"
-#include "sched-int.h"
-#include "opts.h"
-#include "tree-pass.h"
-#include "context.h"
-#include "pass_manager.h"
-#include "target-globals.h"
-#include "gimple-iterator.h"
-#include "tree-vectorizer.h"
-#include "shrink-wrap.h"
-#include "builtins.h"
-#include "rtl-iter.h"
-#include "tree-iterator.h"
-#include "dbgcnt.h"
-#include "case-cfn-macros.h"
-#include "dojump.h"
-#include "fold-const-call.h"
-#include "tree-vrp.h"
-#include "tree-ssanames.h"
-#include "selftest.h"
-#include "selftest-rtl.h"
-#include "print-rtl.h"
-#include "intl.h"
-#include "ifcvt.h"
-#include "symbol-summary.h"
-#include "ipa-prop.h"
-#include "ipa-fnsummary.h"
-#include "wide-int-bitmask.h"
-#include "tree-vector-builder.h"
-#include "debug.h"
-#include "dwarf2out.h"
-#include "i386-options.h"
-#include "i386-builtins.h"
-#include "i386-expand.h"
-
-/* Split one or more double-mode RTL references into pairs of half-mode
- references. The RTL can be REG, offsettable MEM, integer constant, or
- CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
- split and "num" is its length. lo_half and hi_half are output arrays
- that parallel "operands". */
-
-void
-split_double_mode (machine_mode mode, rtx operands[],
- int num, rtx lo_half[], rtx hi_half[])
-{
- machine_mode half_mode;
- unsigned int byte;
- rtx mem_op = NULL_RTX;
- int mem_num = 0;
-
- switch (mode)
- {
- case E_TImode:
- half_mode = DImode;
- break;
- case E_DImode:
- half_mode = SImode;
- break;
- default:
- gcc_unreachable ();
- }
-
- byte = GET_MODE_SIZE (half_mode);
-
- while (num--)
- {
- rtx op = operands[num];
-
- /* simplify_subreg refuses to split volatile memory addresses,
- but we still have to handle them. */
- if (MEM_P (op))
- {
- if (mem_op && rtx_equal_p (op, mem_op))
- {
- lo_half[num] = lo_half[mem_num];
- hi_half[num] = hi_half[mem_num];
- }
- else
- {
- mem_op = op;
- mem_num = num;
- lo_half[num] = adjust_address (op, half_mode, 0);
- hi_half[num] = adjust_address (op, half_mode, byte);
- }
- }
- else
- {
- lo_half[num] = simplify_gen_subreg (half_mode, op,
- GET_MODE (op) == VOIDmode
- ? mode : GET_MODE (op), 0);
- hi_half[num] = simplify_gen_subreg (half_mode, op,
- GET_MODE (op) == VOIDmode
- ? mode : GET_MODE (op), byte);
- }
- }
-}
-
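-/* Illustrative sketch (not part of the original sources): the C-level
-   effect of the split above.  A DImode value on a 32-bit target is
-   handled as two SImode halves, as if by:  */
-
-void
-split64 (unsigned long long x, unsigned int *lo, unsigned int *hi)
-{
-  *lo = (unsigned int) x;          /* low half, bits 0..31.  */
-  *hi = (unsigned int) (x >> 32);  /* high half, bits 32..63.  */
-}
-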
-/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
- for the target. */
-
-void
-ix86_expand_clear (rtx dest)
-{
- rtx tmp;
-
- /* We play register width games, which are only valid after reload. */
- gcc_assert (reload_completed);
-
- /* Avoid HImode and its attendant prefix byte. */
- if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
- dest = gen_rtx_REG (SImode, REGNO (dest));
- tmp = gen_rtx_SET (dest, const0_rtx);
-
- if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
- {
- rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
- tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
- }
-
- emit_insn (tmp);
-}
-
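-/* Illustrative sketch (not part of the original sources): for a
-   function like this GCC normally emits "xorl %eax, %eax" rather than
-   "movl $0, %eax"; the xor form is smaller and breaks dependencies,
-   which is why a flags CLOBBER is added above.  */
-
-int
-zero (void)
-{
-  return 0;
-}
-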
-void
-ix86_expand_move (machine_mode mode, rtx operands[])
-{
- rtx op0, op1;
- rtx tmp, addend = NULL_RTX;
- enum tls_model model;
-
- op0 = operands[0];
- op1 = operands[1];
-
- switch (GET_CODE (op1))
- {
- case CONST:
- tmp = XEXP (op1, 0);
-
- if (GET_CODE (tmp) != PLUS
- || GET_CODE (XEXP (tmp, 0)) != SYMBOL_REF)
- break;
-
- op1 = XEXP (tmp, 0);
- addend = XEXP (tmp, 1);
- /* FALLTHRU */
-
- case SYMBOL_REF:
- model = SYMBOL_REF_TLS_MODEL (op1);
-
- if (model)
- op1 = legitimize_tls_address (op1, model, true);
- else if (ix86_force_load_from_GOT_p (op1))
- {
- /* Load the external function address via GOT slot to avoid PLT. */
- op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1),
- (TARGET_64BIT
- ? UNSPEC_GOTPCREL
- : UNSPEC_GOT));
- op1 = gen_rtx_CONST (Pmode, op1);
- op1 = gen_const_mem (Pmode, op1);
- set_mem_alias_set (op1, ix86_GOT_alias_set ());
- }
- else
- {
- tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX);
- if (tmp)
- {
- op1 = tmp;
- if (!addend)
- break;
- }
- else
- {
- op1 = operands[1];
- break;
- }
- }
-
- if (addend)
- {
- op1 = force_operand (op1, NULL_RTX);
- op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
- op0, 1, OPTAB_DIRECT);
- }
- else
- op1 = force_operand (op1, op0);
-
- if (op1 == op0)
- return;
-
- op1 = convert_to_mode (mode, op1, 1);
-
- default:
- break;
- }
-
- if ((flag_pic || MACHOPIC_INDIRECT)
- && symbolic_operand (op1, mode))
- {
- if (TARGET_MACHO && !TARGET_64BIT)
- {
-#if TARGET_MACHO
- /* dynamic-no-pic */
- if (MACHOPIC_INDIRECT)
- {
- rtx temp = (op0 && REG_P (op0) && mode == Pmode)
- ? op0 : gen_reg_rtx (Pmode);
- op1 = machopic_indirect_data_reference (op1, temp);
- if (MACHOPIC_PURE)
- op1 = machopic_legitimize_pic_address (op1, mode,
- temp == op1 ? 0 : temp);
- }
- if (op0 != op1 && GET_CODE (op0) != MEM)
- {
- rtx insn = gen_rtx_SET (op0, op1);
- emit_insn (insn);
- return;
- }
- if (GET_CODE (op0) == MEM)
- op1 = force_reg (Pmode, op1);
- else
- {
- rtx temp = op0;
- if (GET_CODE (temp) != REG)
- temp = gen_reg_rtx (Pmode);
- temp = legitimize_pic_address (op1, temp);
- if (temp == op0)
- return;
- op1 = temp;
- }
- /* dynamic-no-pic */
-#endif
- }
- else
- {
- if (MEM_P (op0))
- op1 = force_reg (mode, op1);
- else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
- {
- rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
- op1 = legitimize_pic_address (op1, reg);
- if (op0 == op1)
- return;
- op1 = convert_to_mode (mode, op1, 1);
- }
- }
- }
- else
- {
- if (MEM_P (op0)
- && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
- || !push_operand (op0, mode))
- && MEM_P (op1))
- op1 = force_reg (mode, op1);
-
- if (push_operand (op0, mode)
- && ! general_no_elim_operand (op1, mode))
- op1 = copy_to_mode_reg (mode, op1);
-
- /* Force large constants in 64-bit compilation into a register
- to get them CSEd. */
- if (can_create_pseudo_p ()
- && (mode == DImode) && TARGET_64BIT
- && immediate_operand (op1, mode)
- && !x86_64_zext_immediate_operand (op1, VOIDmode)
- && !register_operand (op0, mode)
- && optimize)
- op1 = copy_to_mode_reg (mode, op1);
-
- if (can_create_pseudo_p ()
- && CONST_DOUBLE_P (op1))
- {
- /* If we are loading a floating-point constant to a register,
- force the value to memory now, since we'll get better code
- out of the back end. */
-
- op1 = validize_mem (force_const_mem (mode, op1));
- if (!register_operand (op0, mode))
- {
- rtx temp = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (temp, op1));
- emit_move_insn (op0, temp);
- return;
- }
- }
- }
-
- emit_insn (gen_rtx_SET (op0, op1));
-}
-
-void
-ix86_expand_vector_move (machine_mode mode, rtx operands[])
-{
- rtx op0 = operands[0], op1 = operands[1];
- /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for the IA MCU
- psABI, since its biggest alignment is only 4 bytes. */
- unsigned int align = (TARGET_IAMCU
- ? GET_MODE_BITSIZE (mode)
- : GET_MODE_ALIGNMENT (mode));
-
- if (push_operand (op0, VOIDmode))
- op0 = emit_move_resolve_push (mode, op0);
-
- /* Force constants other than zero into memory. We do not know how
- the instructions used to build constants modify the upper 64 bits
- of the register; once we have that information we may be able
- to handle some of them more efficiently. */
- if (can_create_pseudo_p ()
- && (CONSTANT_P (op1)
- || (SUBREG_P (op1)
- && CONSTANT_P (SUBREG_REG (op1))))
- && ((register_operand (op0, mode)
- && !standard_sse_constant_p (op1, mode))
- /* ix86_expand_vector_move_misalign() does not like constants. */
- || (SSE_REG_MODE_P (mode)
- && MEM_P (op0)
- && MEM_ALIGN (op0) < align)))
- {
- if (SUBREG_P (op1))
- {
- machine_mode imode = GET_MODE (SUBREG_REG (op1));
- rtx r = force_const_mem (imode, SUBREG_REG (op1));
- if (r)
- r = validize_mem (r);
- else
- r = force_reg (imode, SUBREG_REG (op1));
- op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1));
- }
- else
- op1 = validize_mem (force_const_mem (mode, op1));
- }
-
- /* We need to check memory alignment for SSE modes since attributes
- can make operands unaligned. */
- if (can_create_pseudo_p ()
- && SSE_REG_MODE_P (mode)
- && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
- || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
- {
- rtx tmp[2];
-
- /* ix86_expand_vector_move_misalign() does not like both
- arguments in memory. */
- if (!register_operand (op0, mode)
- && !register_operand (op1, mode))
- op1 = force_reg (mode, op1);
-
- tmp[0] = op0; tmp[1] = op1;
- ix86_expand_vector_move_misalign (mode, tmp);
- return;
- }
-
- /* If neither operand is a register yet, force operand1 into one. */
- if (can_create_pseudo_p ()
- && !register_operand (op0, mode)
- && !register_operand (op1, mode))
- {
- emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
- return;
- }
-
- emit_insn (gen_rtx_SET (op0, op1));
-}
-
-/* Split 32-byte AVX unaligned load and store if needed. */
-
-static void
-ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
-{
- rtx m;
- rtx (*extract) (rtx, rtx, rtx);
- machine_mode mode;
-
- if ((MEM_P (op1) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
- || (MEM_P (op0) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE))
- {
- emit_insn (gen_rtx_SET (op0, op1));
- return;
- }
-
- rtx orig_op0 = NULL_RTX;
- mode = GET_MODE (op0);
- switch (GET_MODE_CLASS (mode))
- {
- case MODE_VECTOR_INT:
- case MODE_INT:
- if (mode != V32QImode)
- {
- if (!MEM_P (op0))
- {
- orig_op0 = op0;
- op0 = gen_reg_rtx (V32QImode);
- }
- else
- op0 = gen_lowpart (V32QImode, op0);
- op1 = gen_lowpart (V32QImode, op1);
- mode = V32QImode;
- }
- break;
- case MODE_VECTOR_FLOAT:
- break;
- default:
- gcc_unreachable ();
- }
-
- switch (mode)
- {
- default:
- gcc_unreachable ();
- case E_V32QImode:
- extract = gen_avx_vextractf128v32qi;
- mode = V16QImode;
- break;
- case E_V8SFmode:
- extract = gen_avx_vextractf128v8sf;
- mode = V4SFmode;
- break;
- case E_V4DFmode:
- extract = gen_avx_vextractf128v4df;
- mode = V2DFmode;
- break;
- }
-
- if (MEM_P (op1))
- {
- rtx r = gen_reg_rtx (mode);
- m = adjust_address (op1, mode, 0);
- emit_move_insn (r, m);
- m = adjust_address (op1, mode, 16);
- r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
- emit_move_insn (op0, r);
- }
- else if (MEM_P (op0))
- {
- m = adjust_address (op0, mode, 0);
- emit_insn (extract (m, op1, const0_rtx));
- m = adjust_address (op0, mode, 16);
- emit_insn (extract (m, copy_rtx (op1), const1_rtx));
- }
- else
- gcc_unreachable ();
-
- if (orig_op0)
- emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
-}
-
-/* Implement the movmisalign patterns for SSE. Non-SSE modes go
- straight to ix86_expand_vector_move. */
-/* Code generation for scalar reg-reg moves of single and double precision data:
- if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
- movaps reg, reg
- else
- movss reg, reg
- if (x86_sse_partial_reg_dependency == true)
- movapd reg, reg
- else
- movsd reg, reg
-
- Code generation for scalar loads of double precision data:
- if (x86_sse_split_regs == true)
- movlpd mem, reg (gas syntax)
- else
- movsd mem, reg
-
- Code generation for unaligned packed loads of single precision data
- (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
- if (x86_sse_unaligned_move_optimal)
- movups mem, reg
-
- if (x86_sse_partial_reg_dependency == true)
- {
- xorps reg, reg
- movlps mem, reg
- movhps mem+8, reg
- }
- else
- {
- movlps mem, reg
- movhps mem+8, reg
- }
-
- Code generation for unaligned packed loads of double precision data
- (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
- if (x86_sse_unaligned_move_optimal)
- movupd mem, reg
-
- if (x86_sse_split_regs == true)
- {
- movlpd mem, reg
- movhpd mem+8, reg
- }
- else
- {
- movsd mem, reg
- movhpd mem+8, reg
- }
- */
-
-void
-ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
-{
- rtx op0, op1, m;
-
- op0 = operands[0];
- op1 = operands[1];
-
- /* Use unaligned load/store for AVX512 or when optimizing for size. */
- if (GET_MODE_SIZE (mode) == 64 || optimize_insn_for_size_p ())
- {
- emit_insn (gen_rtx_SET (op0, op1));
- return;
- }
-
- if (TARGET_AVX)
- {
- if (GET_MODE_SIZE (mode) == 32)
- ix86_avx256_split_vector_move_misalign (op0, op1);
- else
- /* Always use 128-bit mov<mode>_internal pattern for AVX. */
- emit_insn (gen_rtx_SET (op0, op1));
- return;
- }
-
- if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
- || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- {
- emit_insn (gen_rtx_SET (op0, op1));
- return;
- }
-
- /* ??? If we have typed data, then it would appear that using
- movdqu is the only way to get unaligned data loaded with
- integer type. */
- if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
- {
- emit_insn (gen_rtx_SET (op0, op1));
- return;
- }
-
- if (MEM_P (op1))
- {
- if (TARGET_SSE2 && mode == V2DFmode)
- {
- rtx zero;
-
- /* When SSE registers are split into halves, we can avoid
- writing to the top half twice. */
- if (TARGET_SSE_SPLIT_REGS)
- {
- emit_clobber (op0);
- zero = op0;
- }
- else
- {
- /* ??? Not sure about the best option for the Intel chips.
- The following would seem to satisfy; the register is
- entirely cleared, breaking the dependency chain. We
- then store to the upper half, with a dependency depth
- of one. A rumor has it that Intel recommends two movsd
- followed by an unpacklpd, but this is unconfirmed. And
- given that the dependency depth of the unpacklpd would
- still be one, I'm not sure why this would be better. */
- zero = CONST0_RTX (V2DFmode);
- }
-
- m = adjust_address (op1, DFmode, 0);
- emit_insn (gen_sse2_loadlpd (op0, zero, m));
- m = adjust_address (op1, DFmode, 8);
- emit_insn (gen_sse2_loadhpd (op0, op0, m));
- }
- else
- {
- rtx t;
-
- if (mode != V4SFmode)
- t = gen_reg_rtx (V4SFmode);
- else
- t = op0;
-
- if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
- emit_move_insn (t, CONST0_RTX (V4SFmode));
- else
- emit_clobber (t);
-
- m = adjust_address (op1, V2SFmode, 0);
- emit_insn (gen_sse_loadlps (t, t, m));
- m = adjust_address (op1, V2SFmode, 8);
- emit_insn (gen_sse_loadhps (t, t, m));
- if (mode != V4SFmode)
- emit_move_insn (op0, gen_lowpart (mode, t));
- }
- }
- else if (MEM_P (op0))
- {
- if (TARGET_SSE2 && mode == V2DFmode)
- {
- m = adjust_address (op0, DFmode, 0);
- emit_insn (gen_sse2_storelpd (m, op1));
- m = adjust_address (op0, DFmode, 8);
- emit_insn (gen_sse2_storehpd (m, op1));
- }
- else
- {
- if (mode != V4SFmode)
- op1 = gen_lowpart (V4SFmode, op1);
-
- m = adjust_address (op0, V2SFmode, 0);
- emit_insn (gen_sse_storelps (m, op1));
- m = adjust_address (op0, V2SFmode, 8);
- emit_insn (gen_sse_storehps (m, copy_rtx (op1)));
- }
- }
- else
- gcc_unreachable ();
-}
-
-/* Move bits 64:95 to bits 32:63. */
-
-void
-ix86_move_vector_high_sse_to_mmx (rtx op)
-{
- rtx mask = gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec (4, GEN_INT (0), GEN_INT (2),
- GEN_INT (0), GEN_INT (0)));
- rtx dest = lowpart_subreg (V4SImode, op, GET_MODE (op));
- op = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
- rtx insn = gen_rtx_SET (dest, op);
- emit_insn (insn);
-}
-
-/* Split MMX pack with signed/unsigned saturation with SSE/SSE2. */
-
-void
-ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
-{
- rtx op0 = operands[0];
- rtx op1 = operands[1];
- rtx op2 = operands[2];
-
- machine_mode dmode = GET_MODE (op0);
- machine_mode smode = GET_MODE (op1);
- machine_mode inner_dmode = GET_MODE_INNER (dmode);
- machine_mode inner_smode = GET_MODE_INNER (smode);
-
- /* Get the corresponding SSE mode for destination. */
- int nunits = 16 / GET_MODE_SIZE (inner_dmode);
- machine_mode sse_dmode = mode_for_vector (GET_MODE_INNER (dmode),
- nunits).require ();
- machine_mode sse_half_dmode = mode_for_vector (GET_MODE_INNER (dmode),
- nunits / 2).require ();
-
- /* Get the corresponding SSE mode for source. */
- nunits = 16 / GET_MODE_SIZE (inner_smode);
- machine_mode sse_smode = mode_for_vector (GET_MODE_INNER (smode),
- nunits).require ();
-
- /* Generate SSE pack with signed/unsigned saturation. */
- rtx dest = lowpart_subreg (sse_dmode, op0, GET_MODE (op0));
- op1 = lowpart_subreg (sse_smode, op1, GET_MODE (op1));
- op2 = lowpart_subreg (sse_smode, op2, GET_MODE (op2));
-
- op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1);
- op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2);
- rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode,
- op1, op2));
- emit_insn (insn);
-
- ix86_move_vector_high_sse_to_mmx (op0);
-}
-
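-/* Illustrative sketch (not part of the original sources): one MMX
-   intrinsic of the kind whose expansion is split above when MMX insns
-   are emulated with SSE; _mm_packs_pi16 narrows 16-bit lanes to 8-bit
-   lanes with signed saturation.  */
-
-#include <mmintrin.h>
-
-__m64
-pack8 (__m64 a, __m64 b)
-{
-  return _mm_packs_pi16 (a, b);
-}
-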
-/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX. */
-
-void
-ix86_split_mmx_punpck (rtx operands[], bool high_p)
-{
- rtx op0 = operands[0];
- rtx op1 = operands[1];
- rtx op2 = operands[2];
- machine_mode mode = GET_MODE (op0);
- rtx mask;
- /* The corresponding SSE mode. */
- machine_mode sse_mode, double_sse_mode;
-
- switch (mode)
- {
- case E_V8QImode:
- sse_mode = V16QImode;
- double_sse_mode = V32QImode;
- mask = gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec (16,
- GEN_INT (0), GEN_INT (16),
- GEN_INT (1), GEN_INT (17),
- GEN_INT (2), GEN_INT (18),
- GEN_INT (3), GEN_INT (19),
- GEN_INT (4), GEN_INT (20),
- GEN_INT (5), GEN_INT (21),
- GEN_INT (6), GEN_INT (22),
- GEN_INT (7), GEN_INT (23)));
- break;
-
- case E_V4HImode:
- sse_mode = V8HImode;
- double_sse_mode = V16HImode;
- mask = gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec (8,
- GEN_INT (0), GEN_INT (8),
- GEN_INT (1), GEN_INT (9),
- GEN_INT (2), GEN_INT (10),
- GEN_INT (3), GEN_INT (11)));
- break;
-
- case E_V2SImode:
- sse_mode = V4SImode;
- double_sse_mode = V8SImode;
- mask = gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec (4,
- GEN_INT (0), GEN_INT (4),
- GEN_INT (1), GEN_INT (5)));
- break;
-
- default:
- gcc_unreachable ();
- }
-
- /* Generate SSE punpcklXX. */
- rtx dest = lowpart_subreg (sse_mode, op0, GET_MODE (op0));
- op1 = lowpart_subreg (sse_mode, op1, GET_MODE (op1));
- op2 = lowpart_subreg (sse_mode, op2, GET_MODE (op2));
-
- op1 = gen_rtx_VEC_CONCAT (double_sse_mode, op1, op2);
- op2 = gen_rtx_VEC_SELECT (sse_mode, op1, mask);
- rtx insn = gen_rtx_SET (dest, op2);
- emit_insn (insn);
-
- if (high_p)
- {
- /* Move bits 64:127 to bits 0:63. */
- mask = gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec (4, GEN_INT (2), GEN_INT (3),
- GEN_INT (0), GEN_INT (0)));
- dest = lowpart_subreg (V4SImode, dest, GET_MODE (dest));
- op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
- insn = gen_rtx_SET (dest, op1);
- emit_insn (insn);
- }
-}
-
-/* Helper function of ix86_fixup_binary_operands to canonicalize
- operand order. Returns true if the operands should be swapped. */
-
-static bool
-ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
- rtx operands[])
-{
- rtx dst = operands[0];
- rtx src1 = operands[1];
- rtx src2 = operands[2];
-
- /* If the operation is not commutative, we can't do anything. */
- if (GET_RTX_CLASS (code) != RTX_COMM_ARITH
- && GET_RTX_CLASS (code) != RTX_COMM_COMPARE)
- return false;
-
- /* Highest priority is that src1 should match dst. */
- if (rtx_equal_p (dst, src1))
- return false;
- if (rtx_equal_p (dst, src2))
- return true;
-
- /* Next highest priority is that immediate constants come second. */
- if (immediate_operand (src2, mode))
- return false;
- if (immediate_operand (src1, mode))
- return true;
-
- /* Lowest priority is that memory references should come second. */
- if (MEM_P (src2))
- return false;
- if (MEM_P (src1))
- return true;
-
- return false;
-}
-
-/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
- destination to use for the operation. If different from the true
- destination in operands[0], a copy operation will be required. */
-
-rtx
-ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
- rtx operands[])
-{
- rtx dst = operands[0];
- rtx src1 = operands[1];
- rtx src2 = operands[2];
-
- /* Canonicalize operand order. */
- if (ix86_swap_binary_operands_p (code, mode, operands))
- {
- /* It is invalid to swap operands of different modes. */
- gcc_assert (GET_MODE (src1) == GET_MODE (src2));
-
- std::swap (src1, src2);
- }
-
- /* Both source operands cannot be in memory. */
- if (MEM_P (src1) && MEM_P (src2))
- {
- /* Optimization: Only read from memory once. */
- if (rtx_equal_p (src1, src2))
- {
- src2 = force_reg (mode, src2);
- src1 = src2;
- }
- else if (rtx_equal_p (dst, src1))
- src2 = force_reg (mode, src2);
- else
- src1 = force_reg (mode, src1);
- }
-
- /* If the destination is memory, and we do not have matching source
- operands, do things in registers. */
- if (MEM_P (dst) && !rtx_equal_p (dst, src1))
- dst = gen_reg_rtx (mode);
-
- /* Source 1 cannot be a constant. */
- if (CONSTANT_P (src1))
- src1 = force_reg (mode, src1);
-
- /* Source 1 cannot be a non-matching memory. */
- if (MEM_P (src1) && !rtx_equal_p (dst, src1))
- src1 = force_reg (mode, src1);
-
- /* Improve address combine. */
- if (code == PLUS
- && GET_MODE_CLASS (mode) == MODE_INT
- && MEM_P (src2))
- src2 = force_reg (mode, src2);
-
- operands[1] = src1;
- operands[2] = src2;
- return dst;
-}
-
-/* Similarly, but assume that the destination has already been
- set up properly. */
-
-void
-ix86_fixup_binary_operands_no_copy (enum rtx_code code,
- machine_mode mode, rtx operands[])
-{
- rtx dst = ix86_fixup_binary_operands (code, mode, operands);
- gcc_assert (dst == operands[0]);
-}
-
-/* Attempt to expand a binary operator. Make the expansion closer to the
- actual machine than just general_operand, which will allow 3 separate
- memory references (one output, two inputs) in a single insn. */
-
-void
-ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
- rtx operands[])
-{
- rtx src1, src2, dst, op, clob;
-
- dst = ix86_fixup_binary_operands (code, mode, operands);
- src1 = operands[1];
- src2 = operands[2];
-
- /* Emit the instruction. */
-
- op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
-
- if (reload_completed
- && code == PLUS
- && !rtx_equal_p (dst, src1))
- {
- /* This is going to be an LEA; avoid splitting it later. */
- emit_insn (op);
- }
- else
- {
- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
- }
-
- /* Fix up the destination if needed. */
- if (dst != operands[0])
- emit_move_insn (operands[0], dst);
-}
-
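-/* Illustrative sketch (not part of the original sources): when the
-   destination does not match the first source, the PLUS above can be
-   emitted without the flags clobber and later become a three-operand
-   LEA, e.g. "leal (%rdi,%rsi), %eax" for:  */
-
-int
-add (int a, int b)
-{
-  return a + b;
-}
-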
-/* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
- the given OPERANDS. */
-
-void
-ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
- rtx operands[])
-{
- rtx op1 = NULL_RTX, op2 = NULL_RTX;
- if (SUBREG_P (operands[1]))
- {
- op1 = operands[1];
- op2 = operands[2];
- }
- else if (SUBREG_P (operands[2]))
- {
- op1 = operands[2];
- op2 = operands[1];
- }
- /* Optimize (__m128i) d | (__m128i) e and similar code
- when d and e are float vectors into a float vector logical
- insn. In C/C++ without using intrinsics there is no other way
- to express a vector logical operation on float vectors than
- to cast them temporarily to integer vectors. */
- if (op1
- && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
- && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
- && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
- && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
- && SUBREG_BYTE (op1) == 0
- && (GET_CODE (op2) == CONST_VECTOR
- || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
- && SUBREG_BYTE (op2) == 0))
- && can_create_pseudo_p ())
- {
- rtx dst;
- switch (GET_MODE (SUBREG_REG (op1)))
- {
- case E_V4SFmode:
- case E_V8SFmode:
- case E_V16SFmode:
- case E_V2DFmode:
- case E_V4DFmode:
- case E_V8DFmode:
- dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
- if (GET_CODE (op2) == CONST_VECTOR)
- {
- op2 = gen_lowpart (GET_MODE (dst), op2);
- op2 = force_reg (GET_MODE (dst), op2);
- }
- else
- {
- op1 = operands[1];
- op2 = SUBREG_REG (operands[2]);
- if (!vector_operand (op2, GET_MODE (dst)))
- op2 = force_reg (GET_MODE (dst), op2);
- }
- op1 = SUBREG_REG (op1);
- if (!vector_operand (op1, GET_MODE (dst)))
- op1 = force_reg (GET_MODE (dst), op1);
- emit_insn (gen_rtx_SET (dst,
- gen_rtx_fmt_ee (code, GET_MODE (dst),
- op1, op2)));
- emit_move_insn (operands[0], gen_lowpart (mode, dst));
- return;
- default:
- break;
- }
- }
- if (!vector_operand (operands[1], mode))
- operands[1] = force_reg (mode, operands[1]);
- if (!vector_operand (operands[2], mode))
- operands[2] = force_reg (mode, operands[2]);
- ix86_fixup_binary_operands_no_copy (code, mode, operands);
- emit_insn (gen_rtx_SET (operands[0],
- gen_rtx_fmt_ee (code, mode, operands[1],
- operands[2])));
-}
-
-/* Return TRUE or FALSE depending on whether the binary operator meets the
- appropriate constraints. */
-
-bool
-ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
- rtx operands[3])
-{
- rtx dst = operands[0];
- rtx src1 = operands[1];
- rtx src2 = operands[2];
-
- /* The two source operands cannot both be in memory.  */
- if (MEM_P (src1) && MEM_P (src2))
- return false;
-
- /* Canonicalize operand order for commutative operators. */
- if (ix86_swap_binary_operands_p (code, mode, operands))
- std::swap (src1, src2);
-
- /* If the destination is memory, we must have a matching source operand. */
- if (MEM_P (dst) && !rtx_equal_p (dst, src1))
- return false;
-
- /* Source 1 cannot be a constant. */
- if (CONSTANT_P (src1))
- return false;
-
- /* Source 1 cannot be a non-matching memory. */
- if (MEM_P (src1) && !rtx_equal_p (dst, src1))
- /* Support "andhi/andsi/anddi" as a zero-extending move. */
- return (code == AND
- && (mode == HImode
- || mode == SImode
- || (TARGET_64BIT && mode == DImode))
- && satisfies_constraint_L (src2));
-
- return true;
-}
-
-/* Attempt to expand a unary operator.  Make the expansion closer to the
- actual machine than just general_operand, which would allow 2 separate
- memory references (one output, one input) in a single insn.  */
-
-void
-ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
- rtx operands[])
-{
- bool matching_memory = false;
- rtx src, dst, op, clob;
-
- dst = operands[0];
- src = operands[1];
-
- /* If the destination is memory, and we do not have matching source
- operands, do things in registers. */
- if (MEM_P (dst))
- {
- if (rtx_equal_p (dst, src))
- matching_memory = true;
- else
- dst = gen_reg_rtx (mode);
- }
-
- /* When source operand is memory, destination must match. */
- if (MEM_P (src) && !matching_memory)
- src = force_reg (mode, src);
-
- /* Emit the instruction. */
-
- op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
-
- if (code == NOT)
- emit_insn (op);
- else
- {
- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
- }
-
- /* Fix up the destination if needed. */
- if (dst != operands[0])
- emit_move_insn (operands[0], dst);
-}
-
-/* Predict the just-emitted jump instruction to be taken with probability PROB.  */
-
-static void
-predict_jump (int prob)
-{
- rtx_insn *insn = get_last_insn ();
- gcc_assert (JUMP_P (insn));
- add_reg_br_prob_note (insn, profile_probability::from_reg_br_prob_base (prob));
-}
-
-/* Split a 32bit/64bit divmod, using an 8bit unsigned divmod when both the
- dividend and the divisor are within the range [0-255].  */
-
-void
-ix86_split_idivmod (machine_mode mode, rtx operands[],
- bool unsigned_p)
-{
- rtx_code_label *end_label, *qimode_label;
- rtx div, mod;
- rtx_insn *insn;
- rtx scratch, tmp0, tmp1, tmp2;
- rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
-
- switch (mode)
- {
- case E_SImode:
- if (GET_MODE (operands[0]) == SImode)
- {
- if (GET_MODE (operands[1]) == SImode)
- gen_divmod4_1 = unsigned_p ? gen_udivmodsi4_1 : gen_divmodsi4_1;
- else
- gen_divmod4_1
- = unsigned_p ? gen_udivmodsi4_zext_2 : gen_divmodsi4_zext_2;
- }
- else
- gen_divmod4_1
- = unsigned_p ? gen_udivmodsi4_zext_1 : gen_divmodsi4_zext_1;
- break;
-
- case E_DImode:
- gen_divmod4_1 = unsigned_p ? gen_udivmoddi4_1 : gen_divmoddi4_1;
- break;
-
- default:
- gcc_unreachable ();
- }
-
- end_label = gen_label_rtx ();
- qimode_label = gen_label_rtx ();
-
- scratch = gen_reg_rtx (mode);
-
- /* Use the 8bit unsigned divmod if the dividend and divisor are within
- the range [0-255].  */
- emit_move_insn (scratch, operands[2]);
- scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
- scratch, 1, OPTAB_DIRECT);
- emit_insn (gen_test_ccno_1 (mode, scratch, GEN_INT (-0x100)));
- tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
- tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
- tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
- gen_rtx_LABEL_REF (VOIDmode, qimode_label),
- pc_rtx);
- insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
- predict_jump (REG_BR_PROB_BASE * 50 / 100);
- JUMP_LABEL (insn) = qimode_label;
-
- /* Generate the original signed/unsigned divmod.  */
- div = gen_divmod4_1 (operands[0], operands[1],
- operands[2], operands[3]);
- emit_insn (div);
-
- /* Branch to the end. */
- emit_jump_insn (gen_jump (end_label));
- emit_barrier ();
-
- /* Generate 8bit unsigned divide. */
- emit_label (qimode_label);
- /* Don't use operands[0] for the result of the 8bit divide since not
- all registers support QImode ZERO_EXTRACT.  */
- tmp0 = lowpart_subreg (HImode, scratch, mode);
- tmp1 = lowpart_subreg (HImode, operands[2], mode);
- tmp2 = lowpart_subreg (QImode, operands[3], mode);
- emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
-
- if (unsigned_p)
- {
- div = gen_rtx_UDIV (mode, operands[2], operands[3]);
- mod = gen_rtx_UMOD (mode, operands[2], operands[3]);
- }
- else
- {
- div = gen_rtx_DIV (mode, operands[2], operands[3]);
- mod = gen_rtx_MOD (mode, operands[2], operands[3]);
- }
- if (mode == SImode)
- {
- if (GET_MODE (operands[0]) != SImode)
- div = gen_rtx_ZERO_EXTEND (DImode, div);
- if (GET_MODE (operands[1]) != SImode)
- mod = gen_rtx_ZERO_EXTEND (DImode, mod);
- }
-
- /* Extract remainder from AH. */
- tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]),
- tmp0, GEN_INT (8), GEN_INT (8));
- if (REG_P (operands[1]))
- insn = emit_move_insn (operands[1], tmp1);
- else
- {
- /* Need a new scratch register since the old one has result
- of 8bit divide. */
- scratch = gen_reg_rtx (GET_MODE (operands[1]));
- emit_move_insn (scratch, tmp1);
- insn = emit_move_insn (operands[1], scratch);
- }
- set_unique_reg_note (insn, REG_EQUAL, mod);
-
- /* Zero extend quotient from AL. */
- tmp1 = gen_lowpart (QImode, tmp0);
- insn = emit_insn (gen_extend_insn
- (operands[0], tmp1,
- GET_MODE (operands[0]), QImode, 1));
- set_unique_reg_note (insn, REG_EQUAL, div);
-
- emit_label (end_label);
-}
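
The runtime shape of this split is easier to read in scalar C.  A hedged sketch of the logic the emitted RTL implements (helper name invented for illustration):

/* Sketch: use the cheap 8-bit divide when both operands fit in
   [0, 255]; (a | b) < 0x100 tests both at once, as the test_ccno_1
   above does.  */
static unsigned int
divmod_sketch (unsigned int a, unsigned int b, unsigned int *rem)
{
  if ((a | b) < 0x100)
    {
      *rem = (unsigned char) a % (unsigned char) b;
      return (unsigned char) a / (unsigned char) b;
    }
  *rem = a % b;
  return a / b;
}
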
-
-/* Emit the x86 binary operation CODE in mode MODE, where the first operand
- matches the destination.  The emitted RTX includes a clobber of FLAGS_REG.  */
-
-void
-ix86_emit_binop (enum rtx_code code, machine_mode mode,
- rtx dst, rtx src)
-{
- rtx op, clob;
-
- op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
-
- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
-}
-
-/* Return true if the definition of REGNO1 is nearer to INSN than the
- definition of REGNO2.  */
-
-static bool
-find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
-{
- rtx_insn *prev = insn;
- rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
-
- if (insn == start)
- return false;
- while (prev && prev != start)
- {
- if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
- {
- prev = PREV_INSN (prev);
- continue;
- }
- if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
- return true;
- else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
- return false;
- prev = PREV_INSN (prev);
- }
-
- /* Neither of the regs is defined in the bb.  */
- return false;
-}
-
-/* Split an lea instruction into a sequence of instructions
- executed on the ALU to avoid AGU stalls.
- It is assumed that the flags register may be clobbered
- at the lea's position.  */
-
-void
-ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
-{
- unsigned int regno0, regno1, regno2;
- struct ix86_address parts;
- rtx target, tmp;
- int ok, adds;
-
- ok = ix86_decompose_address (operands[1], &parts);
- gcc_assert (ok);
-
- target = gen_lowpart (mode, operands[0]);
-
- regno0 = true_regnum (target);
- regno1 = INVALID_REGNUM;
- regno2 = INVALID_REGNUM;
-
- if (parts.base)
- {
- parts.base = gen_lowpart (mode, parts.base);
- regno1 = true_regnum (parts.base);
- }
-
- if (parts.index)
- {
- parts.index = gen_lowpart (mode, parts.index);
- regno2 = true_regnum (parts.index);
- }
-
- if (parts.disp)
- parts.disp = gen_lowpart (mode, parts.disp);
-
- if (parts.scale > 1)
- {
- /* Case r1 = r1 + ... */
- if (regno1 == regno0)
- {
- /* The case r1 = r1 + C * r2 would require multiplication,
- which is very expensive.  Assume the cost model is wrong
- if we end up with such a case here.  */
- gcc_assert (regno2 != regno0);
-
- for (adds = parts.scale; adds > 0; adds--)
- ix86_emit_binop (PLUS, mode, target, parts.index);
- }
- else
- {
- /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
- if (regno0 != regno2)
- emit_insn (gen_rtx_SET (target, parts.index));
-
- /* Use shift for scaling. */
- ix86_emit_binop (ASHIFT, mode, target,
- GEN_INT (exact_log2 (parts.scale)));
-
- if (parts.base)
- ix86_emit_binop (PLUS, mode, target, parts.base);
-
- if (parts.disp && parts.disp != const0_rtx)
- ix86_emit_binop (PLUS, mode, target, parts.disp);
- }
- }
- else if (!parts.base && !parts.index)
- {
- gcc_assert (parts.disp);
- emit_insn (gen_rtx_SET (target, parts.disp));
- }
- else
- {
- if (!parts.base)
- {
- if (regno0 != regno2)
- emit_insn (gen_rtx_SET (target, parts.index));
- }
- else if (!parts.index)
- {
- if (regno0 != regno1)
- emit_insn (gen_rtx_SET (target, parts.base));
- }
- else
- {
- if (regno0 == regno1)
- tmp = parts.index;
- else if (regno0 == regno2)
- tmp = parts.base;
- else
- {
- rtx tmp1;
-
- /* Find better operand for SET instruction, depending
- on which definition is farther from the insn. */
- if (find_nearest_reg_def (insn, regno1, regno2))
- tmp = parts.index, tmp1 = parts.base;
- else
- tmp = parts.base, tmp1 = parts.index;
-
- emit_insn (gen_rtx_SET (target, tmp));
-
- if (parts.disp && parts.disp != const0_rtx)
- ix86_emit_binop (PLUS, mode, target, parts.disp);
-
- ix86_emit_binop (PLUS, mode, target, tmp1);
- return;
- }
-
- ix86_emit_binop (PLUS, mode, target, tmp);
- }
-
- if (parts.disp && parts.disp != const0_rtx)
- ix86_emit_binop (PLUS, mode, target, parts.disp);
- }
-}
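
In the common scaled case the decomposition maps onto a mov/shift/add sequence.  A hypothetical C sketch, assuming dst is distinct from base and index and the scale is a power of two:

/* Sketch: dst = base + index * scale + disp, mirroring the
   mov/shl/add sequence emitted above.  */
static long
split_lea_sketch (long base, long index, long scale, long disp)
{
  long dst = index;                 /* movq index, dst         */
  dst <<= __builtin_ctzl (scale);   /* salq $log2(scale), dst  */
  dst += base;                      /* addq base, dst          */
  dst += disp;                      /* addq disp, dst          */
  return dst;
}
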
-
-/* Post-reload splitter for converting an SF or DFmode value in an
- SSE register into an unsigned SImode value.  */
-
-void
-ix86_split_convert_uns_si_sse (rtx operands[])
-{
- machine_mode vecmode;
- rtx value, large, zero_or_two31, input, two31, x;
-
- large = operands[1];
- zero_or_two31 = operands[2];
- input = operands[3];
- two31 = operands[4];
- vecmode = GET_MODE (large);
- value = gen_rtx_REG (vecmode, REGNO (operands[0]));
-
- /* Load up the value into the low element. We must ensure that the other
- elements are valid floats -- zero is the easiest such value. */
- if (MEM_P (input))
- {
- if (vecmode == V4SFmode)
- emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
- else
- emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
- }
- else
- {
- input = gen_rtx_REG (vecmode, REGNO (input));
- emit_move_insn (value, CONST0_RTX (vecmode));
- if (vecmode == V4SFmode)
- emit_insn (gen_sse_movss (value, value, input));
- else
- emit_insn (gen_sse2_movsd (value, value, input));
- }
-
- emit_move_insn (large, two31);
- emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
-
- x = gen_rtx_fmt_ee (LE, vecmode, large, value);
- emit_insn (gen_rtx_SET (large, x));
-
- x = gen_rtx_AND (vecmode, zero_or_two31, large);
- emit_insn (gen_rtx_SET (zero_or_two31, x));
-
- x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
- emit_insn (gen_rtx_SET (value, x));
-
- large = gen_rtx_REG (V4SImode, REGNO (large));
- emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
-
- x = gen_rtx_REG (V4SImode, REGNO (value));
- if (vecmode == V4SFmode)
- emit_insn (gen_fix_truncv4sfv4si2 (x, value));
- else
- emit_insn (gen_sse2_cvttpd2dq (x, value));
- value = x;
-
- emit_insn (gen_xorv4si3 (value, value, large));
-}
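
The same trick in scalar form, as a hedged sketch (the real code above is branch-free and operates on vectors):

/* Sketch: double -> unsigned 32-bit.  Values below 2^31 convert via
   the plain signed cvttsd2si; larger values have 2^31 subtracted
   first and the sign bit xored back in afterwards.  */
static unsigned int
dtou32_sketch (double v)
{
  if (v < 0x1.0p31)
    return (unsigned int) (int) v;
  return (unsigned int) (int) (v - 0x1.0p31) ^ 0x80000000u;
}
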
-
-static bool ix86_expand_vector_init_one_nonzero (bool mmx_ok,
- machine_mode mode, rtx target,
- rtx var, int one_var);
-
-/* Convert an unsigned DImode value into a DFmode, using only SSE.
- Expects the 64-bit DImode to be supplied in a pair of integral
- registers. Requires SSE2; will use SSE3 if available. For x86_32,
- -mfpmath=sse, !optimize_size only. */
-
-void
-ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
-{
- REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
- rtx int_xmm, fp_xmm;
- rtx biases, exponents;
- rtx x;
-
- int_xmm = gen_reg_rtx (V4SImode);
- if (TARGET_INTER_UNIT_MOVES_TO_VEC)
- emit_insn (gen_movdi_to_sse (int_xmm, input));
- else if (TARGET_SSE_SPLIT_REGS)
- {
- emit_clobber (int_xmm);
- emit_move_insn (gen_lowpart (DImode, int_xmm), input);
- }
- else
- {
- x = gen_reg_rtx (V2DImode);
- ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
- emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
- }
-
- x = gen_rtx_CONST_VECTOR (V4SImode,
- gen_rtvec (4, GEN_INT (0x43300000UL),
- GEN_INT (0x45300000UL),
- const0_rtx, const0_rtx));
- exponents = validize_mem (force_const_mem (V4SImode, x));
-
- /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
- emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
-
- /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
- yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
- Similarly (0x45300000UL ## fp_value_hi_xmm) yields
- (0x1.0p84 + double(fp_value_hi_xmm)).
- Note these exponents differ by 32. */
-
- fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
-
- /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
- in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
- real_ldexp (&bias_lo_rvt, &dconst1, 52);
- real_ldexp (&bias_hi_rvt, &dconst1, 84);
- biases = const_double_from_real_value (bias_lo_rvt, DFmode);
- x = const_double_from_real_value (bias_hi_rvt, DFmode);
- biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
- biases = validize_mem (force_const_mem (V2DFmode, biases));
- emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
-
- /* Add the upper and lower DFmode values together. */
- if (TARGET_SSE3)
- emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
- else
- {
- x = copy_to_mode_reg (V2DFmode, fp_xmm);
- emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
- emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
- }
-
- ix86_expand_vector_extract (false, target, fp_xmm, 0);
-}
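
The constants 0x43300000 and 0x45300000 are the high words of 2^52 and 2^84 as doubles.  A scalar sketch of the same bias trick, using memcpy for the bit-punning (helper name invented here):

#include <stdint.h>
#include <string.h>

/* Sketch: (0x43300000 << 32 | lo) viewed as a double is 2^52 + lo,
   and (0x45300000 << 32 | hi) is 2^84 + hi * 2^32; subtracting the
   biases and summing reconstructs the unsigned 64-bit value.  */
static double
u64_to_double_sketch (uint64_t x)
{
  uint64_t lo_bits = (0x43300000ULL << 32) | (uint32_t) x;
  uint64_t hi_bits = (0x45300000ULL << 32) | (uint32_t) (x >> 32);
  double lo, hi;
  memcpy (&lo, &lo_bits, sizeof lo);
  memcpy (&hi, &hi_bits, sizeof hi);
  return (hi - 0x1.0p84) + (lo - 0x1.0p52);
}
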
-
-/* Not used, but eases macroization of patterns. */
-void
-ix86_expand_convert_uns_sixf_sse (rtx, rtx)
-{
- gcc_unreachable ();
-}
-
-/* Convert an unsigned SImode value into a DFmode.  Currently only used
- for SSE, but applicable anywhere.  */
-
-void
-ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
-{
- REAL_VALUE_TYPE TWO31r;
- rtx x, fp;
-
- x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
- NULL, 1, OPTAB_DIRECT);
-
- fp = gen_reg_rtx (DFmode);
- emit_insn (gen_floatsidf2 (fp, x));
-
- real_ldexp (&TWO31r, &dconst1, 31);
- x = const_double_from_real_value (TWO31r, DFmode);
-
- x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
- if (x != target)
- emit_move_insn (target, x);
-}
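
In scalar terms this is the classic 2^31 bias trick; a minimal sketch, assuming a 32-bit int:

/* Sketch: (int)(x - 2^31) always fits and converts exactly; adding
   2^31 back as a double restores the unsigned value.  */
static double
u32_to_double_sketch (unsigned int x)
{
  return (double) (int) (x - 0x80000000u) + 0x1.0p31;
}
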
-
-/* Convert a signed DImode value into a DFmode. Only used for SSE in
- 32-bit mode; otherwise we have a direct convert instruction. */
-
-void
-ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
-{
- REAL_VALUE_TYPE TWO32r;
- rtx fp_lo, fp_hi, x;
-
- fp_lo = gen_reg_rtx (DFmode);
- fp_hi = gen_reg_rtx (DFmode);
-
- emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
-
- real_ldexp (&TWO32r, &dconst1, 32);
- x = const_double_from_real_value (TWO32r, DFmode);
- fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
-
- ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
-
- x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
- 0, OPTAB_DIRECT);
- if (x != target)
- emit_move_insn (target, x);
-}
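
A scalar sketch of the split performed above (hypothetical helper; the right shift of a negative value assumes GCC's arithmetic-shift behavior):

/* Sketch: split the signed 64-bit value into a signed high word
   scaled by 2^32 (exact in double) plus an unsigned low word.  */
static double
s64_to_double_sketch (long long x)
{
  double hi = (double) (int) (x >> 32) * 0x1.0p32;
  double lo = (double) (unsigned int) x;
  return hi + lo;
}
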
-
-/* Convert an unsigned SImode value into an SFmode value, using only SSE.
- For x86_32, -mfpmath=sse, !optimize_size only.  */
-void
-ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
-{
- REAL_VALUE_TYPE ONE16r;
- rtx fp_hi, fp_lo, int_hi, int_lo, x;
-
- real_ldexp (&ONE16r, &dconst1, 16);
- x = const_double_from_real_value (ONE16r, SFmode);
- int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
- NULL, 0, OPTAB_DIRECT);
- int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
- NULL, 0, OPTAB_DIRECT);
- fp_hi = gen_reg_rtx (SFmode);
- fp_lo = gen_reg_rtx (SFmode);
- emit_insn (gen_floatsisf2 (fp_hi, int_hi));
- emit_insn (gen_floatsisf2 (fp_lo, int_lo));
- fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
- 0, OPTAB_DIRECT);
- fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
- 0, OPTAB_DIRECT);
- if (!rtx_equal_p (target, fp_hi))
- emit_move_insn (target, fp_hi);
-}
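
Both 16-bit halves convert to float exactly, so only the final addition rounds.  A scalar sketch of the same decomposition (helper name invented):

/* Sketch: scale the high half by 2^16 (exact) and add the low half;
   the single rounding happens in the final add.  */
static float
u32_to_float_sketch (unsigned int x)
{
  float hi = (float) (int) (x >> 16) * 0x1.0p16f;
  float lo = (float) (int) (x & 0xffff);
  return hi + lo;
}
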
-
-/* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
- a vector of unsigned ints VAL to vector of floats TARGET. */
-
-void
-ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
-{
- rtx tmp[8];
- REAL_VALUE_TYPE TWO16r;
- machine_mode intmode = GET_MODE (val);
- machine_mode fltmode = GET_MODE (target);
- rtx (*cvt) (rtx, rtx);
-
- if (intmode == V4SImode)
- cvt = gen_floatv4siv4sf2;
- else
- cvt = gen_floatv8siv8sf2;
- tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
- tmp[0] = force_reg (intmode, tmp[0]);
- tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
- OPTAB_DIRECT);
- tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
- NULL_RTX, 1, OPTAB_DIRECT);
- tmp[3] = gen_reg_rtx (fltmode);
- emit_insn (cvt (tmp[3], tmp[1]));
- tmp[4] = gen_reg_rtx (fltmode);
- emit_insn (cvt (tmp[4], tmp[2]));
- real_ldexp (&TWO16r, &dconst1, 16);
- tmp[5] = const_double_from_real_value (TWO16r, SFmode);
- tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
- tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
- OPTAB_DIRECT);
- tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
- OPTAB_DIRECT);
- if (tmp[7] != target)
- emit_move_insn (target, tmp[7]);
-}
-
-/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
- pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
- This is done by doing just signed conversion if < 0x1p31, and otherwise by
- subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
-
-rtx
-ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
-{
- REAL_VALUE_TYPE TWO31r;
- rtx two31r, tmp[4];
- machine_mode mode = GET_MODE (val);
- machine_mode scalarmode = GET_MODE_INNER (mode);
- machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
- rtx (*cmp) (rtx, rtx, rtx, rtx);
- int i;
-
- for (i = 0; i < 3; i++)
- tmp[i] = gen_reg_rtx (mode);
- real_ldexp (&TWO31r, &dconst1, 31);
- two31r = const_double_from_real_value (TWO31r, scalarmode);
- two31r = ix86_build_const_vector (mode, 1, two31r);
- two31r = force_reg (mode, two31r);
- switch (mode)
- {
- case E_V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
- case E_V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
- case E_V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
- case E_V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
- default: gcc_unreachable ();
- }
- tmp[3] = gen_rtx_LE (mode, two31r, val);
- emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
- tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
- 0, OPTAB_DIRECT);
- if (intmode == V4SImode || TARGET_AVX2)
- *xorp = expand_simple_binop (intmode, ASHIFT,
- gen_lowpart (intmode, tmp[0]),
- GEN_INT (31), NULL_RTX, 0,
- OPTAB_DIRECT);
- else
- {
- rtx two31 = gen_int_mode (HOST_WIDE_INT_1U << 31, SImode);
- two31 = ix86_build_const_vector (intmode, 1, two31);
- *xorp = expand_simple_binop (intmode, AND,
- gen_lowpart (intmode, tmp[0]),
- two31, NULL_RTX, 0,
- OPTAB_DIRECT);
- }
- return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
- 0, OPTAB_DIRECT);
-}
-
-/* Generate code for floating point ABS or NEG. */
-
-void
-ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
- rtx operands[])
-{
- rtx set, dst, src;
- bool use_sse = false;
- bool vector_mode = VECTOR_MODE_P (mode);
- machine_mode vmode = mode;
- rtvec par;
-
- if (vector_mode || mode == TFmode)
- use_sse = true;
- else if (TARGET_SSE_MATH)
- {
- use_sse = SSE_FLOAT_MODE_P (mode);
- if (mode == SFmode)
- vmode = V4SFmode;
- else if (mode == DFmode)
- vmode = V2DFmode;
- }
-
- dst = operands[0];
- src = operands[1];
-
- set = gen_rtx_fmt_e (code, mode, src);
- set = gen_rtx_SET (dst, set);
-
- if (use_sse)
- {
- rtx mask, use, clob;
-
- /* NEG and ABS performed with SSE use bitwise mask operations.
- Create the appropriate mask now. */
- mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
- use = gen_rtx_USE (VOIDmode, mask);
- if (vector_mode || mode == TFmode)
- par = gen_rtvec (2, set, use);
- else
- {
- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
- par = gen_rtvec (3, set, use, clob);
- }
- }
- else
- {
- rtx clob;
-
- /* Changing the sign of an FP value can also be done using the integer unit.  */
- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
- par = gen_rtvec (2, set, clob);
- }
-
- emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
-}
-
-/* Deconstruct a floating point ABS or NEG operation
- with integer registers into integer operations. */
-
-void
-ix86_split_fp_absneg_operator (enum rtx_code code, machine_mode mode,
- rtx operands[])
-{
- enum rtx_code absneg_op;
- rtx dst, set;
-
- gcc_assert (operands_match_p (operands[0], operands[1]));
-
- switch (mode)
- {
- case E_SFmode:
- dst = gen_lowpart (SImode, operands[0]);
-
- if (code == ABS)
- {
- set = gen_int_mode (0x7fffffff, SImode);
- absneg_op = AND;
- }
- else
- {
- set = gen_int_mode (0x80000000, SImode);
- absneg_op = XOR;
- }
- set = gen_rtx_fmt_ee (absneg_op, SImode, dst, set);
- break;
-
- case E_DFmode:
- if (TARGET_64BIT)
- {
- dst = gen_lowpart (DImode, operands[0]);
- dst = gen_rtx_ZERO_EXTRACT (DImode, dst, const1_rtx, GEN_INT (63));
-
- if (code == ABS)
- set = const0_rtx;
- else
- set = gen_rtx_NOT (DImode, dst);
- }
- else
- {
- dst = gen_highpart (SImode, operands[0]);
-
- if (code == ABS)
- {
- set = gen_int_mode (0x7fffffff, SImode);
- absneg_op = AND;
- }
- else
- {
- set = gen_int_mode (0x80000000, SImode);
- absneg_op = XOR;
- }
- set = gen_rtx_fmt_ee (absneg_op, SImode, dst, set);
- }
- break;
-
- case E_XFmode:
- dst = gen_rtx_REG (SImode,
- REGNO (operands[0]) + (TARGET_64BIT ? 1 : 2));
- if (code == ABS)
- {
- set = GEN_INT (0x7fff);
- absneg_op = AND;
- }
- else
- {
- set = GEN_INT (0x8000);
- absneg_op = XOR;
- }
- set = gen_rtx_fmt_ee (absneg_op, SImode, dst, set);
- break;
-
- default:
- gcc_unreachable ();
- }
-
- set = gen_rtx_SET (dst, set);
-
- rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
- rtvec par = gen_rtvec (2, set, clob);
-
- emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
-}
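
The and/xor constants above are just the sign-bit masks.  A hedged scalar sketch of the integer forms of abs and neg (helper names invented):

#include <stdint.h>
#include <string.h>

/* Sketch: clear or flip the sign bit with and/xor.  */
static float
fabs_sketch (float x)
{
  uint32_t b;
  memcpy (&b, &x, sizeof b);
  b &= 0x7fffffffu;             /* andl $0x7fffffff -- ABS  */
  memcpy (&x, &b, sizeof x);
  return x;
}

static float
fneg_sketch (float x)
{
  uint32_t b;
  memcpy (&b, &x, sizeof b);
  b ^= 0x80000000u;             /* xorl $0x80000000 -- NEG  */
  memcpy (&x, &b, sizeof x);
  return x;
}
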
-
-/* Expand a copysign operation. Special case operand 0 being a constant. */
-
-void
-ix86_expand_copysign (rtx operands[])
-{
- machine_mode mode, vmode;
- rtx dest, op0, op1, mask;
-
- dest = operands[0];
- op0 = operands[1];
- op1 = operands[2];
-
- mode = GET_MODE (dest);
-
- if (mode == SFmode)
- vmode = V4SFmode;
- else if (mode == DFmode)
- vmode = V2DFmode;
- else if (mode == TFmode)
- vmode = mode;
- else
- gcc_unreachable ();
-
- mask = ix86_build_signbit_mask (vmode, 0, 0);
-
- if (CONST_DOUBLE_P (op0))
- {
- if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
- op0 = simplify_unary_operation (ABS, mode, op0, mode);
-
- if (mode == SFmode || mode == DFmode)
- {
- if (op0 == CONST0_RTX (mode))
- op0 = CONST0_RTX (vmode);
- else
- {
- rtx v = ix86_build_const_vector (vmode, false, op0);
-
- op0 = force_reg (vmode, v);
- }
- }
- else if (op0 != CONST0_RTX (mode))
- op0 = force_reg (mode, op0);
-
- emit_insn (gen_copysign3_const (mode, dest, op0, op1, mask));
- }
- else
- {
- rtx nmask = ix86_build_signbit_mask (vmode, 0, 1);
-
- emit_insn (gen_copysign3_var
- (mode, dest, NULL_RTX, op0, op1, nmask, mask));
- }
-}
-
-/* Deconstruct a copysign operation into bit masks. Operand 0 is known to
- be a constant, and so has already been expanded into a vector constant. */
-
-void
-ix86_split_copysign_const (rtx operands[])
-{
- machine_mode mode, vmode;
- rtx dest, op0, mask, x;
-
- dest = operands[0];
- op0 = operands[1];
- mask = operands[3];
-
- mode = GET_MODE (dest);
- vmode = GET_MODE (mask);
-
- dest = lowpart_subreg (vmode, dest, mode);
- x = gen_rtx_AND (vmode, dest, mask);
- emit_insn (gen_rtx_SET (dest, x));
-
- if (op0 != CONST0_RTX (vmode))
- {
- x = gen_rtx_IOR (vmode, dest, op0);
- emit_insn (gen_rtx_SET (dest, x));
- }
-}
-
-/* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
- so we have to do two masks. */
-
-void
-ix86_split_copysign_var (rtx operands[])
-{
- machine_mode mode, vmode;
- rtx dest, scratch, op0, op1, mask, nmask, x;
-
- dest = operands[0];
- scratch = operands[1];
- op0 = operands[2];
- op1 = operands[3];
- nmask = operands[4];
- mask = operands[5];
-
- mode = GET_MODE (dest);
- vmode = GET_MODE (mask);
-
- if (rtx_equal_p (op0, op1))
- {
- /* Shouldn't happen often (it's useless, obviously), but when it does
- we'd generate incorrect code if we continue below. */
- emit_move_insn (dest, op0);
- return;
- }
-
- if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
- {
- gcc_assert (REGNO (op1) == REGNO (scratch));
-
- x = gen_rtx_AND (vmode, scratch, mask);
- emit_insn (gen_rtx_SET (scratch, x));
-
- dest = mask;
- op0 = lowpart_subreg (vmode, op0, mode);
- x = gen_rtx_NOT (vmode, dest);
- x = gen_rtx_AND (vmode, x, op0);
- emit_insn (gen_rtx_SET (dest, x));
- }
- else
- {
- if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
- {
- x = gen_rtx_AND (vmode, scratch, mask);
- }
- else /* alternative 2,4 */
- {
- gcc_assert (REGNO (mask) == REGNO (scratch));
- op1 = lowpart_subreg (vmode, op1, mode);
- x = gen_rtx_AND (vmode, scratch, op1);
- }
- emit_insn (gen_rtx_SET (scratch, x));
-
- if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
- {
- dest = lowpart_subreg (vmode, op0, mode);
- x = gen_rtx_AND (vmode, dest, nmask);
- }
- else /* alternative 3,4 */
- {
- gcc_assert (REGNO (nmask) == REGNO (dest));
- dest = nmask;
- op0 = lowpart_subreg (vmode, op0, mode);
- x = gen_rtx_AND (vmode, dest, op0);
- }
- emit_insn (gen_rtx_SET (dest, x));
- }
-
- x = gen_rtx_IOR (vmode, dest, scratch);
- emit_insn (gen_rtx_SET (dest, x));
-}
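
The two masks implement the usual bitwise copysign: magnitude bits from one operand, sign bit from the other.  A scalar sketch (hypothetical helper):

#include <stdint.h>
#include <string.h>

/* Sketch: (mag & nmask) | (sgn & mask), with mask the sign bit.  */
static float
copysign_sketch (float mag, float sgn)
{
  uint32_t m, s;
  memcpy (&m, &mag, sizeof m);
  memcpy (&s, &sgn, sizeof s);
  m = (m & 0x7fffffffu) | (s & 0x80000000u);
  memcpy (&mag, &m, sizeof m);
  return mag;
}
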
-
-/* Expand an xorsign operation. */
-
-void
-ix86_expand_xorsign (rtx operands[])
-{
- machine_mode mode, vmode;
- rtx dest, op0, op1, mask;
-
- dest = operands[0];
- op0 = operands[1];
- op1 = operands[2];
-
- mode = GET_MODE (dest);
-
- if (mode == SFmode)
- vmode = V4SFmode;
- else if (mode == DFmode)
- vmode = V2DFmode;
- else
- gcc_unreachable ();
-
- mask = ix86_build_signbit_mask (vmode, 0, 0);
-
- emit_insn (gen_xorsign3_1 (mode, dest, op0, op1, mask));
-}
-
-/* Deconstruct an xorsign operation into bit masks. */
-
-void
-ix86_split_xorsign (rtx operands[])
-{
- machine_mode mode, vmode;
- rtx dest, op0, mask, x;
-
- dest = operands[0];
- op0 = operands[1];
- mask = operands[3];
-
- mode = GET_MODE (dest);
- vmode = GET_MODE (mask);
-
- dest = lowpart_subreg (vmode, dest, mode);
- x = gen_rtx_AND (vmode, dest, mask);
- emit_insn (gen_rtx_SET (dest, x));
-
- op0 = lowpart_subreg (vmode, op0, mode);
- x = gen_rtx_XOR (vmode, dest, op0);
- emit_insn (gen_rtx_SET (dest, x));
-}
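
A scalar sketch of xorsign, which flips the sign of the first operand when the second is negative (helper name invented):

#include <stdint.h>
#include <string.h>

/* Sketch: x ^ (y & signbit), i.e. x * copysign (1.0, y) without a
   multiply.  */
static float
xorsign_sketch (float x, float y)
{
  uint32_t xb, yb;
  memcpy (&xb, &x, sizeof xb);
  memcpy (&yb, &y, sizeof yb);
  xb ^= yb & 0x80000000u;
  memcpy (&x, &xb, sizeof x);
  return x;
}
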
-
-static rtx ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1);
-
-void
-ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
-{
- machine_mode mode = GET_MODE (op0);
- rtx tmp;
-
- /* Handle the special case of a vector comparison with a boolean result;
- transform it using the ptest instruction.  */
- if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
- {
- rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG);
- machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode;
-
- gcc_assert (code == EQ || code == NE);
- /* Generate XOR since we can't check that one operand is a zero vector.  */
- tmp = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1)));
- tmp = gen_lowpart (p_mode, tmp);
- emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG),
- gen_rtx_UNSPEC (CCmode,
- gen_rtvec (2, tmp, tmp),
- UNSPEC_PTEST)));
- tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx);
- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
- gen_rtx_LABEL_REF (VOIDmode, label),
- pc_rtx);
- emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
- return;
- }
-
- switch (mode)
- {
- case E_SFmode:
- case E_DFmode:
- case E_XFmode:
- case E_QImode:
- case E_HImode:
- case E_SImode:
- simple:
- tmp = ix86_expand_compare (code, op0, op1);
- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
- gen_rtx_LABEL_REF (VOIDmode, label),
- pc_rtx);
- emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
- return;
-
- case E_DImode:
- if (TARGET_64BIT)
- goto simple;
- /* For a 32-bit target, a DImode comparison may be performed in
- SSE registers.  To allow this we avoid the split to SImode,
- which is achieved by doing the xor in DImode and then comparing
- with zero (recognized by the STV pass).  We don't compare using
- xor when optimizing for size.  */
- if (!optimize_insn_for_size_p ()
- && TARGET_STV
- && (code == EQ || code == NE))
- {
- op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1));
- op1 = const0_rtx;
- }
- /* FALLTHRU */
- case E_TImode:
- /* Expand a double-word branch into multiple compare+branch sequences.  */
- {
- rtx lo[2], hi[2];
- rtx_code_label *label2;
- enum rtx_code code1, code2, code3;
- machine_mode submode;
-
- if (CONSTANT_P (op0) && !CONSTANT_P (op1))
- {
- std::swap (op0, op1);
- code = swap_condition (code);
- }
-
- split_double_mode (mode, &op0, 1, lo+0, hi+0);
- split_double_mode (mode, &op1, 1, lo+1, hi+1);
-
- submode = mode == DImode ? SImode : DImode;
-
- /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
- avoid two branches. This costs one extra insn, so disable when
- optimizing for size. */
-
- if ((code == EQ || code == NE)
- && (!optimize_insn_for_size_p ()
- || hi[1] == const0_rtx || lo[1] == const0_rtx))
- {
- rtx xor0, xor1;
-
- xor1 = hi[0];
- if (hi[1] != const0_rtx)
- xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
- NULL_RTX, 0, OPTAB_WIDEN);
-
- xor0 = lo[0];
- if (lo[1] != const0_rtx)
- xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
- NULL_RTX, 0, OPTAB_WIDEN);
-
- tmp = expand_binop (submode, ior_optab, xor1, xor0,
- NULL_RTX, 0, OPTAB_WIDEN);
-
- ix86_expand_branch (code, tmp, const0_rtx, label);
- return;
- }
-
- /* Otherwise, if we are doing a less-than or greater-or-equal
- comparison, op1 is a constant and the low word is zero, then we
- can just examine the high word.  Similarly for a low word of -1
- and a less-or-equal or greater-than comparison.  */
-
- if (CONST_INT_P (hi[1]))
- switch (code)
- {
- case LT: case LTU: case GE: case GEU:
- if (lo[1] == const0_rtx)
- {
- ix86_expand_branch (code, hi[0], hi[1], label);
- return;
- }
- break;
- case LE: case LEU: case GT: case GTU:
- if (lo[1] == constm1_rtx)
- {
- ix86_expand_branch (code, hi[0], hi[1], label);
- return;
- }
- break;
- default:
- break;
- }
-
- /* Emulate comparisons that do not depend on Zero flag with
- double-word subtraction. Note that only Overflow, Sign
- and Carry flags are valid, so swap arguments and condition
- of comparisons that would otherwise test Zero flag. */
-
- switch (code)
- {
- case LE: case LEU: case GT: case GTU:
- std::swap (lo[0], lo[1]);
- std::swap (hi[0], hi[1]);
- code = swap_condition (code);
- /* FALLTHRU */
-
- case LT: case LTU: case GE: case GEU:
- {
- bool uns = (code == LTU || code == GEU);
- rtx (*sbb_insn) (machine_mode, rtx, rtx, rtx)
- = uns ? gen_sub3_carry_ccc : gen_sub3_carry_ccgz;
-
- if (!nonimmediate_operand (lo[0], submode))
- lo[0] = force_reg (submode, lo[0]);
- if (!x86_64_general_operand (lo[1], submode))
- lo[1] = force_reg (submode, lo[1]);
-
- if (!register_operand (hi[0], submode))
- hi[0] = force_reg (submode, hi[0]);
- if ((uns && !nonimmediate_operand (hi[1], submode))
- || (!uns && !x86_64_general_operand (hi[1], submode)))
- hi[1] = force_reg (submode, hi[1]);
-
- emit_insn (gen_cmp_1 (submode, lo[0], lo[1]));
-
- tmp = gen_rtx_SCRATCH (submode);
- emit_insn (sbb_insn (submode, tmp, hi[0], hi[1]));
-
- tmp = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
- ix86_expand_branch (code, tmp, const0_rtx, label);
- return;
- }
-
- default:
- break;
- }
-
- /* Otherwise, we need two or three jumps. */
-
- label2 = gen_label_rtx ();
-
- code1 = code;
- code2 = swap_condition (code);
- code3 = unsigned_condition (code);
-
- switch (code)
- {
- case LT: case GT: case LTU: case GTU:
- break;
-
- case LE: code1 = LT; code2 = GT; break;
- case GE: code1 = GT; code2 = LT; break;
- case LEU: code1 = LTU; code2 = GTU; break;
- case GEU: code1 = GTU; code2 = LTU; break;
-
- case EQ: code1 = UNKNOWN; code2 = NE; break;
- case NE: code2 = UNKNOWN; break;
-
- default:
- gcc_unreachable ();
- }
-
- /*
- * a < b =>
- * if (hi(a) < hi(b)) goto true;
- * if (hi(a) > hi(b)) goto false;
- * if (lo(a) < lo(b)) goto true;
- * false:
- */
-
- if (code1 != UNKNOWN)
- ix86_expand_branch (code1, hi[0], hi[1], label);
- if (code2 != UNKNOWN)
- ix86_expand_branch (code2, hi[0], hi[1], label2);
-
- ix86_expand_branch (code3, lo[0], lo[1], label);
-
- if (code2 != UNKNOWN)
- emit_label (label2);
- return;
- }
-
- default:
- gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
- goto simple;
- }
-}
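
The double-word tricks used above can be stated as scalar identities.  A hedged sketch (helper names invented):

/* Sketch: equality folds to one test,
     a == b  <=>  ((a.hi ^ b.hi) | (a.lo ^ b.lo)) == 0,
   and unsigned less-than is the borrow out of the double-word
   subtraction, which cmp+sbb computes.  */
static int
dw_eq_sketch (unsigned int alo, unsigned int ahi,
	      unsigned int blo, unsigned int bhi)
{
  return ((ahi ^ bhi) | (alo ^ blo)) == 0;
}

static int
dw_ltu_sketch (unsigned int alo, unsigned int ahi,
	       unsigned int blo, unsigned int bhi)
{
  return ahi < bhi || (ahi == bhi && alo < blo);
}
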
-
-/* Figure out whether to use unordered fp comparisons. */
-
-static bool
-ix86_unordered_fp_compare (enum rtx_code code)
-{
- if (!TARGET_IEEE_FP)
- return false;
-
- switch (code)
- {
- case LT:
- case LE:
- case GT:
- case GE:
- case LTGT:
- return false;
-
- case EQ:
- case NE:
-
- case UNORDERED:
- case ORDERED:
- case UNLT:
- case UNLE:
- case UNGT:
- case UNGE:
- case UNEQ:
- return true;
-
- default:
- gcc_unreachable ();
- }
-}
-
-/* Return a comparison we can do that is equivalent to
- swap_condition (code), except possibly for orderedness.
- Never change orderedness if TARGET_IEEE_FP, returning
- UNKNOWN in that case if necessary.  */
-
-static enum rtx_code
-ix86_fp_swap_condition (enum rtx_code code)
-{
- switch (code)
- {
- case GT: /* GTU - CF=0 & ZF=0 */
- return TARGET_IEEE_FP ? UNKNOWN : UNLT;
- case GE: /* GEU - CF=0 */
- return TARGET_IEEE_FP ? UNKNOWN : UNLE;
- case UNLT: /* LTU - CF=1 */
- return TARGET_IEEE_FP ? UNKNOWN : GT;
- case UNLE: /* LEU - CF=1 | ZF=1 */
- return TARGET_IEEE_FP ? UNKNOWN : GE;
- default:
- return swap_condition (code);
- }
-}
-
-/* Return the cost of comparison CODE using the best strategy for
- performance.  All of the following functions use the number of
- instructions as a cost metric.  In the future this should be tweaked to
- compute bytes for optimize_size and to take into account the performance
- of various instructions on various CPUs.  */
-
-static int
-ix86_fp_comparison_cost (enum rtx_code code)
-{
- int arith_cost;
-
- /* The cost of code using bit-twiddling on %ah. */
- switch (code)
- {
- case UNLE:
- case UNLT:
- case LTGT:
- case GT:
- case GE:
- case UNORDERED:
- case ORDERED:
- case UNEQ:
- arith_cost = 4;
- break;
- case LT:
- case NE:
- case EQ:
- case UNGE:
- arith_cost = TARGET_IEEE_FP ? 5 : 4;
- break;
- case LE:
- case UNGT:
- arith_cost = TARGET_IEEE_FP ? 6 : 4;
- break;
- default:
- gcc_unreachable ();
- }
-
- switch (ix86_fp_comparison_strategy (code))
- {
- case IX86_FPCMP_COMI:
- return arith_cost > 4 ? 3 : 2;
- case IX86_FPCMP_SAHF:
- return arith_cost > 4 ? 4 : 3;
- default:
- return arith_cost;
- }
-}
-
-/* Swap, force into registers, or otherwise massage the two operands
- to a fp comparison. The operands are updated in place; the new
- comparison code is returned. */
-
-static enum rtx_code
-ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
-{
- bool unordered_compare = ix86_unordered_fp_compare (code);
- rtx op0 = *pop0, op1 = *pop1;
- machine_mode op_mode = GET_MODE (op0);
- bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
-
- /* All of the unordered compare instructions only work on registers.
- The same is true of the fcomi compare instructions. The XFmode
- compare instructions require registers except when comparing
- against zero or when converting operand 1 from fixed point to
- floating point. */
-
- if (!is_sse
- && (unordered_compare
- || (op_mode == XFmode
- && ! (standard_80387_constant_p (op0) == 1
- || standard_80387_constant_p (op1) == 1)
- && GET_CODE (op1) != FLOAT)
- || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
- {
- op0 = force_reg (op_mode, op0);
- op1 = force_reg (op_mode, op1);
- }
- else
- {
- /* %%% We only allow op1 in memory; op0 must be st(0). So swap
- things around if they appear profitable, otherwise force op0
- into a register. */
-
- if (standard_80387_constant_p (op0) == 0
- || (MEM_P (op0)
- && ! (standard_80387_constant_p (op1) == 0
- || MEM_P (op1))))
- {
- enum rtx_code new_code = ix86_fp_swap_condition (code);
- if (new_code != UNKNOWN)
- {
- std::swap (op0, op1);
- code = new_code;
- }
- }
-
- if (!REG_P (op0))
- op0 = force_reg (op_mode, op0);
-
- if (CONSTANT_P (op1))
- {
- int tmp = standard_80387_constant_p (op1);
- if (tmp == 0)
- op1 = validize_mem (force_const_mem (op_mode, op1));
- else if (tmp == 1)
- {
- if (TARGET_CMOVE)
- op1 = force_reg (op_mode, op1);
- }
- else
- op1 = force_reg (op_mode, op1);
- }
- }
-
- /* Try to rearrange the comparison to make it cheaper. */
- if (ix86_fp_comparison_cost (code)
- > ix86_fp_comparison_cost (swap_condition (code))
- && (REG_P (op1) || can_create_pseudo_p ()))
- {
- std::swap (op0, op1);
- code = swap_condition (code);
- if (!REG_P (op0))
- op0 = force_reg (op_mode, op0);
- }
-
- *pop0 = op0;
- *pop1 = op1;
- return code;
-}
-
-/* Generate insn patterns to do a floating point compare of OPERANDS. */
-
-static rtx
-ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1)
-{
- bool unordered_compare = ix86_unordered_fp_compare (code);
- machine_mode cmp_mode;
- rtx tmp, scratch;
-
- code = ix86_prepare_fp_compare_args (code, &op0, &op1);
-
- tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
- if (unordered_compare)
- tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
-
- /* Do fcomi/sahf based test when profitable. */
- switch (ix86_fp_comparison_strategy (code))
- {
- case IX86_FPCMP_COMI:
- cmp_mode = CCFPmode;
- emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp));
- break;
-
- case IX86_FPCMP_SAHF:
- cmp_mode = CCFPmode;
- tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
- scratch = gen_reg_rtx (HImode);
- emit_insn (gen_rtx_SET (scratch, tmp));
- emit_insn (gen_x86_sahf_1 (scratch));
- break;
-
- case IX86_FPCMP_ARITH:
- cmp_mode = CCNOmode;
- tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
- scratch = gen_reg_rtx (HImode);
- emit_insn (gen_rtx_SET (scratch, tmp));
-
- /* In the unordered case, we have to check C2 for NaNs, which
- doesn't happen to work out to anything nice combination-wise.
- So do some bit twiddling on the value we've got in AH to come
- up with an appropriate set of condition codes.  */
-
- switch (code)
- {
- case GT:
- case UNGT:
- if (code == GT || !TARGET_IEEE_FP)
- {
- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
- code = EQ;
- }
- else
- {
- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
- emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
- emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
- cmp_mode = CCmode;
- code = GEU;
- }
- break;
- case LT:
- case UNLT:
- if (code == LT && TARGET_IEEE_FP)
- {
- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
- emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
- cmp_mode = CCmode;
- code = EQ;
- }
- else
- {
- emit_insn (gen_testqi_ext_1_ccno (scratch, const1_rtx));
- code = NE;
- }
- break;
- case GE:
- case UNGE:
- if (code == GE || !TARGET_IEEE_FP)
- {
- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x05)));
- code = EQ;
- }
- else
- {
- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
- emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, const1_rtx));
- code = NE;
- }
- break;
- case LE:
- case UNLE:
- if (code == LE && TARGET_IEEE_FP)
- {
- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
- emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
- emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
- cmp_mode = CCmode;
- code = LTU;
- }
- else
- {
- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45)));
- code = NE;
- }
- break;
- case EQ:
- case UNEQ:
- if (code == EQ && TARGET_IEEE_FP)
- {
- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
- emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
- cmp_mode = CCmode;
- code = EQ;
- }
- else
- {
- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
- code = NE;
- }
- break;
- case NE:
- case LTGT:
- if (code == NE && TARGET_IEEE_FP)
- {
- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45)));
- emit_insn (gen_xorqi_ext_1_cc (scratch, scratch,
- GEN_INT (0x40)));
- code = NE;
- }
- else
- {
- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40)));
- code = EQ;
- }
- break;
-
- case UNORDERED:
- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
- code = NE;
- break;
- case ORDERED:
- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04)));
- code = EQ;
- break;
-
- default:
- gcc_unreachable ();
- }
- break;
-
- default:
- gcc_unreachable ();
- }
-
- /* Return the test that should be put into the flags user, i.e.
- the bcc, scc, or cmov instruction. */
- return gen_rtx_fmt_ee (code, VOIDmode,
- gen_rtx_REG (cmp_mode, FLAGS_REG),
- const0_rtx);
-}
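
For readers decoding the magic masks in the IX86_FPCMP_ARITH path: after fnstsw, %ah holds C0 = 0x01 (below), C2 = 0x04 (unordered) and C3 = 0x40 (equal), so 0x45 tests all three.  A hypothetical decoding sketch:

enum fpcmp_sketch { FP_GT, FP_LT, FP_EQ, FP_UNORDERED };

/* Sketch: classify an fcom result from the %ah byte tested above.  */
static enum fpcmp_sketch
decode_ah_sketch (unsigned char ah)
{
  if (ah & 0x04)        /* C2: unordered (NaN)  */
    return FP_UNORDERED;
  if (ah & 0x01)        /* C0: below            */
    return FP_LT;
  if (ah & 0x40)        /* C3: equal            */
    return FP_EQ;
  return FP_GT;
}
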
-
-/* Generate insn patterns to do an integer compare of OPERANDS. */
-
-static rtx
-ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
-{
- machine_mode cmpmode;
- rtx tmp, flags;
-
- cmpmode = SELECT_CC_MODE (code, op0, op1);
- flags = gen_rtx_REG (cmpmode, FLAGS_REG);
-
- /* This is very simple, but making the interface the same as in the
- FP case makes the rest of the code easier. */
- tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
- emit_insn (gen_rtx_SET (flags, tmp));
-
- /* Return the test that should be put into the flags user, i.e.
- the bcc, scc, or cmov instruction. */
- return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
-}
-
-static rtx
-ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
-{
- rtx ret;
-
- if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
- ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
-
- else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
- {
- gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
- ret = ix86_expand_fp_compare (code, op0, op1);
- }
- else
- ret = ix86_expand_int_compare (code, op0, op1);
-
- return ret;
-}
-
-void
-ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
-{
- rtx ret;
-
- gcc_assert (GET_MODE (dest) == QImode);
-
- ret = ix86_expand_compare (code, op0, op1);
- PUT_MODE (ret, QImode);
- emit_insn (gen_rtx_SET (dest, ret));
-}
-
-/* Expand a comparison setting or clearing the carry flag.  Return true
- when successful, and set *POP to the comparison for the operation.  */
-static bool
-ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
-{
- machine_mode mode
- = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
-
- /* Do not handle double-mode compares that go through the special path.  */
- if (mode == (TARGET_64BIT ? TImode : DImode))
- return false;
-
- if (SCALAR_FLOAT_MODE_P (mode))
- {
- rtx compare_op;
- rtx_insn *compare_seq;
-
- gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
-
- /* Shortcut: the following common codes never translate
- into carry flag compares.  */
- if (code == EQ || code == NE || code == UNEQ || code == LTGT
- || code == ORDERED || code == UNORDERED)
- return false;
-
- /* These comparisons require the zero flag; swap the operands so they won't.  */
- if ((code == GT || code == UNLE || code == LE || code == UNGT)
- && !TARGET_IEEE_FP)
- {
- std::swap (op0, op1);
- code = swap_condition (code);
- }
-
- /* Try to expand the comparison and verify that we end up with a
- carry flag based comparison.  This fails only when we decide to
- expand the comparison using arithmetic, which is not a common
- scenario.  */
- start_sequence ();
- compare_op = ix86_expand_fp_compare (code, op0, op1);
- compare_seq = get_insns ();
- end_sequence ();
-
- if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode)
- code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
- else
- code = GET_CODE (compare_op);
-
- if (code != LTU && code != GEU)
- return false;
-
- emit_insn (compare_seq);
- *pop = compare_op;
- return true;
- }
-
- if (!INTEGRAL_MODE_P (mode))
- return false;
-
- switch (code)
- {
- case LTU:
- case GEU:
- break;
-
- /* Convert a==0 into (unsigned)a<1. */
- case EQ:
- case NE:
- if (op1 != const0_rtx)
- return false;
- op1 = const1_rtx;
- code = (code == EQ ? LTU : GEU);
- break;
-
- /* Convert a>b into b<a or a>=b+1.  */
- case GTU:
- case LEU:
- if (CONST_INT_P (op1))
- {
- op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
- /* Bail out on overflow.  We could still swap the operands, but that
- would force loading the constant into a register.  */
- if (op1 == const0_rtx
- || !x86_64_immediate_operand (op1, GET_MODE (op1)))
- return false;
- code = (code == GTU ? GEU : LTU);
- }
- else
- {
- std::swap (op0, op1);
- code = (code == GTU ? LTU : GEU);
- }
- break;
-
- /* Convert a>=0 into (unsigned)a<0x80000000. */
- case LT:
- case GE:
- if (mode == DImode || op1 != const0_rtx)
- return false;
- op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
- code = (code == LT ? GEU : LTU);
- break;
- case LE:
- case GT:
- if (mode == DImode || op1 != constm1_rtx)
- return false;
- op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
- code = (code == LE ? GEU : LTU);
- break;
-
- default:
- return false;
- }
- /* Swapping operands may cause a constant to appear as the first operand.  */
- if (!nonimmediate_operand (op0, VOIDmode))
- {
- if (!can_create_pseudo_p ())
- return false;
- op0 = force_reg (mode, op0);
- }
- *pop = ix86_expand_compare (code, op0, op1);
- gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
- return true;
-}
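
The conversions above, restated as scalar identities on 32-bit values (constants illustrative, helper names invented):

/* Sketch:
     a == 0          <=>  a < 1
     a > 5           <=>  a >= 6            (constant + 1, no overflow)
     a >= 0 (signed) <=>  (unsigned) a < 0x80000000  */
static int eq0_as_ltu (unsigned int a)  { return a < 1; }
static int gtu_as_geu (unsigned int a)  { return a >= 6; }
static int ge0_as_ltu (int a)           { return (unsigned int) a < 0x80000000u; }
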
-
-/* Expand conditional increment or decrement using adc/sbb instructions.
- The default case using setcc followed by the conditional move can be
- done by generic code.  */
-bool
-ix86_expand_int_addcc (rtx operands[])
-{
- enum rtx_code code = GET_CODE (operands[1]);
- rtx flags;
- rtx (*insn) (machine_mode, rtx, rtx, rtx, rtx, rtx);
- rtx compare_op;
- rtx val = const0_rtx;
- bool fpcmp = false;
- machine_mode mode;
- rtx op0 = XEXP (operands[1], 0);
- rtx op1 = XEXP (operands[1], 1);
-
- if (operands[3] != const1_rtx
- && operands[3] != constm1_rtx)
- return false;
- if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
- return false;
- code = GET_CODE (compare_op);
-
- flags = XEXP (compare_op, 0);
-
- if (GET_MODE (flags) == CCFPmode)
- {
- fpcmp = true;
- code = ix86_fp_compare_code_to_integer (code);
- }
-
- if (code != LTU)
- {
- val = constm1_rtx;
- if (fpcmp)
- PUT_CODE (compare_op,
- reverse_condition_maybe_unordered
- (GET_CODE (compare_op)));
- else
- PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
- }
-
- mode = GET_MODE (operands[0]);
-
- /* Construct either adc or sbb insn. */
- if ((code == LTU) == (operands[3] == constm1_rtx))
- insn = gen_sub3_carry;
- else
- insn = gen_add3_carry;
-
- emit_insn (insn (mode, operands[0], operands[2], val, flags, compare_op));
-
- return true;
-}
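
The resulting pattern is compact in C: for unsigned a and b, the compare leaves (a < b) in the carry flag, so the increment is a single adc with no setcc or branch.  A minimal sketch:

/* Sketch: x += (a < b) compiles to cmpl b, a; adcl $0, x -- the
   carry out of the compare is added directly.  */
static unsigned int
addcc_sketch (unsigned int x, unsigned int a, unsigned int b)
{
  return x + (a < b);
}
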
-
-bool
-ix86_expand_int_movcc (rtx operands[])
-{
- enum rtx_code code = GET_CODE (operands[1]), compare_code;
- rtx_insn *compare_seq;
- rtx compare_op;
- machine_mode mode = GET_MODE (operands[0]);
- bool sign_bit_compare_p = false;
- rtx op0 = XEXP (operands[1], 0);
- rtx op1 = XEXP (operands[1], 1);
-
- if (GET_MODE (op0) == TImode
- || (GET_MODE (op0) == DImode
- && !TARGET_64BIT))
- return false;
-
- start_sequence ();
- compare_op = ix86_expand_compare (code, op0, op1);
- compare_seq = get_insns ();
- end_sequence ();
-
- compare_code = GET_CODE (compare_op);
-
- if ((op1 == const0_rtx && (code == GE || code == LT))
- || (op1 == constm1_rtx && (code == GT || code == LE)))
- sign_bit_compare_p = true;
-
- /* Don't attempt mode expansion here -- if we had to expand 5 or 6
- HImode insns, we'd be swallowed in word prefix ops. */
-
- if ((mode != HImode || TARGET_FAST_PREFIX)
- && (mode != (TARGET_64BIT ? TImode : DImode))
- && CONST_INT_P (operands[2])
- && CONST_INT_P (operands[3]))
- {
- rtx out = operands[0];
- HOST_WIDE_INT ct = INTVAL (operands[2]);
- HOST_WIDE_INT cf = INTVAL (operands[3]);
- HOST_WIDE_INT diff;
-
- diff = ct - cf;
- /* Sign bit compares are better done using shifts than by using
- sbb.  */
- if (sign_bit_compare_p
- || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
- {
- /* Detect overlap between destination and compare sources. */
- rtx tmp = out;
-
- if (!sign_bit_compare_p)
- {
- rtx flags;
- bool fpcmp = false;
-
- compare_code = GET_CODE (compare_op);
-
- flags = XEXP (compare_op, 0);
-
- if (GET_MODE (flags) == CCFPmode)
- {
- fpcmp = true;
- compare_code
- = ix86_fp_compare_code_to_integer (compare_code);
- }
-
- /* To simplify the rest of the code, restrict to the GEU case.  */
- if (compare_code == LTU)
- {
- std::swap (ct, cf);
- compare_code = reverse_condition (compare_code);
- code = reverse_condition (code);
- }
- else
- {
- if (fpcmp)
- PUT_CODE (compare_op,
- reverse_condition_maybe_unordered
- (GET_CODE (compare_op)));
- else
- PUT_CODE (compare_op,
- reverse_condition (GET_CODE (compare_op)));
- }
- diff = ct - cf;
-
- if (reg_overlap_mentioned_p (out, op0)
- || reg_overlap_mentioned_p (out, op1))
- tmp = gen_reg_rtx (mode);
-
- if (mode == DImode)
- emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
- else
- emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
- flags, compare_op));
- }
- else
- {
- if (code == GT || code == GE)
- code = reverse_condition (code);
- else
- {
- std::swap (ct, cf);
- diff = ct - cf;
- }
- tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
- }
-
- if (diff == 1)
- {
- /*
- * cmpl op0,op1
- * sbbl dest,dest
- * [addl dest, ct]
- *
- * Size 5 - 8.
- */
- if (ct)
- tmp = expand_simple_binop (mode, PLUS,
- tmp, GEN_INT (ct),
- copy_rtx (tmp), 1, OPTAB_DIRECT);
- }
- else if (cf == -1)
- {
- /*
- * cmpl op0,op1
- * sbbl dest,dest
- * orl $ct, dest
- *
- * Size 8.
- */
- tmp = expand_simple_binop (mode, IOR,
- tmp, GEN_INT (ct),
- copy_rtx (tmp), 1, OPTAB_DIRECT);
- }
- else if (diff == -1 && ct)
- {
- /*
- * cmpl op0,op1
- * sbbl dest,dest
- * notl dest
- * [addl dest, cf]
- *
- * Size 8 - 11.
- */
- tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
- if (cf)
- tmp = expand_simple_binop (mode, PLUS,
- copy_rtx (tmp), GEN_INT (cf),
- copy_rtx (tmp), 1, OPTAB_DIRECT);
- }
- else
- {
- /*
- * cmpl op0,op1
- * sbbl dest,dest
- * [notl dest]
- * andl cf - ct, dest
- * [addl dest, ct]
- *
- * Size 8 - 11.
- */
-
- if (cf == 0)
- {
- cf = ct;
- ct = 0;
- tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
- }
-
- tmp = expand_simple_binop (mode, AND,
- copy_rtx (tmp),
- gen_int_mode (cf - ct, mode),
- copy_rtx (tmp), 1, OPTAB_DIRECT);
- if (ct)
- tmp = expand_simple_binop (mode, PLUS,
- copy_rtx (tmp), GEN_INT (ct),
- copy_rtx (tmp), 1, OPTAB_DIRECT);
- }
-
- if (!rtx_equal_p (tmp, out))
- emit_move_insn (copy_rtx (out), copy_rtx (tmp));
-
- return true;
- }
-
- if (diff < 0)
- {
- machine_mode cmp_mode = GET_MODE (op0);
- enum rtx_code new_code;
-
- if (SCALAR_FLOAT_MODE_P (cmp_mode))
- {
- gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
-
- /* We may be reversing a non-trapping
- comparison to a trapping comparison. */
- if (HONOR_NANS (cmp_mode) && flag_trapping_math
- && code != EQ && code != NE
- && code != ORDERED && code != UNORDERED)
- new_code = UNKNOWN;
- else
- new_code = reverse_condition_maybe_unordered (code);
- }
- else
- new_code = ix86_reverse_condition (code, cmp_mode);
- if (new_code != UNKNOWN)
- {
- std::swap (ct, cf);
- diff = -diff;
- code = new_code;
- }
- }
-
- compare_code = UNKNOWN;
- if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
- && CONST_INT_P (op1))
- {
- if (op1 == const0_rtx
- && (code == LT || code == GE))
- compare_code = code;
- else if (op1 == constm1_rtx)
- {
- if (code == LE)
- compare_code = LT;
- else if (code == GT)
- compare_code = GE;
- }
- }
-
- /* Optimize dest = (op0 < 0) ? -1 : cf. */
- if (compare_code != UNKNOWN
- && GET_MODE (op0) == GET_MODE (out)
- && (cf == -1 || ct == -1))
- {
- /* If the lea code below could be used, only optimize here
- if it results in a 2-insn sequence.  */
-
- if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
- || diff == 3 || diff == 5 || diff == 9)
- || (compare_code == LT && ct == -1)
- || (compare_code == GE && cf == -1))
- {
- /*
- * notl op1 (if necessary)
- * sarl $31, op1
- * orl cf, op1
- */
- if (ct != -1)
- {
- cf = ct;
- ct = -1;
- code = reverse_condition (code);
- }
-
- out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
-
- out = expand_simple_binop (mode, IOR,
- out, GEN_INT (cf),
- out, 1, OPTAB_DIRECT);
- if (out != operands[0])
- emit_move_insn (operands[0], out);
-
- return true;
- }
- }
-
-
- if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
- || diff == 3 || diff == 5 || diff == 9)
- && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
- && (mode != DImode
- || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
- {
- /*
- * xorl dest,dest
- * cmpl op1,op2
- * setcc dest
- * lea cf(dest*(ct-cf)),dest
- *
- * Size 14.
- *
- * This also catches the degenerate setcc-only case.
- */
-
- rtx tmp;
- int nops;
-
- out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
-
- nops = 0;
- /* On x86_64 the lea instruction operates on Pmode, so we need
- to get the arithmetic done in the proper mode to match.  */
- if (diff == 1)
- tmp = copy_rtx (out);
- else
- {
- rtx out1;
- out1 = copy_rtx (out);
- tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
- nops++;
- if (diff & 1)
- {
- tmp = gen_rtx_PLUS (mode, tmp, out1);
- nops++;
- }
- }
- if (cf != 0)
- {
- tmp = plus_constant (mode, tmp, cf);
- nops++;
- }
- if (!rtx_equal_p (tmp, out))
- {
- if (nops == 1)
- out = force_operand (tmp, copy_rtx (out));
- else
- emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
- }
- if (!rtx_equal_p (out, operands[0]))
- emit_move_insn (operands[0], copy_rtx (out));
-
- return true;
- }
-
- /*
- * General case: Jumpful:
- * xorl dest,dest cmpl op1, op2
- * cmpl op1, op2 movl ct, dest
- * setcc dest jcc 1f
- * decl dest movl cf, dest
- * andl (cf-ct),dest 1:
- * addl ct,dest
- *
- * Size 20. Size 14.
- *
- * This is reasonably steep, but branch mispredict costs are
- * high on modern CPUs, so consider failing only if optimizing
- * for space.
- */
-
- if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
- && BRANCH_COST (optimize_insn_for_speed_p (),
- false) >= 2)
- {
- if (cf == 0)
- {
- machine_mode cmp_mode = GET_MODE (op0);
- enum rtx_code new_code;
-
- if (SCALAR_FLOAT_MODE_P (cmp_mode))
- {
- gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
-
- /* We may be reversing a non-trapping
- comparison to a trapping comparison. */
- if (HONOR_NANS (cmp_mode) && flag_trapping_math
- && code != EQ && code != NE
- && code != ORDERED && code != UNORDERED)
- new_code = UNKNOWN;
- else
- new_code = reverse_condition_maybe_unordered (code);
-
- }
- else
- {
- new_code = ix86_reverse_condition (code, cmp_mode);
- if (compare_code != UNKNOWN && new_code != UNKNOWN)
- compare_code = reverse_condition (compare_code);
- }
-
- if (new_code != UNKNOWN)
- {
- cf = ct;
- ct = 0;
- code = new_code;
- }
- }
-
- if (compare_code != UNKNOWN)
- {
- /* notl op1 (if needed)
- sarl $31, op1
- andl (cf-ct), op1
- addl ct, op1
-
- For x < 0 (resp. x <= -1) there will be no notl,
- so if possible swap the constants to get rid of the
- complement.
- True/false will be -1/0 while code below (store flag
- followed by decrement) is 0/-1, so the constants need
- to be exchanged once more. */
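- /* E.g. for dest = (x < 0) ? 7 : 12 the constants are swapped
- first (ct = 12, cf = 7); sarl $31 then yields 0 or -1,
- andl $-5 (cf - ct) yields 0 or -5, and addl $12 produces
- 12 for x >= 0 and 7 for x < 0. */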
-
- if (compare_code == GE || !cf)
- {
- code = reverse_condition (code);
- compare_code = LT;
- }
- else
- std::swap (ct, cf);
-
- out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
- }
- else
- {
- out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
-
- out = expand_simple_binop (mode, PLUS, copy_rtx (out),
- constm1_rtx,
- copy_rtx (out), 1, OPTAB_DIRECT);
- }
-
- out = expand_simple_binop (mode, AND, copy_rtx (out),
- gen_int_mode (cf - ct, mode),
- copy_rtx (out), 1, OPTAB_DIRECT);
- if (ct)
- out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
- copy_rtx (out), 1, OPTAB_DIRECT);
- if (!rtx_equal_p (out, operands[0]))
- emit_move_insn (operands[0], copy_rtx (out));
-
- return true;
- }
- }
-
- if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
- {
- /* Try a few things more with specific constants and a variable. */
-
- optab op;
- rtx var, orig_out, out, tmp;
-
- if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
- return false;
-
- /* If one of the two operands is an interesting constant, load a
- constant with the above and mask it in with a logical operation. */
-
- if (CONST_INT_P (operands[2]))
- {
- var = operands[3];
- if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
- operands[3] = constm1_rtx, op = and_optab;
- else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
- operands[3] = const0_rtx, op = ior_optab;
- else
- return false;
- }
- else if (CONST_INT_P (operands[3]))
- {
- var = operands[2];
- if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
- operands[2] = constm1_rtx, op = and_optab;
- else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
- operands[2] = const0_rtx, op = ior_optab;
- else
- return false;
- }
- else
- return false;
-
- orig_out = operands[0];
- tmp = gen_reg_rtx (mode);
- operands[0] = tmp;
-
- /* Recurse to get the constant loaded. */
- if (!ix86_expand_int_movcc (operands))
- return false;
-
- /* Mask in the interesting variable. */
- out = expand_binop (mode, op, var, tmp, orig_out, 0,
- OPTAB_WIDEN);
- if (!rtx_equal_p (out, orig_out))
- emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
-
- return true;
- }
-
- /*
- * For comparison with above,
- *
- * movl cf,dest
- * movl ct,tmp
- * cmpl op1,op2
- * cmovcc tmp,dest
- *
- * Size 15.
- */
-
- if (! nonimmediate_operand (operands[2], mode))
- operands[2] = force_reg (mode, operands[2]);
- if (! nonimmediate_operand (operands[3], mode))
- operands[3] = force_reg (mode, operands[3]);
-
- if (! register_operand (operands[2], VOIDmode)
- && (mode == QImode
- || ! register_operand (operands[3], VOIDmode)))
- operands[2] = force_reg (mode, operands[2]);
-
- if (mode == QImode
- && ! register_operand (operands[3], VOIDmode))
- operands[3] = force_reg (mode, operands[3]);
-
- emit_insn (compare_seq);
- emit_insn (gen_rtx_SET (operands[0],
- gen_rtx_IF_THEN_ELSE (mode,
- compare_op, operands[2],
- operands[3])));
- return true;
-}
-
-/* Detect conditional moves that exactly match min/max operational
- semantics. Note that this is IEEE safe, as long as we don't
- interchange the operands.
-
- Returns FALSE if this conditional move doesn't match a MIN/MAX,
- and TRUE if the operation is successful and instructions are emitted. */
-
-static bool
-ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
- rtx cmp_op1, rtx if_true, rtx if_false)
-{
- machine_mode mode;
- bool is_min;
- rtx tmp;
-
- if (code == LT)
- ;
- else if (code == UNGE)
- std::swap (if_true, if_false);
- else
- return false;
-
- if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
- is_min = true;
- else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
- is_min = false;
- else
- return false;
-
- mode = GET_MODE (dest);
-
- /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
- but MODE may be a vector mode and thus not appropriate. */
- if (!flag_finite_math_only || flag_signed_zeros)
- {
- int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
- rtvec v;
-
- if_true = force_reg (mode, if_true);
- v = gen_rtvec (2, if_true, if_false);
- tmp = gen_rtx_UNSPEC (mode, v, u);
- }
- else
- {
- code = is_min ? SMIN : SMAX;
- if (MEM_P (if_true) && MEM_P (if_false))
- if_true = force_reg (mode, if_true);
- tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
- }
-
- emit_insn (gen_rtx_SET (dest, tmp));
- return true;
-}
-
-/* Return true if MODE is valid for a vector compare to a mask register,
- and likewise for a conditional vector move with a mask register. */
-static bool
-ix86_valid_mask_cmp_mode (machine_mode mode)
-{
- /* XOP has its own vector conditional movement. */
- if (TARGET_XOP && !TARGET_AVX512F)
- return false;
-
- /* AVX512F is needed for mask operation. */
- if (!(TARGET_AVX512F && VECTOR_MODE_P (mode)))
- return false;
-
- /* AVX512BW is needed for vector QI/HImode,
- AVX512VL is needed for 128/256-bit vector. */
- machine_mode inner_mode = GET_MODE_INNER (mode);
- int vector_size = GET_MODE_SIZE (mode);
- if ((inner_mode == QImode || inner_mode == HImode) && !TARGET_AVX512BW)
- return false;
-
- return vector_size == 64 || TARGET_AVX512VL;
-}
-
-/* Expand an SSE comparison. Return the register with the result. */
-
-static rtx
-ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
- rtx op_true, rtx op_false)
-{
- machine_mode mode = GET_MODE (dest);
- machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
-
- /* In the general case the result of a comparison can differ from the operands' type. */
- machine_mode cmp_mode;
-
- /* In AVX512F the result of comparison is an integer mask. */
- bool maskcmp = false;
- rtx x;
-
- if (ix86_valid_mask_cmp_mode (cmp_ops_mode))
- {
- unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode);
- maskcmp = true;
- cmp_mode = nbits > 8 ? int_mode_for_size (nbits, 0).require () : E_QImode;
- }
- else
- cmp_mode = cmp_ops_mode;
-
- cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
-
- int (*op1_predicate)(rtx, machine_mode)
- = VECTOR_MODE_P (cmp_ops_mode) ? vector_operand : nonimmediate_operand;
-
- if (!op1_predicate (cmp_op1, cmp_ops_mode))
- cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
-
- if (optimize
- || (maskcmp && cmp_mode != mode)
- || (op_true && reg_overlap_mentioned_p (dest, op_true))
- || (op_false && reg_overlap_mentioned_p (dest, op_false)))
- dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
-
- x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
-
- if (cmp_mode != mode && !maskcmp)
- {
- x = force_reg (cmp_ops_mode, x);
- convert_move (dest, x, false);
- }
- else
- emit_insn (gen_rtx_SET (dest, x));
-
- return dest;
-}
-
-/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
- operations. This is used for both scalar and vector conditional moves. */
-
-void
-ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
-{
- machine_mode mode = GET_MODE (dest);
- machine_mode cmpmode = GET_MODE (cmp);
-
- /* In AVX512F the result of comparison is an integer mask. */
- bool maskcmp = mode != cmpmode && ix86_valid_mask_cmp_mode (mode);
-
- rtx t2, t3, x;
-
- /* If we have an integer mask and FP value then we need
- to cast mask to FP mode. */
- if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
- {
- cmp = force_reg (cmpmode, cmp);
- cmp = gen_rtx_SUBREG (mode, cmp, 0);
- }
-
- if (maskcmp)
- {
- /* Using vector move with mask register. */
- cmp = force_reg (cmpmode, cmp);
- /* Optimize for mask zero. */
- op_true = (op_true != CONST0_RTX (mode)
- ? force_reg (mode, op_true) : op_true);
- op_false = (op_false != CONST0_RTX (mode)
- ? force_reg (mode, op_false) : op_false);
- if (op_true == CONST0_RTX (mode))
- {
- rtx (*gen_not) (rtx, rtx);
- switch (cmpmode)
- {
- case E_QImode: gen_not = gen_knotqi; break;
- case E_HImode: gen_not = gen_knothi; break;
- case E_SImode: gen_not = gen_knotsi; break;
- case E_DImode: gen_not = gen_knotdi; break;
- default: gcc_unreachable ();
- }
- rtx n = gen_reg_rtx (cmpmode);
- emit_insn (gen_not (n, cmp));
- cmp = n;
- /* Reverse op_true and op_false. */
- std::swap (op_true, op_false);
- }
-
- rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp);
- emit_insn (gen_rtx_SET (dest, vec_merge));
- return;
- }
- else if (vector_all_ones_operand (op_true, mode)
- && op_false == CONST0_RTX (mode))
- {
- emit_insn (gen_rtx_SET (dest, cmp));
- return;
- }
- else if (op_false == CONST0_RTX (mode))
- {
- op_true = force_reg (mode, op_true);
- x = gen_rtx_AND (mode, cmp, op_true);
- emit_insn (gen_rtx_SET (dest, x));
- return;
- }
- else if (op_true == CONST0_RTX (mode))
- {
- op_false = force_reg (mode, op_false);
- x = gen_rtx_NOT (mode, cmp);
- x = gen_rtx_AND (mode, x, op_false);
- emit_insn (gen_rtx_SET (dest, x));
- return;
- }
- else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
- {
- op_false = force_reg (mode, op_false);
- x = gen_rtx_IOR (mode, cmp, op_false);
- emit_insn (gen_rtx_SET (dest, x));
- return;
- }
- else if (TARGET_XOP)
- {
- op_true = force_reg (mode, op_true);
-
- if (!nonimmediate_operand (op_false, mode))
- op_false = force_reg (mode, op_false);
-
- emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
- op_true,
- op_false)));
- return;
- }
-
- rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
- rtx d = dest;
-
- if (!vector_operand (op_true, mode))
- op_true = force_reg (mode, op_true);
-
- op_false = force_reg (mode, op_false);
-
- switch (mode)
- {
- case E_V4SFmode:
- if (TARGET_SSE4_1)
- gen = gen_sse4_1_blendvps;
- break;
- case E_V2DFmode:
- if (TARGET_SSE4_1)
- gen = gen_sse4_1_blendvpd;
- break;
- case E_SFmode:
- if (TARGET_SSE4_1)
- {
- gen = gen_sse4_1_blendvss;
- op_true = force_reg (mode, op_true);
- }
- break;
- case E_DFmode:
- if (TARGET_SSE4_1)
- {
- gen = gen_sse4_1_blendvsd;
- op_true = force_reg (mode, op_true);
- }
- break;
- case E_V16QImode:
- case E_V8HImode:
- case E_V4SImode:
- case E_V2DImode:
- if (TARGET_SSE4_1)
- {
- gen = gen_sse4_1_pblendvb;
- if (mode != V16QImode)
- d = gen_reg_rtx (V16QImode);
- op_false = gen_lowpart (V16QImode, op_false);
- op_true = gen_lowpart (V16QImode, op_true);
- cmp = gen_lowpart (V16QImode, cmp);
- }
- break;
- case E_V8SFmode:
- if (TARGET_AVX)
- gen = gen_avx_blendvps256;
- break;
- case E_V4DFmode:
- if (TARGET_AVX)
- gen = gen_avx_blendvpd256;
- break;
- case E_V32QImode:
- case E_V16HImode:
- case E_V8SImode:
- case E_V4DImode:
- if (TARGET_AVX2)
- {
- gen = gen_avx2_pblendvb;
- if (mode != V32QImode)
- d = gen_reg_rtx (V32QImode);
- op_false = gen_lowpart (V32QImode, op_false);
- op_true = gen_lowpart (V32QImode, op_true);
- cmp = gen_lowpart (V32QImode, cmp);
- }
- break;
-
- case E_V64QImode:
- gen = gen_avx512bw_blendmv64qi;
- break;
- case E_V32HImode:
- gen = gen_avx512bw_blendmv32hi;
- break;
- case E_V16SImode:
- gen = gen_avx512f_blendmv16si;
- break;
- case E_V8DImode:
- gen = gen_avx512f_blendmv8di;
- break;
- case E_V8DFmode:
- gen = gen_avx512f_blendmv8df;
- break;
- case E_V16SFmode:
- gen = gen_avx512f_blendmv16sf;
- break;
-
- default:
- break;
- }
-
- if (gen != NULL)
- {
- emit_insn (gen (d, op_false, op_true, cmp));
- if (d != dest)
- emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
- }
- else
- {
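- /* No blend instruction is available for this mode; open-code
- the select as dest = (op_true & cmp) | (op_false & ~cmp). */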
- op_true = force_reg (mode, op_true);
-
- t2 = gen_reg_rtx (mode);
- if (optimize)
- t3 = gen_reg_rtx (mode);
- else
- t3 = dest;
-
- x = gen_rtx_AND (mode, op_true, cmp);
- emit_insn (gen_rtx_SET (t2, x));
-
- x = gen_rtx_NOT (mode, cmp);
- x = gen_rtx_AND (mode, x, op_false);
- emit_insn (gen_rtx_SET (t3, x));
-
- x = gen_rtx_IOR (mode, t3, t2);
- emit_insn (gen_rtx_SET (dest, x));
- }
-}
-
-/* Swap, force into registers, or otherwise massage the two operands
- to an sse comparison with a mask result. Thus we differ a bit from
- ix86_prepare_fp_compare_args which expects to produce a flags result.
-
- The DEST operand exists to help determine whether to commute commutative
- operators. The POP0/POP1 operands are updated in place. The new
- comparison code is returned, or UNKNOWN if not implementable. */
-
-static enum rtx_code
-ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
- rtx *pop0, rtx *pop1)
-{
- switch (code)
- {
- case LTGT:
- case UNEQ:
- /* AVX supports all the needed comparisons. */
- if (TARGET_AVX)
- break;
- /* We have no LTGT as an operator. We could implement it with
- NE & ORDERED, but this requires an extra temporary. It's
- not clear that it's worth it. */
- return UNKNOWN;
-
- case LT:
- case LE:
- case UNGT:
- case UNGE:
- /* These are supported directly. */
- break;
-
- case EQ:
- case NE:
- case UNORDERED:
- case ORDERED:
- /* AVX has 3 operand comparisons, no need to swap anything. */
- if (TARGET_AVX)
- break;
- /* For commutative operators, try to canonicalize the destination
- operand to be first in the comparison - this helps reload to
- avoid extra moves. */
- if (!dest || !rtx_equal_p (dest, *pop1))
- break;
- /* FALLTHRU */
-
- case GE:
- case GT:
- case UNLE:
- case UNLT:
- /* These are not supported directly before AVX, and furthermore
- ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
- comparison operands to transform into something that is
- supported. */
- std::swap (*pop0, *pop1);
- code = swap_condition (code);
- break;
-
- default:
- gcc_unreachable ();
- }
-
- return code;
-}
-
-/* Expand a floating-point conditional move. Return true if successful. */
-
-bool
-ix86_expand_fp_movcc (rtx operands[])
-{
- machine_mode mode = GET_MODE (operands[0]);
- enum rtx_code code = GET_CODE (operands[1]);
- rtx tmp, compare_op;
- rtx op0 = XEXP (operands[1], 0);
- rtx op1 = XEXP (operands[1], 1);
-
- if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
- {
- machine_mode cmode;
-
- /* Since we've no cmove for sse registers, don't force bad register
- allocation just to gain access to it. Deny movcc when the
- comparison mode doesn't match the move mode. */
- cmode = GET_MODE (op0);
- if (cmode == VOIDmode)
- cmode = GET_MODE (op1);
- if (cmode != mode)
- return false;
-
- code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
- if (code == UNKNOWN)
- return false;
-
- if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
- operands[2], operands[3]))
- return true;
-
- tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
- operands[2], operands[3]);
- ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
- return true;
- }
-
- if (GET_MODE (op0) == TImode
- || (GET_MODE (op0) == DImode
- && !TARGET_64BIT))
- return false;
-
- /* The floating point conditional move instructions don't directly
- support conditions resulting from a signed integer comparison. */
-
- compare_op = ix86_expand_compare (code, op0, op1);
- if (!fcmov_comparison_operator (compare_op, VOIDmode))
- {
- tmp = gen_reg_rtx (QImode);
- ix86_expand_setcc (tmp, code, op0, op1);
-
- compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
- }
-
- emit_insn (gen_rtx_SET (operands[0],
- gen_rtx_IF_THEN_ELSE (mode, compare_op,
- operands[2], operands[3])));
-
- return true;
-}
-
-/* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */
-
-static int
-ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
-{
- switch (code)
- {
- case EQ:
- return 0;
- case LT:
- case LTU:
- return 1;
- case LE:
- case LEU:
- return 2;
- case NE:
- return 4;
- case GE:
- case GEU:
- return 5;
- case GT:
- case GTU:
- return 6;
- default:
- gcc_unreachable ();
- }
-}
-
-/* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */
-
-static int
-ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
-{
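- /* These values follow the predicate encoding of the vcmpps/vcmppd
- immediate operand, e.g. 0x01 is LT_OS, 0x0e is GT_OS and 0x03 is
- UNORD_Q. */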
- switch (code)
- {
- case EQ:
- return 0x00;
- case NE:
- return 0x04;
- case GT:
- return 0x0e;
- case LE:
- return 0x02;
- case GE:
- return 0x0d;
- case LT:
- return 0x01;
- case UNLE:
- return 0x0a;
- case UNLT:
- return 0x09;
- case UNGE:
- return 0x05;
- case UNGT:
- return 0x06;
- case UNEQ:
- return 0x18;
- case LTGT:
- return 0x0c;
- case ORDERED:
- return 0x07;
- case UNORDERED:
- return 0x03;
- default:
- gcc_unreachable ();
- }
-}
-
-/* Return immediate value to be used in UNSPEC_PCMP
- for comparison CODE in MODE. */
-
-static int
-ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
-{
- if (FLOAT_MODE_P (mode))
- return ix86_fp_cmp_code_to_pcmp_immediate (code);
- return ix86_int_cmp_code_to_pcmp_immediate (code);
-}
-
-/* Expand AVX-512 vector comparison. */
-
-bool
-ix86_expand_mask_vec_cmp (rtx operands[])
-{
- machine_mode mask_mode = GET_MODE (operands[0]);
- machine_mode cmp_mode = GET_MODE (operands[2]);
- enum rtx_code code = GET_CODE (operands[1]);
- rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
- int unspec_code;
- rtx unspec;
-
- switch (code)
- {
- case LEU:
- case GTU:
- case GEU:
- case LTU:
- unspec_code = UNSPEC_UNSIGNED_PCMP;
- break;
-
- default:
- unspec_code = UNSPEC_PCMP;
- }
-
- unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
- operands[3], imm),
- unspec_code);
- emit_insn (gen_rtx_SET (operands[0], unspec));
-
- return true;
-}
-
-/* Expand fp vector comparison. */
-
-bool
-ix86_expand_fp_vec_cmp (rtx operands[])
-{
- enum rtx_code code = GET_CODE (operands[1]);
- rtx cmp;
-
- code = ix86_prepare_sse_fp_compare_args (operands[0], code,
- &operands[2], &operands[3]);
- if (code == UNKNOWN)
- {
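- /* LTGT and UNEQ have no direct encoding without AVX; open-code
- them as NE & ORDERED and EQ | UNORDERED respectively. */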
- rtx temp;
- switch (GET_CODE (operands[1]))
- {
- case LTGT:
- temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
- operands[3], NULL, NULL);
- cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
- operands[3], NULL, NULL);
- code = AND;
- break;
- case UNEQ:
- temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
- operands[3], NULL, NULL);
- cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
- operands[3], NULL, NULL);
- code = IOR;
- break;
- default:
- gcc_unreachable ();
- }
- cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
- OPTAB_DIRECT);
- }
- else
- cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
- operands[1], operands[2]);
-
- if (operands[0] != cmp)
- emit_move_insn (operands[0], cmp);
-
- return true;
-}
-
-static rtx
-ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
- rtx op_true, rtx op_false, bool *negate)
-{
- machine_mode data_mode = GET_MODE (dest);
- machine_mode mode = GET_MODE (cop0);
- rtx x;
-
- *negate = false;
-
- /* XOP supports all of the comparisons on all 128-bit vector int types. */
- if (TARGET_XOP
- && (mode == V16QImode || mode == V8HImode
- || mode == V4SImode || mode == V2DImode))
- ;
- /* AVX512F supports all of the comparisons
- on all 128/256/512-bit vector int types. */
- else if (ix86_valid_mask_cmp_mode (mode))
- ;
- else
- {
- /* Canonicalize the comparison to EQ, GT, GTU. */
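- /* E.g. LE becomes GT with the result negated, while LT and LTU
- become GT/GTU with the operands swapped. */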
- switch (code)
- {
- case EQ:
- case GT:
- case GTU:
- break;
-
- case NE:
- case LE:
- case LEU:
- code = reverse_condition (code);
- *negate = true;
- break;
-
- case GE:
- case GEU:
- code = reverse_condition (code);
- *negate = true;
- /* FALLTHRU */
-
- case LT:
- case LTU:
- std::swap (cop0, cop1);
- code = swap_condition (code);
- break;
-
- default:
- gcc_unreachable ();
- }
-
- /* Only SSE4.1/SSE4.2 supports V2DImode. */
- if (mode == V2DImode)
- {
- switch (code)
- {
- case EQ:
- /* SSE4.1 supports EQ. */
- if (!TARGET_SSE4_1)
- return NULL;
- break;
-
- case GT:
- case GTU:
- /* SSE4.2 supports GT/GTU. */
- if (!TARGET_SSE4_2)
- return NULL;
- break;
-
- default:
- gcc_unreachable ();
- }
- }
-
- rtx optrue = op_true ? op_true : CONSTM1_RTX (data_mode);
- rtx opfalse = op_false ? op_false : CONST0_RTX (data_mode);
- if (*negate)
- std::swap (optrue, opfalse);
-
- /* Transform x > y ? 0 : -1 (i.e. x <= y ? -1 : 0 or x <= y) when
- not using integer masks into min (x, y) == x ? -1 : 0 (i.e.
- min (x, y) == x). While we add one instruction (the minimum),
- we remove the need for two instructions in the negation, as the
- result is already in the required form.
- When using masks, do it for SI/DImode element types, as it is shorter
- than the two subtractions. */
- if ((code != EQ
- && GET_MODE_SIZE (mode) != 64
- && vector_all_ones_operand (opfalse, data_mode)
- && optrue == CONST0_RTX (data_mode))
- || (code == GTU
- && GET_MODE_SIZE (GET_MODE_INNER (mode)) >= 4
- /* Don't do it if not using integer masks and we'd end up with
- the right values in the registers though. */
- && (GET_MODE_SIZE (mode) == 64
- || !vector_all_ones_operand (optrue, data_mode)
- || opfalse != CONST0_RTX (data_mode))))
- {
- rtx (*gen) (rtx, rtx, rtx) = NULL;
-
- switch (mode)
- {
- case E_V16SImode:
- gen = (code == GTU) ? gen_uminv16si3 : gen_sminv16si3;
- break;
- case E_V8DImode:
- gen = (code == GTU) ? gen_uminv8di3 : gen_sminv8di3;
- cop0 = force_reg (mode, cop0);
- cop1 = force_reg (mode, cop1);
- break;
- case E_V32QImode:
- if (TARGET_AVX2)
- gen = (code == GTU) ? gen_uminv32qi3 : gen_sminv32qi3;
- break;
- case E_V16HImode:
- if (TARGET_AVX2)
- gen = (code == GTU) ? gen_uminv16hi3 : gen_sminv16hi3;
- break;
- case E_V8SImode:
- if (TARGET_AVX2)
- gen = (code == GTU) ? gen_uminv8si3 : gen_sminv8si3;
- break;
- case E_V4DImode:
- if (TARGET_AVX512VL)
- {
- gen = (code == GTU) ? gen_uminv4di3 : gen_sminv4di3;
- cop0 = force_reg (mode, cop0);
- cop1 = force_reg (mode, cop1);
- }
- break;
- case E_V16QImode:
- if (code == GTU && TARGET_SSE2)
- gen = gen_uminv16qi3;
- else if (code == GT && TARGET_SSE4_1)
- gen = gen_sminv16qi3;
- break;
- case E_V8HImode:
- if (code == GTU && TARGET_SSE4_1)
- gen = gen_uminv8hi3;
- else if (code == GT && TARGET_SSE2)
- gen = gen_sminv8hi3;
- break;
- case E_V4SImode:
- if (TARGET_SSE4_1)
- gen = (code == GTU) ? gen_uminv4si3 : gen_sminv4si3;
- break;
- case E_V2DImode:
- if (TARGET_AVX512VL)
- {
- gen = (code == GTU) ? gen_uminv2di3 : gen_sminv2di3;
- cop0 = force_reg (mode, cop0);
- cop1 = force_reg (mode, cop1);
- }
- break;
- default:
- break;
- }
-
- if (gen)
- {
- rtx tem = gen_reg_rtx (mode);
- if (!vector_operand (cop0, mode))
- cop0 = force_reg (mode, cop0);
- if (!vector_operand (cop1, mode))
- cop1 = force_reg (mode, cop1);
- *negate = !*negate;
- emit_insn (gen (tem, cop0, cop1));
- cop1 = tem;
- code = EQ;
- }
- }
-
- /* Unsigned parallel compare is not supported by the hardware.
- Play some tricks to turn this into a signed comparison,
- or an equality comparison against 0. */
- if (code == GTU)
- {
- cop0 = force_reg (mode, cop0);
-
- switch (mode)
- {
- case E_V16SImode:
- case E_V8DImode:
- case E_V8SImode:
- case E_V4DImode:
- case E_V4SImode:
- case E_V2DImode:
- {
- rtx t1, t2, mask;
-
- /* Subtract (-(INT MAX) - 1) from both operands to make
- them signed. */
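- /* E.g. for 32-bit elements, x >u y iff
- (x - 0x80000000) >s (y - 0x80000000); subtracting the
- sign-bit mask flips the sign bit of every element. */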
- mask = ix86_build_signbit_mask (mode, true, false);
- t1 = gen_reg_rtx (mode);
- emit_insn (gen_sub3_insn (t1, cop0, mask));
-
- t2 = gen_reg_rtx (mode);
- emit_insn (gen_sub3_insn (t2, cop1, mask));
-
- cop0 = t1;
- cop1 = t2;
- code = GT;
- }
- break;
-
- case E_V64QImode:
- case E_V32HImode:
- case E_V32QImode:
- case E_V16HImode:
- case E_V16QImode:
- case E_V8HImode:
- /* Perform a parallel unsigned saturating subtraction. */
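- /* x >u y iff the saturating difference x - y is nonzero, so
- test it for equality against zero and negate the result. */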
- x = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET
- (x, gen_rtx_US_MINUS (mode, cop0, cop1)));
- cop0 = x;
- cop1 = CONST0_RTX (mode);
- code = EQ;
- *negate = !*negate;
- break;
-
- default:
- gcc_unreachable ();
- }
- }
- }
-
- if (*negate)
- std::swap (op_true, op_false);
-
- /* Allow the comparison to be done in one mode, but the movcc to
- happen in another mode. */
- if (data_mode == mode)
- {
- x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
- op_true, op_false);
- }
- else
- {
- gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
- x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
- op_true, op_false);
- if (GET_MODE (x) == mode)
- x = gen_lowpart (data_mode, x);
- }
-
- return x;
-}
-
-/* Expand integer vector comparison. */
-
-bool
-ix86_expand_int_vec_cmp (rtx operands[])
-{
- rtx_code code = GET_CODE (operands[1]);
- bool negate = false;
- rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
- operands[3], NULL, NULL, &negate);
-
- if (!cmp)
- return false;
-
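- /* Negation is done by comparing the result against zero:
- elementwise EQ 0 turns -1 into 0 and 0 into -1. */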
- if (negate)
- cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
- CONST0_RTX (GET_MODE (cmp)),
- NULL, NULL, &negate);
-
- gcc_assert (!negate);
-
- if (operands[0] != cmp)
- emit_move_insn (operands[0], cmp);
-
- return true;
-}
-
-/* Expand a floating-point vector conditional move; a vcond operation
- rather than a movcc operation. */
-
-bool
-ix86_expand_fp_vcond (rtx operands[])
-{
- enum rtx_code code = GET_CODE (operands[3]);
- rtx cmp;
-
- code = ix86_prepare_sse_fp_compare_args (operands[0], code,
- &operands[4], &operands[5]);
- if (code == UNKNOWN)
- {
- rtx temp;
- switch (GET_CODE (operands[3]))
- {
- case LTGT:
- temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
- operands[5], operands[0], operands[0]);
- cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
- operands[5], operands[1], operands[2]);
- code = AND;
- break;
- case UNEQ:
- temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
- operands[5], operands[0], operands[0]);
- cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
- operands[5], operands[1], operands[2]);
- code = IOR;
- break;
- default:
- gcc_unreachable ();
- }
- cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
- OPTAB_DIRECT);
- ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
- return true;
- }
-
- if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
- operands[5], operands[1], operands[2]))
- return true;
-
- cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
- operands[1], operands[2]);
- ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
- return true;
-}
-
-/* Expand a signed/unsigned integral vector conditional move. */
-
-bool
-ix86_expand_int_vcond (rtx operands[])
-{
- machine_mode data_mode = GET_MODE (operands[0]);
- machine_mode mode = GET_MODE (operands[4]);
- enum rtx_code code = GET_CODE (operands[3]);
- bool negate = false;
- rtx x, cop0, cop1;
-
- cop0 = operands[4];
- cop1 = operands[5];
-
- /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
- and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
- if ((code == LT || code == GE)
- && data_mode == mode
- && cop1 == CONST0_RTX (mode)
- && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
- && GET_MODE_UNIT_SIZE (data_mode) > 1
- && GET_MODE_UNIT_SIZE (data_mode) <= 8
- && (GET_MODE_SIZE (data_mode) == 16
- || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
- {
- rtx negop = operands[2 - (code == LT)];
- int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
- if (negop == CONST1_RTX (data_mode))
- {
- rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
- operands[0], 1, OPTAB_DIRECT);
- if (res != operands[0])
- emit_move_insn (operands[0], res);
- return true;
- }
- else if (GET_MODE_INNER (data_mode) != DImode
- && vector_all_ones_operand (negop, data_mode))
- {
- rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
- operands[0], 0, OPTAB_DIRECT);
- if (res != operands[0])
- emit_move_insn (operands[0], res);
- return true;
- }
- }
-
- if (!nonimmediate_operand (cop1, mode))
- cop1 = force_reg (mode, cop1);
- if (!general_operand (operands[1], data_mode))
- operands[1] = force_reg (data_mode, operands[1]);
- if (!general_operand (operands[2], data_mode))
- operands[2] = force_reg (data_mode, operands[2]);
-
- x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
- operands[1], operands[2], &negate);
-
- if (!x)
- return false;
-
- ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
- operands[2-negate]);
- return true;
-}
-
-static bool
-ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1,
- struct expand_vec_perm_d *d)
-{
- /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const
- expanders, so the args are either in d, or in op0, op1 etc. */
- machine_mode mode = GET_MODE (d ? d->op0 : op0);
- machine_mode maskmode = mode;
- rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
-
- switch (mode)
- {
- case E_V8HImode:
- if (TARGET_AVX512VL && TARGET_AVX512BW)
- gen = gen_avx512vl_vpermt2varv8hi3;
- break;
- case E_V16HImode:
- if (TARGET_AVX512VL && TARGET_AVX512BW)
- gen = gen_avx512vl_vpermt2varv16hi3;
- break;
- case E_V64QImode:
- if (TARGET_AVX512VBMI)
- gen = gen_avx512bw_vpermt2varv64qi3;
- break;
- case E_V32HImode:
- if (TARGET_AVX512BW)
- gen = gen_avx512bw_vpermt2varv32hi3;
- break;
- case E_V4SImode:
- if (TARGET_AVX512VL)
- gen = gen_avx512vl_vpermt2varv4si3;
- break;
- case E_V8SImode:
- if (TARGET_AVX512VL)
- gen = gen_avx512vl_vpermt2varv8si3;
- break;
- case E_V16SImode:
- if (TARGET_AVX512F)
- gen = gen_avx512f_vpermt2varv16si3;
- break;
- case E_V4SFmode:
- if (TARGET_AVX512VL)
- {
- gen = gen_avx512vl_vpermt2varv4sf3;
- maskmode = V4SImode;
- }
- break;
- case E_V8SFmode:
- if (TARGET_AVX512VL)
- {
- gen = gen_avx512vl_vpermt2varv8sf3;
- maskmode = V8SImode;
- }
- break;
- case E_V16SFmode:
- if (TARGET_AVX512F)
- {
- gen = gen_avx512f_vpermt2varv16sf3;
- maskmode = V16SImode;
- }
- break;
- case E_V2DImode:
- if (TARGET_AVX512VL)
- gen = gen_avx512vl_vpermt2varv2di3;
- break;
- case E_V4DImode:
- if (TARGET_AVX512VL)
- gen = gen_avx512vl_vpermt2varv4di3;
- break;
- case E_V8DImode:
- if (TARGET_AVX512F)
- gen = gen_avx512f_vpermt2varv8di3;
- break;
- case E_V2DFmode:
- if (TARGET_AVX512VL)
- {
- gen = gen_avx512vl_vpermt2varv2df3;
- maskmode = V2DImode;
- }
- break;
- case E_V4DFmode:
- if (TARGET_AVX512VL)
- {
- gen = gen_avx512vl_vpermt2varv4df3;
- maskmode = V4DImode;
- }
- break;
- case E_V8DFmode:
- if (TARGET_AVX512F)
- {
- gen = gen_avx512f_vpermt2varv8df3;
- maskmode = V8DImode;
- }
- break;
- default:
- break;
- }
-
- if (gen == NULL)
- return false;
-
- /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const
- expanders, so the args are either in d, or in op0, op1 etc. */
- if (d)
- {
- rtx vec[64];
- target = d->target;
- op0 = d->op0;
- op1 = d->op1;
- for (int i = 0; i < d->nelt; ++i)
- vec[i] = GEN_INT (d->perm[i]);
- mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
- }
-
- emit_insn (gen (target, force_reg (maskmode, mask), op0, op1));
- return true;
-}
-
-/* Expand a variable vector permutation. */
-
-void
-ix86_expand_vec_perm (rtx operands[])
-{
- rtx target = operands[0];
- rtx op0 = operands[1];
- rtx op1 = operands[2];
- rtx mask = operands[3];
- rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
- machine_mode mode = GET_MODE (op0);
- machine_mode maskmode = GET_MODE (mask);
- int w, e, i;
- bool one_operand_shuffle = rtx_equal_p (op0, op1);
-
- /* Number of elements in the vector. */
- w = GET_MODE_NUNITS (mode);
- e = GET_MODE_UNIT_SIZE (mode);
- gcc_assert (w <= 64);
-
- if (TARGET_AVX512F && one_operand_shuffle)
- {
- rtx (*gen) (rtx, rtx, rtx) = NULL;
- switch (mode)
- {
- case E_V16SImode:
- gen = gen_avx512f_permvarv16si;
- break;
- case E_V16SFmode:
- gen = gen_avx512f_permvarv16sf;
- break;
- case E_V8DImode:
- gen = gen_avx512f_permvarv8di;
- break;
- case E_V8DFmode:
- gen = gen_avx512f_permvarv8df;
- break;
- default:
- break;
- }
- if (gen != NULL)
- {
- emit_insn (gen (target, op0, mask));
- return;
- }
- }
-
- if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL))
- return;
-
- if (TARGET_AVX2)
- {
- if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
- {
- /* Unfortunately, the VPERMQ and VPERMPD instructions only support
- a constant shuffle operand. With a tiny bit of effort we can
- use VPERMD instead. A re-interpretation stall for V4DFmode is
- unfortunate but there's no avoiding it.
- Similarly, for V16HImode we don't have instructions for variable
- shuffling, while for V32QImode we can, after preparing suitable
- masks, use vpshufb; vpshufb; vpermq; vpor. */
-
- if (mode == V16HImode)
- {
- maskmode = mode = V32QImode;
- w = 32;
- e = 1;
- }
- else
- {
- maskmode = mode = V8SImode;
- w = 8;
- e = 4;
- }
- t1 = gen_reg_rtx (maskmode);
-
- /* Replicate the low bits of the V4DImode mask into V8SImode:
- mask = { A B C D }
- t1 = { A A B B C C D D }. */
- for (i = 0; i < w / 2; ++i)
- vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
- vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
- vt = force_reg (maskmode, vt);
- mask = gen_lowpart (maskmode, mask);
- if (maskmode == V8SImode)
- emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
- else
- emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
-
- /* Multiply the shuffle indices by two. */
- t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
- OPTAB_DIRECT);
-
- /* Add one to the odd shuffle indices:
- t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
- for (i = 0; i < w / 2; ++i)
- {
- vec[i * 2] = const0_rtx;
- vec[i * 2 + 1] = const1_rtx;
- }
- vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
- vt = validize_mem (force_const_mem (maskmode, vt));
- t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
- OPTAB_DIRECT);
-
- /* Continue as if V8SImode (resp. V32QImode) was used initially. */
- operands[3] = mask = t1;
- target = gen_reg_rtx (mode);
- op0 = gen_lowpart (mode, op0);
- op1 = gen_lowpart (mode, op1);
- }
-
- switch (mode)
- {
- case E_V8SImode:
- /* The VPERMD and VPERMPS instructions already properly ignore
- the high bits of the shuffle elements. No need for us to
- perform an AND ourselves. */
- if (one_operand_shuffle)
- {
- emit_insn (gen_avx2_permvarv8si (target, op0, mask));
- if (target != operands[0])
- emit_move_insn (operands[0],
- gen_lowpart (GET_MODE (operands[0]), target));
- }
- else
- {
- t1 = gen_reg_rtx (V8SImode);
- t2 = gen_reg_rtx (V8SImode);
- emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
- emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
- goto merge_two;
- }
- return;
-
- case E_V8SFmode:
- mask = gen_lowpart (V8SImode, mask);
- if (one_operand_shuffle)
- emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
- else
- {
- t1 = gen_reg_rtx (V8SFmode);
- t2 = gen_reg_rtx (V8SFmode);
- emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
- emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
- goto merge_two;
- }
- return;
-
- case E_V4SImode:
- /* By combining the two 128-bit input vectors into one 256-bit
- input vector, we can use VPERMD and VPERMPS for the full
- two-operand shuffle. */
- t1 = gen_reg_rtx (V8SImode);
- t2 = gen_reg_rtx (V8SImode);
- emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
- emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
- emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
- emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
- return;
-
- case E_V4SFmode:
- t1 = gen_reg_rtx (V8SFmode);
- t2 = gen_reg_rtx (V8SImode);
- mask = gen_lowpart (V4SImode, mask);
- emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
- emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
- emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
- emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
- return;
-
- case E_V32QImode:
- t1 = gen_reg_rtx (V32QImode);
- t2 = gen_reg_rtx (V32QImode);
- t3 = gen_reg_rtx (V32QImode);
- vt2 = GEN_INT (-128);
- vt = gen_const_vec_duplicate (V32QImode, vt2);
- vt = force_reg (V32QImode, vt);
- for (i = 0; i < 32; i++)
- vec[i] = i < 16 ? vt2 : const0_rtx;
- vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
- vt2 = force_reg (V32QImode, vt2);
- /* From mask create two adjusted masks, which contain the same
- bits as mask in the low 7 bits of each vector element.
- The first mask will have the most significant bit clear
- if it requests element from the same 128-bit lane
- and MSB set if it requests element from the other 128-bit lane.
- The second mask will have the opposite values of the MSB,
- and additionally will have its 128-bit lanes swapped.
- E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
- t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
- t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
- stands for other 12 bytes. */
- /* The bit whether element is from the same lane or the other
- lane is bit 4, so shift it up by 3 to the MSB position. */
- t5 = gen_reg_rtx (V4DImode);
- emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
- GEN_INT (3)));
- /* Clear MSB bits from the mask just in case it had them set. */
- emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
- /* After this t1 will have MSB set for elements from other lane. */
- emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
- /* Clear bits other than MSB. */
- emit_insn (gen_andv32qi3 (t1, t1, vt));
- /* Or in the lower bits from mask into t3. */
- emit_insn (gen_iorv32qi3 (t3, t1, t2));
- /* And invert MSB bits in t1, so MSB is set for elements from the same
- lane. */
- emit_insn (gen_xorv32qi3 (t1, t1, vt));
- /* Swap 128-bit lanes in t3. */
- t6 = gen_reg_rtx (V4DImode);
- emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
- const2_rtx, GEN_INT (3),
- const0_rtx, const1_rtx));
- /* And or in the lower bits from mask into t1. */
- emit_insn (gen_iorv32qi3 (t1, t1, t2));
- if (one_operand_shuffle)
- {
- /* Each of these shuffles will put 0s in places where
- element from the other 128-bit lane is needed, otherwise
- will shuffle in the requested value. */
- emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
- gen_lowpart (V32QImode, t6)));
- emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
- /* For t3 the 128-bit lanes are swapped again. */
- t7 = gen_reg_rtx (V4DImode);
- emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
- const2_rtx, GEN_INT (3),
- const0_rtx, const1_rtx));
- /* And oring both together leads to the result. */
- emit_insn (gen_iorv32qi3 (target, t1,
- gen_lowpart (V32QImode, t7)));
- if (target != operands[0])
- emit_move_insn (operands[0],
- gen_lowpart (GET_MODE (operands[0]), target));
- return;
- }
-
- t4 = gen_reg_rtx (V32QImode);
- /* Similar to the one_operand_shuffle code above, just
- repeated twice, once for each operand. The merge_two:
- code below will merge the two results together. */
- emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
- gen_lowpart (V32QImode, t6)));
- emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
- gen_lowpart (V32QImode, t6)));
- emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
- emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
- t7 = gen_reg_rtx (V4DImode);
- emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
- const2_rtx, GEN_INT (3),
- const0_rtx, const1_rtx));
- t8 = gen_reg_rtx (V4DImode);
- emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
- const2_rtx, GEN_INT (3),
- const0_rtx, const1_rtx));
- emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
- emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
- t1 = t4;
- t2 = t3;
- goto merge_two;
-
- default:
- gcc_assert (GET_MODE_SIZE (mode) <= 16);
- break;
- }
- }
-
- if (TARGET_XOP)
- {
- /* The XOP VPPERM insn supports three inputs. By ignoring the
- one_operand_shuffle special case, we avoid creating another
- set of constant vectors in memory. */
- one_operand_shuffle = false;
-
- /* mask = mask & {2*w-1, ...} */
- vt = GEN_INT (2*w - 1);
- }
- else
- {
- /* mask = mask & {w-1, ...} */
- vt = GEN_INT (w - 1);
- }
-
- vt = gen_const_vec_duplicate (maskmode, vt);
- mask = expand_simple_binop (maskmode, AND, mask, vt,
- NULL_RTX, 0, OPTAB_DIRECT);
-
- /* For non-QImode operations, convert the word permutation control
- into a byte permutation control. */
- if (mode != V16QImode)
- {
- mask = expand_simple_binop (maskmode, ASHIFT, mask,
- GEN_INT (exact_log2 (e)),
- NULL_RTX, 0, OPTAB_DIRECT);
-
- /* Convert mask to vector of chars. */
- mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
-
- /* Replicate each of the input bytes into byte positions:
- (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
- (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
- (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
- for (i = 0; i < 16; ++i)
- vec[i] = GEN_INT (i/e * e);
- vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
- vt = validize_mem (force_const_mem (V16QImode, vt));
- if (TARGET_XOP)
- emit_insn (gen_xop_pperm (mask, mask, mask, vt));
- else
- emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
-
- /* Convert it into the byte positions by doing
- mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
- for (i = 0; i < 16; ++i)
- vec[i] = GEN_INT (i % e);
- vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
- vt = validize_mem (force_const_mem (V16QImode, vt));
- emit_insn (gen_addv16qi3 (mask, mask, vt));
- }
-
- /* The actual shuffle operations all operate on V16QImode. */
- op0 = gen_lowpart (V16QImode, op0);
- op1 = gen_lowpart (V16QImode, op1);
-
- if (TARGET_XOP)
- {
- if (GET_MODE (target) != V16QImode)
- target = gen_reg_rtx (V16QImode);
- emit_insn (gen_xop_pperm (target, op0, op1, mask));
- if (target != operands[0])
- emit_move_insn (operands[0],
- gen_lowpart (GET_MODE (operands[0]), target));
- }
- else if (one_operand_shuffle)
- {
- if (GET_MODE (target) != V16QImode)
- target = gen_reg_rtx (V16QImode);
- emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
- if (target != operands[0])
- emit_move_insn (operands[0],
- gen_lowpart (GET_MODE (operands[0]), target));
- }
- else
- {
- rtx xops[6];
- bool ok;
-
- /* Shuffle the two input vectors independently. */
- t1 = gen_reg_rtx (V16QImode);
- t2 = gen_reg_rtx (V16QImode);
- emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
- emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
-
- merge_two:
- /* Then merge them together. The key is whether any given control
- element contained a bit set that indicates the second word. */
- mask = operands[3];
- vt = GEN_INT (w);
- if (maskmode == V2DImode && !TARGET_SSE4_1)
- {
- /* Without SSE4.1, we don't have V2DImode EQ. Perform one
- more shuffle to convert the V2DI input mask into a V4SI
- input mask. At which point the masking that expand_int_vcond
- will work as desired. */
- rtx t3 = gen_reg_rtx (V4SImode);
- emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
- const0_rtx, const0_rtx,
- const2_rtx, const2_rtx));
- mask = t3;
- maskmode = V4SImode;
- e = w = 4;
- }
-
- vt = gen_const_vec_duplicate (maskmode, vt);
- vt = force_reg (maskmode, vt);
- mask = expand_simple_binop (maskmode, AND, mask, vt,
- NULL_RTX, 0, OPTAB_DIRECT);
-
- if (GET_MODE (target) != mode)
- target = gen_reg_rtx (mode);
- xops[0] = target;
- xops[1] = gen_lowpart (mode, t2);
- xops[2] = gen_lowpart (mode, t1);
- xops[3] = gen_rtx_EQ (maskmode, mask, vt);
- xops[4] = mask;
- xops[5] = vt;
- ok = ix86_expand_int_vcond (xops);
- gcc_assert (ok);
- if (target != operands[0])
- emit_move_insn (operands[0],
- gen_lowpart (GET_MODE (operands[0]), target));
- }
-}
-
-/* Unpack SRC into the next wider integer vector type. UNSIGNED_P is
- true if we should do zero extension, else sign extension. HIGH_P is
- true if we want the N/2 high elements, else the low elements. */
-
-void
-ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
-{
- machine_mode imode = GET_MODE (src);
- rtx tmp;
-
- if (TARGET_SSE4_1)
- {
- rtx (*unpack)(rtx, rtx);
- rtx (*extract)(rtx, rtx) = NULL;
- machine_mode halfmode = BLKmode;
-
- switch (imode)
- {
- case E_V64QImode:
- if (unsigned_p)
- unpack = gen_avx512bw_zero_extendv32qiv32hi2;
- else
- unpack = gen_avx512bw_sign_extendv32qiv32hi2;
- halfmode = V32QImode;
- extract
- = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
- break;
- case E_V32QImode:
- if (unsigned_p)
- unpack = gen_avx2_zero_extendv16qiv16hi2;
- else
- unpack = gen_avx2_sign_extendv16qiv16hi2;
- halfmode = V16QImode;
- extract
- = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
- break;
- case E_V32HImode:
- if (unsigned_p)
- unpack = gen_avx512f_zero_extendv16hiv16si2;
- else
- unpack = gen_avx512f_sign_extendv16hiv16si2;
- halfmode = V16HImode;
- extract
- = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
- break;
- case E_V16HImode:
- if (unsigned_p)
- unpack = gen_avx2_zero_extendv8hiv8si2;
- else
- unpack = gen_avx2_sign_extendv8hiv8si2;
- halfmode = V8HImode;
- extract
- = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
- break;
- case E_V16SImode:
- if (unsigned_p)
- unpack = gen_avx512f_zero_extendv8siv8di2;
- else
- unpack = gen_avx512f_sign_extendv8siv8di2;
- halfmode = V8SImode;
- extract
- = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
- break;
- case E_V8SImode:
- if (unsigned_p)
- unpack = gen_avx2_zero_extendv4siv4di2;
- else
- unpack = gen_avx2_sign_extendv4siv4di2;
- halfmode = V4SImode;
- extract
- = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
- break;
- case E_V16QImode:
- if (unsigned_p)
- unpack = gen_sse4_1_zero_extendv8qiv8hi2;
- else
- unpack = gen_sse4_1_sign_extendv8qiv8hi2;
- break;
- case E_V8HImode:
- if (unsigned_p)
- unpack = gen_sse4_1_zero_extendv4hiv4si2;
- else
- unpack = gen_sse4_1_sign_extendv4hiv4si2;
- break;
- case E_V4SImode:
- if (unsigned_p)
- unpack = gen_sse4_1_zero_extendv2siv2di2;
- else
- unpack = gen_sse4_1_sign_extendv2siv2di2;
- break;
- default:
- gcc_unreachable ();
- }
-
- if (GET_MODE_SIZE (imode) >= 32)
- {
- tmp = gen_reg_rtx (halfmode);
- emit_insn (extract (tmp, src));
- }
- else if (high_p)
- {
- /* Shift higher 8 bytes to lower 8 bytes. */
- tmp = gen_reg_rtx (V1TImode);
- emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
- GEN_INT (64)));
- tmp = gen_lowpart (imode, tmp);
- }
- else
- tmp = src;
-
- emit_insn (unpack (dest, tmp));
- }
- else
- {
- rtx (*unpack)(rtx, rtx, rtx);
-
- switch (imode)
- {
- case E_V16QImode:
- if (high_p)
- unpack = gen_vec_interleave_highv16qi;
- else
- unpack = gen_vec_interleave_lowv16qi;
- break;
- case E_V8HImode:
- if (high_p)
- unpack = gen_vec_interleave_highv8hi;
- else
- unpack = gen_vec_interleave_lowv8hi;
- break;
- case E_V4SImode:
- if (high_p)
- unpack = gen_vec_interleave_highv4si;
- else
- unpack = gen_vec_interleave_lowv4si;
- break;
- default:
- gcc_unreachable ();
- }
-
- if (unsigned_p)
- tmp = force_reg (imode, CONST0_RTX (imode));
- else
- tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
- src, pc_rtx, pc_rtx);
-
- rtx tmp2 = gen_reg_rtx (imode);
- emit_insn (unpack (tmp2, src, tmp));
- emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
- }
-}
-
-/* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
- but works for floating point parameters and non-offsettable memories.
- For pushes, it returns just stack offsets; the values will be saved
- in the right order. Maximally four parts are generated. */
-
-static int
-ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
-{
- int size;
-
- if (!TARGET_64BIT)
- size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
- else
- size = (GET_MODE_SIZE (mode) + 4) / 8;
-
- gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
- gcc_assert (size >= 2 && size <= 4);
-
- /* Optimize constant pool reference to immediates. This is used by fp
- moves that force all constants to memory to allow combining. */
- if (MEM_P (operand) && MEM_READONLY_P (operand))
- operand = avoid_constant_pool_reference (operand);
-
- if (MEM_P (operand) && !offsettable_memref_p (operand))
- {
- /* The only non-offsettable memories we handle are pushes. */
- int ok = push_operand (operand, VOIDmode);
-
- gcc_assert (ok);
-
- operand = copy_rtx (operand);
- PUT_MODE (operand, word_mode);
- parts[0] = parts[1] = parts[2] = parts[3] = operand;
- return size;
- }
-
- if (GET_CODE (operand) == CONST_VECTOR)
- {
- scalar_int_mode imode = int_mode_for_mode (mode).require ();
- /* Caution: if we looked through a constant pool memory above,
- the operand may actually have a different mode now. That's
- ok, since we want to pun this all the way back to an integer. */
- operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
- gcc_assert (operand != NULL);
- mode = imode;
- }
-
- if (!TARGET_64BIT)
- {
- if (mode == DImode)
- split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
- else
- {
- int i;
-
- if (REG_P (operand))
- {
- gcc_assert (reload_completed);
- for (i = 0; i < size; i++)
- parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
- }
- else if (offsettable_memref_p (operand))
- {
- operand = adjust_address (operand, SImode, 0);
- parts[0] = operand;
- for (i = 1; i < size; i++)
- parts[i] = adjust_address (operand, SImode, 4 * i);
- }
- else if (CONST_DOUBLE_P (operand))
- {
- const REAL_VALUE_TYPE *r;
- long l[4];
-
- r = CONST_DOUBLE_REAL_VALUE (operand);
- switch (mode)
- {
- case E_TFmode:
- real_to_target (l, r, mode);
- parts[3] = gen_int_mode (l[3], SImode);
- parts[2] = gen_int_mode (l[2], SImode);
- break;
- case E_XFmode:
- /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
- long double may not be 80-bit. */
- real_to_target (l, r, mode);
- parts[2] = gen_int_mode (l[2], SImode);
- break;
- case E_DFmode:
- REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
- break;
- default:
- gcc_unreachable ();
- }
- parts[1] = gen_int_mode (l[1], SImode);
- parts[0] = gen_int_mode (l[0], SImode);
- }
- else
- gcc_unreachable ();
- }
- }
- else
- {
- if (mode == TImode)
- split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
- if (mode == XFmode || mode == TFmode)
- {
- machine_mode upper_mode = mode == XFmode ? SImode : DImode;
- if (REG_P (operand))
- {
- gcc_assert (reload_completed);
- parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
- parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
- }
- else if (offsettable_memref_p (operand))
- {
- operand = adjust_address (operand, DImode, 0);
- parts[0] = operand;
- parts[1] = adjust_address (operand, upper_mode, 8);
- }
- else if (CONST_DOUBLE_P (operand))
- {
- long l[4];
-
- real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);
-
- /* real_to_target puts 32-bit pieces in each long. */
- parts[0] = gen_int_mode ((l[0] & HOST_WIDE_INT_C (0xffffffff))
- | ((l[1] & HOST_WIDE_INT_C (0xffffffff))
- << 32), DImode);
-
- if (upper_mode == SImode)
- parts[1] = gen_int_mode (l[2], SImode);
- else
- parts[1]
- = gen_int_mode ((l[2] & HOST_WIDE_INT_C (0xffffffff))
- | ((l[3] & HOST_WIDE_INT_C (0xffffffff))
- << 32), DImode);
- }
- else
- gcc_unreachable ();
- }
- }
-
- return size;
-}
-
-/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
- All required insns are emitted by this function. Operands 2-5
- receive the parts of the destination; operands 6-9 hold the
- corresponding parts of the source, in the correct order. */
-
-void
-ix86_split_long_move (rtx operands[])
-{
- rtx part[2][4];
- int nparts, i, j;
- int push = 0;
- int collisions = 0;
- machine_mode mode = GET_MODE (operands[0]);
- bool collisionparts[4];
-
- /* The DFmode expanders may ask us to move a double.
- For a 64-bit target this is a single move. By hiding that fact
- here we simplify the i386.md splitters. */
- if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
- {
- /* Optimize constant pool reference to immediates. This is used by
- fp moves that force all constants to memory to allow combining. */
-
- if (MEM_P (operands[1])
- && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
- && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
- operands[1] = get_pool_constant (XEXP (operands[1], 0));
- if (push_operand (operands[0], VOIDmode))
- {
- operands[0] = copy_rtx (operands[0]);
- PUT_MODE (operands[0], word_mode);
- }
- else
- operands[0] = gen_lowpart (DImode, operands[0]);
- operands[1] = gen_lowpart (DImode, operands[1]);
- emit_move_insn (operands[0], operands[1]);
- return;
- }
-
- /* The only non-offsettable memory we handle is push. */
- if (push_operand (operands[0], VOIDmode))
- push = 1;
- else
- gcc_assert (!MEM_P (operands[0])
- || offsettable_memref_p (operands[0]));
-
- nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
- ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
-
- /* When emitting a push, take care of source operands on the stack. */
- if (push && MEM_P (operands[1])
- && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
- {
- rtx src_base = XEXP (part[1][nparts - 1], 0);
-
- /* Compensate for the stack decrement by 4. */
- if (!TARGET_64BIT && nparts == 3
- && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
- src_base = plus_constant (Pmode, src_base, 4);
-
- /* src_base refers to the stack pointer and is
- automatically decreased by emitted push. */
- for (i = 0; i < nparts; i++)
- part[1][i] = change_address (part[1][i],
- GET_MODE (part[1][i]), src_base);
- }
-
- /* We need to do the copy in the right order in case an address
- register of the source overlaps the destination. */
- if (REG_P (part[0][0]) && MEM_P (part[1][0]))
- {
- rtx tmp;
-
- for (i = 0; i < nparts; i++)
- {
- collisionparts[i]
- = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
- if (collisionparts[i])
- collisions++;
- }
-
- /* Collision in the middle part can be handled by reordering. */
- if (collisions == 1 && nparts == 3 && collisionparts [1])
- {
- std::swap (part[0][1], part[0][2]);
- std::swap (part[1][1], part[1][2]);
- }
- else if (collisions == 1
- && nparts == 4
- && (collisionparts [1] || collisionparts [2]))
- {
- if (collisionparts [1])
- {
- std::swap (part[0][1], part[0][2]);
- std::swap (part[1][1], part[1][2]);
- }
- else
- {
- std::swap (part[0][2], part[0][3]);
- std::swap (part[1][2], part[1][3]);
- }
- }
-
- /* If there are more collisions, we can't handle it by reordering.
- Do an lea to the last part and use only one colliding move. */
- else if (collisions > 1)
- {
- rtx base, addr;
-
- collisions = 1;
-
- base = part[0][nparts - 1];
-
- /* Handle the case when the last part isn't valid for lea.
- Happens in 64-bit mode storing the 12-byte XFmode. */
- if (GET_MODE (base) != Pmode)
- base = gen_rtx_REG (Pmode, REGNO (base));
-
- addr = XEXP (part[1][0], 0);
- if (TARGET_TLS_DIRECT_SEG_REFS)
- {
- struct ix86_address parts;
- int ok = ix86_decompose_address (addr, &parts);
- gcc_assert (ok);
- /* It is not valid to use %gs: or %fs: in lea. */
- gcc_assert (parts.seg == ADDR_SPACE_GENERIC);
- }
- emit_insn (gen_rtx_SET (base, addr));
- part[1][0] = replace_equiv_address (part[1][0], base);
- for (i = 1; i < nparts; i++)
- {
- tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
- part[1][i] = replace_equiv_address (part[1][i], tmp);
- }
- }
- }
-
- if (push)
- {
- if (!TARGET_64BIT)
- {
- if (nparts == 3)
- {
- if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
- emit_insn (gen_add2_insn (stack_pointer_rtx, GEN_INT (-4)));
- emit_move_insn (part[0][2], part[1][2]);
- }
- else if (nparts == 4)
- {
- emit_move_insn (part[0][3], part[1][3]);
- emit_move_insn (part[0][2], part[1][2]);
- }
- }
- else
- {
- /* In 64-bit mode we don't have a 32-bit push available. If this is
- a register, that is OK - we will just use the larger counterpart.
- We also retype the memory - this comes from an attempt to avoid a
- REX prefix on moving the second half of a TFmode value. */
- if (GET_MODE (part[1][1]) == SImode)
- {
- switch (GET_CODE (part[1][1]))
- {
- case MEM:
- part[1][1] = adjust_address (part[1][1], DImode, 0);
- break;
-
- case REG:
- part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
- break;
-
- default:
- gcc_unreachable ();
- }
-
- if (GET_MODE (part[1][0]) == SImode)
- part[1][0] = part[1][1];
- }
- }
- emit_move_insn (part[0][1], part[1][1]);
- emit_move_insn (part[0][0], part[1][0]);
- return;
- }
-
- /* Choose the correct order so as not to overwrite the source before it is copied. */
- if ((REG_P (part[0][0])
- && REG_P (part[1][1])
- && (REGNO (part[0][0]) == REGNO (part[1][1])
- || (nparts == 3
- && REGNO (part[0][0]) == REGNO (part[1][2]))
- || (nparts == 4
- && REGNO (part[0][0]) == REGNO (part[1][3]))))
- || (collisions > 0
- && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
- {
- for (i = 0, j = nparts - 1; i < nparts; i++, j--)
- {
- operands[2 + i] = part[0][j];
- operands[6 + i] = part[1][j];
- }
- }
- else
- {
- for (i = 0; i < nparts; i++)
- {
- operands[2 + i] = part[0][i];
- operands[6 + i] = part[1][i];
- }
- }
-
- /* If optimizing for size, attempt to locally unCSE nonzero constants. */
- if (optimize_insn_for_size_p ())
- {
- for (j = 0; j < nparts - 1; j++)
- if (CONST_INT_P (operands[6 + j])
- && operands[6 + j] != const0_rtx
- && REG_P (operands[2 + j]))
- for (i = j; i < nparts - 1; i++)
- if (CONST_INT_P (operands[7 + i])
- && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
- operands[7 + i] = operands[2 + j];
- }
-
- for (i = 0; i < nparts; i++)
- emit_move_insn (operands[2 + i], operands[6 + i]);
-
- return;
-}
-
-/* Helper function of ix86_split_ashl used to generate an SImode/DImode
- left shift by a constant, either using a single shift or
- a sequence of add instructions. */
-
-static void
-ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
-{
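- /* E.g. a shift left by 2 is emitted as two self-adds whenever
- two add insns cost no more than one shift by a constant. */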
- if (count == 1
- || (count * ix86_cost->add <= ix86_cost->shift_const
- && !optimize_insn_for_size_p ()))
- {
- while (count-- > 0)
- emit_insn (gen_add2_insn (operand, operand));
- }
- else
- {
- rtx (*insn)(rtx, rtx, rtx);
-
- insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
- emit_insn (insn (operand, operand, GEN_INT (count)));
- }
-}
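
As a minimal sketch of the trade-off ix86_expand_ashl_const exploits: adding a value to itself doubles it, so COUNT self-additions compute the same result as a single shift by COUNT. A hypothetical C helper (not part of this file) illustrating the equivalence:

    #include <stdint.h>

    /* Shift N left by COUNT bits using only self-additions; each
       add doubles the value, so COUNT adds compute n << count.  */
    static uint32_t
    shl_by_adds (uint32_t n, int count)
    {
      while (count-- > 0)
        n += n;    /* n = 2*n, i.e. n << 1.  */
      return n;
    }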
-
-void
-ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
-{
- rtx (*gen_ashl3)(rtx, rtx, rtx);
- rtx (*gen_shld)(rtx, rtx, rtx);
- int half_width = GET_MODE_BITSIZE (mode) >> 1;
- machine_mode half_mode;
-
- rtx low[2], high[2];
- int count;
-
- if (CONST_INT_P (operands[2]))
- {
- split_double_mode (mode, operands, 2, low, high);
- count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
-
- if (count >= half_width)
- {
- emit_move_insn (high[0], low[1]);
- emit_move_insn (low[0], const0_rtx);
-
- if (count > half_width)
- ix86_expand_ashl_const (high[0], count - half_width, mode);
- }
- else
- {
- gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
-
- if (!rtx_equal_p (operands[0], operands[1]))
- emit_move_insn (operands[0], operands[1]);
-
- emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
- ix86_expand_ashl_const (low[0], count, mode);
- }
- return;
- }
-
- split_double_mode (mode, operands, 1, low, high);
- half_mode = mode == DImode ? SImode : DImode;
-
- gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
-
- if (operands[1] == const1_rtx)
- {
- /* Assuming we've chosen QImode-capable registers, 1 << N
- can be done with two 32/64-bit shifts, no branches, no cmoves. */
- if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
- {
- rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
-
- ix86_expand_clear (low[0]);
- ix86_expand_clear (high[0]);
- emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
-
- d = gen_lowpart (QImode, low[0]);
- d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
- s = gen_rtx_EQ (QImode, flags, const0_rtx);
- emit_insn (gen_rtx_SET (d, s));
-
- d = gen_lowpart (QImode, high[0]);
- d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
- s = gen_rtx_NE (QImode, flags, const0_rtx);
- emit_insn (gen_rtx_SET (d, s));
- }
-
- /* Otherwise, we can get the same results by manually performing
- a bit extract operation on bit 5/6, and then performing the two
- shifts. The two methods of getting 0/1 into low/high are exactly
- the same size. Avoiding the shift in the bit extract case helps
- pentium4 a bit; no one else seems to care much either way. */
- else
- {
- rtx (*gen_lshr3)(rtx, rtx, rtx);
- rtx (*gen_and3)(rtx, rtx, rtx);
- rtx (*gen_xor3)(rtx, rtx, rtx);
- HOST_WIDE_INT bits;
- rtx x;
-
- if (mode == DImode)
- {
- gen_lshr3 = gen_lshrsi3;
- gen_and3 = gen_andsi3;
- gen_xor3 = gen_xorsi3;
- bits = 5;
- }
- else
- {
- gen_lshr3 = gen_lshrdi3;
- gen_and3 = gen_anddi3;
- gen_xor3 = gen_xordi3;
- bits = 6;
- }
-
- if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
- x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
- else
- x = gen_lowpart (half_mode, operands[2]);
- emit_insn (gen_rtx_SET (high[0], x));
-
- emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
- emit_insn (gen_and3 (high[0], high[0], const1_rtx));
- emit_move_insn (low[0], high[0]);
- emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
- }
-
- emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
- emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
- return;
- }
-
- if (operands[1] == constm1_rtx)
- {
- /* For -1 << N, we can avoid the shld instruction, because we
- know that we're shifting 0...31/63 ones into a -1. */
- emit_move_insn (low[0], constm1_rtx);
- if (optimize_insn_for_size_p ())
- emit_move_insn (high[0], low[0]);
- else
- emit_move_insn (high[0], constm1_rtx);
- }
- else
- {
- gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
-
- if (!rtx_equal_p (operands[0], operands[1]))
- emit_move_insn (operands[0], operands[1]);
-
- split_double_mode (mode, operands, 1, low, high);
- emit_insn (gen_shld (high[0], low[0], operands[2]));
- }
-
- emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
-
- if (TARGET_CMOVE && scratch)
- {
- ix86_expand_clear (scratch);
- emit_insn (gen_x86_shift_adj_1
- (half_mode, high[0], low[0], operands[2], scratch));
- }
- else
- emit_insn (gen_x86_shift_adj_2 (half_mode, high[0], low[0], operands[2]));
-}
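
For reference, the constant-count splitting above follows the usual double-word shift decomposition; a portable C sketch built from 32-bit halves (hypothetical helper; the expander additionally handles variable counts with shld plus a conditional adjustment):

    #include <stdint.h>

    /* 64-bit left shift composed from 32-bit halves, assuming
       0 <= count <= 63.  */
    static void
    shl64 (uint32_t *hi, uint32_t *lo, unsigned int count)
    {
      if (count >= 32)
        {
          *hi = *lo << (count - 32);  /* Halves swap; low clears.  */
          *lo = 0;
        }
      else if (count > 0)
        {
          *hi = (*hi << count) | (*lo >> (32 - count));  /* Like shld.  */
          *lo <<= count;
        }
    }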
-
-void
-ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
-{
- rtx (*gen_ashr3)(rtx, rtx, rtx)
- = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
- rtx (*gen_shrd)(rtx, rtx, rtx);
- int half_width = GET_MODE_BITSIZE (mode) >> 1;
-
- rtx low[2], high[2];
- int count;
-
- if (CONST_INT_P (operands[2]))
- {
- split_double_mode (mode, operands, 2, low, high);
- count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
-
- if (count == GET_MODE_BITSIZE (mode) - 1)
- {
- emit_move_insn (high[0], high[1]);
- emit_insn (gen_ashr3 (high[0], high[0],
- GEN_INT (half_width - 1)));
- emit_move_insn (low[0], high[0]);
-
- }
- else if (count >= half_width)
- {
- emit_move_insn (low[0], high[1]);
- emit_move_insn (high[0], low[0]);
- emit_insn (gen_ashr3 (high[0], high[0],
- GEN_INT (half_width - 1)));
-
- if (count > half_width)
- emit_insn (gen_ashr3 (low[0], low[0],
- GEN_INT (count - half_width)));
- }
- else
- {
- gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
-
- if (!rtx_equal_p (operands[0], operands[1]))
- emit_move_insn (operands[0], operands[1]);
-
- emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
- emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
- }
- }
- else
- {
- machine_mode half_mode;
-
- gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
-
- if (!rtx_equal_p (operands[0], operands[1]))
- emit_move_insn (operands[0], operands[1]);
-
- split_double_mode (mode, operands, 1, low, high);
- half_mode = mode == DImode ? SImode : DImode;
-
- emit_insn (gen_shrd (low[0], high[0], operands[2]));
- emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
-
- if (TARGET_CMOVE && scratch)
- {
- emit_move_insn (scratch, high[0]);
- emit_insn (gen_ashr3 (scratch, scratch,
- GEN_INT (half_width - 1)));
- emit_insn (gen_x86_shift_adj_1
- (half_mode, low[0], high[0], operands[2], scratch));
- }
- else
- emit_insn (gen_x86_shift_adj_3
- (half_mode, low[0], high[0], operands[2]));
- }
-}
-
-void
-ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
-{
- rtx (*gen_lshr3)(rtx, rtx, rtx)
- = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
- rtx (*gen_shrd)(rtx, rtx, rtx);
- int half_width = GET_MODE_BITSIZE (mode) >> 1;
-
- rtx low[2], high[2];
- int count;
-
- if (CONST_INT_P (operands[2]))
- {
- split_double_mode (mode, operands, 2, low, high);
- count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
-
- if (count >= half_width)
- {
- emit_move_insn (low[0], high[1]);
- ix86_expand_clear (high[0]);
-
- if (count > half_width)
- emit_insn (gen_lshr3 (low[0], low[0],
- GEN_INT (count - half_width)));
- }
- else
- {
- gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
-
- if (!rtx_equal_p (operands[0], operands[1]))
- emit_move_insn (operands[0], operands[1]);
-
- emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
- emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
- }
- }
- else
- {
- machine_mode half_mode;
-
- gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
-
- if (!rtx_equal_p (operands[0], operands[1]))
- emit_move_insn (operands[0], operands[1]);
-
- split_double_mode (mode, operands, 1, low, high);
- half_mode = mode == DImode ? SImode : DImode;
-
- emit_insn (gen_shrd (low[0], high[0], operands[2]));
- emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
-
- if (TARGET_CMOVE && scratch)
- {
- ix86_expand_clear (scratch);
- emit_insn (gen_x86_shift_adj_1
- (half_mode, low[0], high[0], operands[2], scratch));
- }
- else
- emit_insn (gen_x86_shift_adj_2
- (half_mode, low[0], high[0], operands[2]));
- }
-}
-
-/* Return mode for the memcpy/memset loop counter. Prefer SImode over
- DImode for constant loop counts. */
-
-static machine_mode
-counter_mode (rtx count_exp)
-{
- if (GET_MODE (count_exp) != VOIDmode)
- return GET_MODE (count_exp);
- if (!CONST_INT_P (count_exp))
- return Pmode;
- if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
- return DImode;
- return SImode;
-}
-
-/* When ISSETMEM is FALSE, output a simple loop to copy memory pointed to
- by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall
- size is COUNT, specified in bytes. When ISSETMEM is TRUE, output the
- equivalent loop to set memory to VALUE (supposed to be in MODE).
-
- The size is rounded down to a whole number of chunks moved at once.
- SRCMEM and DESTMEM provide MEM rtx to supply proper aliasing info. */
-
-
-static void
-expand_set_or_cpymem_via_loop (rtx destmem, rtx srcmem,
- rtx destptr, rtx srcptr, rtx value,
- rtx count, machine_mode mode, int unroll,
- int expected_size, bool issetmem)
-{
- rtx_code_label *out_label, *top_label;
- rtx iter, tmp;
- machine_mode iter_mode = counter_mode (count);
- int piece_size_n = GET_MODE_SIZE (mode) * unroll;
- rtx piece_size = GEN_INT (piece_size_n);
- rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
- rtx size;
- int i;
-
- top_label = gen_label_rtx ();
- out_label = gen_label_rtx ();
- iter = gen_reg_rtx (iter_mode);
-
- size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
- NULL, 1, OPTAB_DIRECT);
- /* Those two should combine. */
- if (piece_size == const1_rtx)
- {
- emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
- true, out_label);
- predict_jump (REG_BR_PROB_BASE * 10 / 100);
- }
- emit_move_insn (iter, const0_rtx);
-
- emit_label (top_label);
-
- tmp = convert_modes (Pmode, iter_mode, iter, true);
-
- /* This assert could be relaxed - in this case we'll need to compute
- the smallest power of two containing PIECE_SIZE_N and pass it to
- offset_address. */
- gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
- destmem = offset_address (destmem, tmp, piece_size_n);
- destmem = adjust_address (destmem, mode, 0);
-
- if (!issetmem)
- {
- srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
- srcmem = adjust_address (srcmem, mode, 0);
-
- /* When unrolling for chips that reorder memory reads and writes,
- we could save registers by using a single temporary.
- Also, using 4 temporaries is overkill in 32-bit mode. */
- if (!TARGET_64BIT && 0)
- {
- for (i = 0; i < unroll; i++)
- {
- if (i)
- {
- destmem = adjust_address (copy_rtx (destmem), mode,
- GET_MODE_SIZE (mode));
- srcmem = adjust_address (copy_rtx (srcmem), mode,
- GET_MODE_SIZE (mode));
- }
- emit_move_insn (destmem, srcmem);
- }
- }
- else
- {
- rtx tmpreg[4];
- gcc_assert (unroll <= 4);
- for (i = 0; i < unroll; i++)
- {
- tmpreg[i] = gen_reg_rtx (mode);
- if (i)
- srcmem = adjust_address (copy_rtx (srcmem), mode,
- GET_MODE_SIZE (mode));
- emit_move_insn (tmpreg[i], srcmem);
- }
- for (i = 0; i < unroll; i++)
- {
- if (i)
- destmem = adjust_address (copy_rtx (destmem), mode,
- GET_MODE_SIZE (mode));
- emit_move_insn (destmem, tmpreg[i]);
- }
- }
- }
- else
- for (i = 0; i < unroll; i++)
- {
- if (i)
- destmem = adjust_address (copy_rtx (destmem), mode,
- GET_MODE_SIZE (mode));
- emit_move_insn (destmem, value);
- }
-
- tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
- true, OPTAB_LIB_WIDEN);
- if (tmp != iter)
- emit_move_insn (iter, tmp);
-
- emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
- true, top_label);
- if (expected_size != -1)
- {
- expected_size /= GET_MODE_SIZE (mode) * unroll;
- if (expected_size == 0)
- predict_jump (0);
- else if (expected_size > REG_BR_PROB_BASE)
- predict_jump (REG_BR_PROB_BASE - 1);
- else
- predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2)
- / expected_size);
- }
- else
- predict_jump (REG_BR_PROB_BASE * 80 / 100);
- iter = ix86_zero_extend_to_Pmode (iter);
- tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
- true, OPTAB_LIB_WIDEN);
- if (tmp != destptr)
- emit_move_insn (destptr, tmp);
- if (!issetmem)
- {
- tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
- true, OPTAB_LIB_WIDEN);
- if (tmp != srcptr)
- emit_move_insn (srcptr, tmp);
- }
- emit_label (out_label);
-}
-
-/* Divide COUNTREG by SCALE. */
-static rtx
-scale_counter (rtx countreg, int scale)
-{
- rtx sc;
-
- if (scale == 1)
- return countreg;
- if (CONST_INT_P (countreg))
- return GEN_INT (INTVAL (countreg) / scale);
- gcc_assert (REG_P (countreg));
-
- sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
- GEN_INT (exact_log2 (scale)),
- NULL, 1, OPTAB_DIRECT);
- return sc;
-}
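
Since SCALE is a power of two on this path, the division reduces to a logical right shift; a hypothetical C rendering of the same transformation:

    /* For a power-of-two scale, nbytes / scale == nbytes >> log2 (scale).  */
    static unsigned long
    scale_count (unsigned long nbytes, unsigned long scale)
    {
      return nbytes >> __builtin_ctzl (scale);
    }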
-
-/* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
- When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
- When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
- For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
- ORIG_VALUE is the original value passed to memset to fill the memory with.
- Other arguments have the same meaning as for the previous function. */
-
-static void
-expand_set_or_cpymem_via_rep (rtx destmem, rtx srcmem,
- rtx destptr, rtx srcptr, rtx value, rtx orig_value,
- rtx count,
- machine_mode mode, bool issetmem)
-{
- rtx destexp;
- rtx srcexp;
- rtx countreg;
- HOST_WIDE_INT rounded_count;
-
- /* If possible, it is shorter to use rep movs.
- TODO: Maybe it is better to move this logic to decide_alg. */
- if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
- && (!issetmem || orig_value == const0_rtx))
- mode = SImode;
-
- if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
- destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
-
- countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
- GET_MODE_SIZE (mode)));
- if (mode != QImode)
- {
- destexp = gen_rtx_ASHIFT (Pmode, countreg,
- GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
- destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
- }
- else
- destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
- if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
- {
- rounded_count
- = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
- destmem = shallow_copy_rtx (destmem);
- set_mem_size (destmem, rounded_count);
- }
- else if (MEM_SIZE_KNOWN_P (destmem))
- clear_mem_size (destmem);
-
- if (issetmem)
- {
- value = force_reg (mode, gen_lowpart (mode, value));
- emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
- }
- else
- {
- if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
- srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
- if (mode != QImode)
- {
- srcexp = gen_rtx_ASHIFT (Pmode, countreg,
- GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
- srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
- }
- else
- srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
- if (CONST_INT_P (count))
- {
- rounded_count
- = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
- srcmem = shallow_copy_rtx (srcmem);
- set_mem_size (srcmem, rounded_count);
- }
- else
- {
- if (MEM_SIZE_KNOWN_P (srcmem))
- clear_mem_size (srcmem);
- }
- emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
- destexp, srcexp));
- }
-}
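
Semantically, the rep-prefixed store emitted here behaves like a tight chunk-store loop; a rough C model of what "rep stos" does for 32-bit chunks (illustrative only, not how the insn is implemented):

    #include <stddef.h>
    #include <stdint.h>

    /* Approximate model of "rep stosl": store VAL into N consecutive
       32-bit slots, advancing the destination pointer.  */
    static void
    rep_stosl_model (uint32_t *dst, uint32_t val, size_t n)
    {
      while (n--)
        *dst++ = val;
    }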
-
-/* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
- DESTMEM.
- SRCMEM is passed by pointer so it can be updated on return.
- The return value is the updated DST. */
-static rtx
-emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
- HOST_WIDE_INT size_to_move)
-{
- rtx dst = destmem, src = *srcmem, tempreg;
- enum insn_code code;
- machine_mode move_mode;
- int piece_size, i;
-
- /* Find the widest mode in which we could perform moves.
- Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
- it until a move of such size is supported. */
- piece_size = 1 << floor_log2 (size_to_move);
- while (!int_mode_for_size (piece_size * BITS_PER_UNIT, 0).exists (&move_mode)
- || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
- {
- gcc_assert (piece_size > 1);
- piece_size >>= 1;
- }
-
- /* Find the corresponding vector mode with the same size as MOVE_MODE.
- MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
- if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
- {
- int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
- if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
- || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing)
- {
- move_mode = word_mode;
- piece_size = GET_MODE_SIZE (move_mode);
- code = optab_handler (mov_optab, move_mode);
- }
- }
- gcc_assert (code != CODE_FOR_nothing);
-
- dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
- src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
-
- /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
- gcc_assert (size_to_move % piece_size == 0);
-
- for (i = 0; i < size_to_move; i += piece_size)
- {
- /* We move from memory to memory, so we'll need to do it via
- a temporary register. */
- tempreg = gen_reg_rtx (move_mode);
- emit_insn (GEN_FCN (code) (tempreg, src));
- emit_insn (GEN_FCN (code) (dst, tempreg));
-
- emit_move_insn (destptr,
- plus_constant (Pmode, copy_rtx (destptr), piece_size));
- emit_move_insn (srcptr,
- plus_constant (Pmode, copy_rtx (srcptr), piece_size));
-
- dst = adjust_automodify_address_nv (dst, move_mode, destptr,
- piece_size);
- src = adjust_automodify_address_nv (src, move_mode, srcptr,
- piece_size);
- }
-
- /* Update DST and SRC rtx. */
- *srcmem = src;
- return dst;
-}
-
-/* Helper function for the string operations below. Test VARIABLE whether
- it is aligned to VALUE bytes. If it is, jump to the label. */
-
-static rtx_code_label *
-ix86_expand_aligntest (rtx variable, int value, bool epilogue)
-{
- rtx_code_label *label = gen_label_rtx ();
- rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
- if (GET_MODE (variable) == DImode)
- emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
- else
- emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
- emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
- 1, label);
- if (epilogue)
- predict_jump (REG_BR_PROB_BASE * 50 / 100);
- else
- predict_jump (REG_BR_PROB_BASE * 90 / 100);
- return label;
-}
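
The emitted test is a plain bit-mask check on the residual count; in C terms the decision the jump encodes is roughly (hypothetical sketch):

    /* Return nonzero when the masked bits of COUNT are zero, i.e. the
       step guarded by the label can be skipped.  */
    static int
    aligntest_taken (unsigned long count, unsigned long value)
    {
      return (count & value) == 0;
    }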
-
-
-/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
-
-static void
-expand_cpymem_epilogue (rtx destmem, rtx srcmem,
- rtx destptr, rtx srcptr, rtx count, int max_size)
-{
- rtx src, dest;
- if (CONST_INT_P (count))
- {
- HOST_WIDE_INT countval = INTVAL (count);
- HOST_WIDE_INT epilogue_size = countval % max_size;
- int i;
-
- /* For now MAX_SIZE should be a power of 2. This assert could be
- relaxed, but it'll require a bit more complicated epilogue
- expansion. */
- gcc_assert ((max_size & (max_size - 1)) == 0);
- for (i = max_size; i >= 1; i >>= 1)
- {
- if (epilogue_size & i)
- destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
- }
- return;
- }
- if (max_size > 8)
- {
- count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
- count, 1, OPTAB_DIRECT);
- expand_set_or_cpymem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
- count, QImode, 1, 4, false);
- return;
- }
-
- /* When single-instruction string operations are available, we can cheaply
- increase the dest and src pointers. Otherwise we save code size by
- maintaining an offset (zero is readily available from the preceding rep
- operation) and using x86 addressing modes.
- */
- if (TARGET_SINGLE_STRINGOP)
- {
- if (max_size > 4)
- {
- rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
- src = change_address (srcmem, SImode, srcptr);
- dest = change_address (destmem, SImode, destptr);
- emit_insn (gen_strmov (destptr, dest, srcptr, src));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (max_size > 2)
- {
- rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
- src = change_address (srcmem, HImode, srcptr);
- dest = change_address (destmem, HImode, destptr);
- emit_insn (gen_strmov (destptr, dest, srcptr, src));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (max_size > 1)
- {
- rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
- src = change_address (srcmem, QImode, srcptr);
- dest = change_address (destmem, QImode, destptr);
- emit_insn (gen_strmov (destptr, dest, srcptr, src));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- }
- else
- {
- rtx offset = force_reg (Pmode, const0_rtx);
- rtx tmp;
-
- if (max_size > 4)
- {
- rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
- src = change_address (srcmem, SImode, srcptr);
- dest = change_address (destmem, SImode, destptr);
- emit_move_insn (dest, src);
- tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
- true, OPTAB_LIB_WIDEN);
- if (tmp != offset)
- emit_move_insn (offset, tmp);
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (max_size > 2)
- {
- rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
- tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
- src = change_address (srcmem, HImode, tmp);
- tmp = gen_rtx_PLUS (Pmode, destptr, offset);
- dest = change_address (destmem, HImode, tmp);
- emit_move_insn (dest, src);
- tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
- true, OPTAB_LIB_WIDEN);
- if (tmp != offset)
- emit_move_insn (offset, tmp);
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (max_size > 1)
- {
- rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
- tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
- src = change_address (srcmem, QImode, tmp);
- tmp = gen_rtx_PLUS (Pmode, destptr, offset);
- dest = change_address (destmem, QImode, tmp);
- emit_move_insn (dest, src);
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- }
-}
-
-/* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
- with the value PROMOTED_VAL.
- DESTPTR is advanced as the stores are emitted.
- The return value is the updated DST. */
-static rtx
-emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
- HOST_WIDE_INT size_to_move)
-{
- rtx dst = destmem;
- enum insn_code code;
- machine_mode move_mode;
- int piece_size, i;
-
- /* Find the widest mode in which we could perform moves.
- Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
- it until a move of such size is supported. */
- move_mode = GET_MODE (promoted_val);
- if (move_mode == VOIDmode)
- move_mode = QImode;
- if (size_to_move < GET_MODE_SIZE (move_mode))
- {
- unsigned int move_bits = size_to_move * BITS_PER_UNIT;
- move_mode = int_mode_for_size (move_bits, 0).require ();
- promoted_val = gen_lowpart (move_mode, promoted_val);
- }
- piece_size = GET_MODE_SIZE (move_mode);
- code = optab_handler (mov_optab, move_mode);
- gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
-
- dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
-
- /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
- gcc_assert (size_to_move % piece_size == 0);
-
- for (i = 0; i < size_to_move; i += piece_size)
- {
- if (piece_size <= GET_MODE_SIZE (word_mode))
- {
- emit_insn (gen_strset (destptr, dst, promoted_val));
- dst = adjust_automodify_address_nv (dst, move_mode, destptr,
- piece_size);
- continue;
- }
-
- emit_insn (GEN_FCN (code) (dst, promoted_val));
-
- emit_move_insn (destptr,
- plus_constant (Pmode, copy_rtx (destptr), piece_size));
-
- dst = adjust_automodify_address_nv (dst, move_mode, destptr,
- piece_size);
- }
-
- /* Update DST rtx. */
- return dst;
-}
-/* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
-static void
-expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
- rtx count, int max_size)
-{
- count = expand_simple_binop (counter_mode (count), AND, count,
- GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
- expand_set_or_cpymem_via_loop (destmem, NULL, destptr, NULL,
- gen_lowpart (QImode, value), count, QImode,
- 1, max_size / 2, true);
-}
-
-/* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
-static void
-expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
- rtx count, int max_size)
-{
- rtx dest;
-
- if (CONST_INT_P (count))
- {
- HOST_WIDE_INT countval = INTVAL (count);
- HOST_WIDE_INT epilogue_size = countval % max_size;
- int i;
-
- /* For now MAX_SIZE should be a power of 2. This assert could be
- relaxed, but it'll require a bit more complicated epilogue
- expansion. */
- gcc_assert ((max_size & (max_size - 1)) == 0);
- for (i = max_size; i >= 1; i >>= 1)
- {
- if (epilogue_size & i)
- {
- if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
- destmem = emit_memset (destmem, destptr, vec_value, i);
- else
- destmem = emit_memset (destmem, destptr, value, i);
- }
- }
- return;
- }
- if (max_size > 32)
- {
- expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
- return;
- }
- if (max_size > 16)
- {
- rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
- if (TARGET_64BIT)
- {
- dest = change_address (destmem, DImode, destptr);
- emit_insn (gen_strset (destptr, dest, value));
- dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
- emit_insn (gen_strset (destptr, dest, value));
- }
- else
- {
- dest = change_address (destmem, SImode, destptr);
- emit_insn (gen_strset (destptr, dest, value));
- dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
- emit_insn (gen_strset (destptr, dest, value));
- dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
- emit_insn (gen_strset (destptr, dest, value));
- dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
- emit_insn (gen_strset (destptr, dest, value));
- }
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (max_size > 8)
- {
- rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
- if (TARGET_64BIT)
- {
- dest = change_address (destmem, DImode, destptr);
- emit_insn (gen_strset (destptr, dest, value));
- }
- else
- {
- dest = change_address (destmem, SImode, destptr);
- emit_insn (gen_strset (destptr, dest, value));
- dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
- emit_insn (gen_strset (destptr, dest, value));
- }
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (max_size > 4)
- {
- rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
- dest = change_address (destmem, SImode, destptr);
- emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (max_size > 2)
- {
- rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
- dest = change_address (destmem, HImode, destptr);
- emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
- if (max_size > 1)
- {
- rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
- dest = change_address (destmem, QImode, destptr);
- emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
-}
-
-/* Decrease COUNTREG by VALUE. */
-static void
-ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
-{
- emit_insn (gen_add2_insn (countreg, GEN_INT (-value)));
-}
-
-/* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM, or store
- enough bytes into DESTMEM, to align it to DESIRED_ALIGNMENT. The original
- alignment is ALIGN.
- Depending on ISSETMEM, either the arguments SRCMEM/SRCPTR or
- VALUE/VEC_VALUE are ignored.
- The return value is the updated DESTMEM. */
-
-static rtx
-expand_set_or_cpymem_prologue (rtx destmem, rtx srcmem,
- rtx destptr, rtx srcptr, rtx value,
- rtx vec_value, rtx count, int align,
- int desired_alignment, bool issetmem)
-{
- int i;
- for (i = 1; i < desired_alignment; i <<= 1)
- {
- if (align <= i)
- {
- rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
- if (issetmem)
- {
- if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
- destmem = emit_memset (destmem, destptr, vec_value, i);
- else
- destmem = emit_memset (destmem, destptr, value, i);
- }
- else
- destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
- ix86_adjust_counter (count, i);
- emit_label (label);
- LABEL_NUSES (label) = 1;
- set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
- }
- }
- return destmem;
-}
-
-/* Test if COUNT&SIZE is nonzero and if so, expand a cpymem
- or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
- and jump to DONE_LABEL. */
-static void
-expand_small_cpymem_or_setmem (rtx destmem, rtx srcmem,
- rtx destptr, rtx srcptr,
- rtx value, rtx vec_value,
- rtx count, int size,
- rtx done_label, bool issetmem)
-{
- rtx_code_label *label = ix86_expand_aligntest (count, size, false);
- machine_mode mode = int_mode_for_size (size * BITS_PER_UNIT, 1).else_blk ();
- rtx modesize;
- int n;
-
- /* If we do not have a vector value to copy, we must reduce the size. */
- if (issetmem)
- {
- if (!vec_value)
- {
- if (GET_MODE (value) == VOIDmode && size > 8)
- mode = Pmode;
- else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
- mode = GET_MODE (value);
- }
- else
- mode = GET_MODE (vec_value), value = vec_value;
- }
- else
- {
- /* Choose appropriate vector mode. */
- if (size >= 32)
- mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
- else if (size >= 16)
- mode = TARGET_SSE ? V16QImode : DImode;
- srcmem = change_address (srcmem, mode, srcptr);
- }
- destmem = change_address (destmem, mode, destptr);
- modesize = GEN_INT (GET_MODE_SIZE (mode));
- gcc_assert (GET_MODE_SIZE (mode) <= size);
- for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
- {
- if (issetmem)
- emit_move_insn (destmem, gen_lowpart (mode, value));
- else
- {
- emit_move_insn (destmem, srcmem);
- srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
- }
- destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
- }
-
- destmem = offset_address (destmem, count, 1);
- destmem = offset_address (destmem, GEN_INT (-2 * size),
- GET_MODE_SIZE (mode));
- if (!issetmem)
- {
- srcmem = offset_address (srcmem, count, 1);
- srcmem = offset_address (srcmem, GEN_INT (-2 * size),
- GET_MODE_SIZE (mode));
- }
- for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
- {
- if (issetmem)
- emit_move_insn (destmem, gen_lowpart (mode, value));
- else
- {
- emit_move_insn (destmem, srcmem);
- srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
- }
- destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
- }
- emit_jump_insn (gen_jump (done_label));
- emit_barrier ();
-
- emit_label (label);
- LABEL_NUSES (label) = 1;
-}
-
-/* Handle a small memcpy (up to SIZE, which is supposed to be a small power
- of 2) and get ready for the main memcpy loop by copying the initial
- DESIRED_ALIGN-ALIGN bytes and the last SIZE bytes, adjusting
- DESTPTR/SRCPTR/COUNT in a way that lets us proceed with a loop copying
- SIZE bytes at once. Do moves in MODE.
- DONE_LABEL is a label after the whole copying sequence. The label is created
- on demand if *DONE_LABEL is NULL.
- MIN_SIZE is the minimal size of the block copied. This value gets adjusted
- for the new bounds after the initial copies.
-
- DESTMEM/SRCMEM are memory expressions pointing to the copied block,
- DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
- we will dispatch to a library call for large blocks.
-
- In pseudocode we do:
-
- if (COUNT < SIZE)
- {
- Assume that SIZE is 4. Bigger sizes are handled analogously
- if (COUNT & 4)
- {
- copy 4 bytes from SRCPTR to DESTPTR
- copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
- goto done_label
- }
- if (!COUNT)
- goto done_label;
- copy 1 byte from SRCPTR to DESTPTR
- if (COUNT & 2)
- {
- copy 2 bytes from SRCPTR to DESTPTR
- copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
- }
- }
- else
- {
- copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
- copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
-
- OLD_DESTPTR = DESTPTR;
- Align DESTPTR up to DESIRED_ALIGN
- SRCPTR += DESTPTR - OLD_DESTPTR
- COUNT -= DESTPTR - OLD_DESTPTR
- if (DYNAMIC_CHECK)
- Round COUNT down to multiple of SIZE
- << optional caller supplied zero size guard is here >>
- << optional caller supplied dynamic check is here >>
- << caller supplied main copy loop is here >>
- }
- done_label:
- */
-static void
-expand_set_or_cpymem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
- rtx *destptr, rtx *srcptr,
- machine_mode mode,
- rtx value, rtx vec_value,
- rtx *count,
- rtx_code_label **done_label,
- int size,
- int desired_align,
- int align,
- unsigned HOST_WIDE_INT *min_size,
- bool dynamic_check,
- bool issetmem)
-{
- rtx_code_label *loop_label = NULL, *label;
- int n;
- rtx modesize;
- int prolog_size = 0;
- rtx mode_value;
-
- /* Choose the proper value to copy. */
- if (issetmem && VECTOR_MODE_P (mode))
- mode_value = vec_value;
- else
- mode_value = value;
- gcc_assert (GET_MODE_SIZE (mode) <= size);
-
- /* See if block is big or small, handle small blocks. */
- if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
- {
- int size2 = size;
- loop_label = gen_label_rtx ();
-
- if (!*done_label)
- *done_label = gen_label_rtx ();
-
- emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
- 1, loop_label);
- size2 >>= 1;
-
- /* Handle sizes > 3. */
- for (;size2 > 2; size2 >>= 1)
- expand_small_cpymem_or_setmem (destmem, srcmem,
- *destptr, *srcptr,
- value, vec_value,
- *count,
- size2, *done_label, issetmem);
- /* Nothing to copy? Jump to DONE_LABEL if so. */
- emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
- 1, *done_label);
-
- /* Do a byte copy. */
- destmem = change_address (destmem, QImode, *destptr);
- if (issetmem)
- emit_move_insn (destmem, gen_lowpart (QImode, value));
- else
- {
- srcmem = change_address (srcmem, QImode, *srcptr);
- emit_move_insn (destmem, srcmem);
- }
-
- /* Handle sizes 2 and 3. */
- label = ix86_expand_aligntest (*count, 2, false);
- destmem = change_address (destmem, HImode, *destptr);
- destmem = offset_address (destmem, *count, 1);
- destmem = offset_address (destmem, GEN_INT (-2), 2);
- if (issetmem)
- emit_move_insn (destmem, gen_lowpart (HImode, value));
- else
- {
- srcmem = change_address (srcmem, HImode, *srcptr);
- srcmem = offset_address (srcmem, *count, 1);
- srcmem = offset_address (srcmem, GEN_INT (-2), 2);
- emit_move_insn (destmem, srcmem);
- }
-
- emit_label (label);
- LABEL_NUSES (label) = 1;
- emit_jump_insn (gen_jump (*done_label));
- emit_barrier ();
- }
- else
- gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
- || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
-
- /* Start memcpy for COUNT >= SIZE. */
- if (loop_label)
- {
- emit_label (loop_label);
- LABEL_NUSES (loop_label) = 1;
- }
-
- /* Copy first desired_align bytes. */
- if (!issetmem)
- srcmem = change_address (srcmem, mode, *srcptr);
- destmem = change_address (destmem, mode, *destptr);
- modesize = GEN_INT (GET_MODE_SIZE (mode));
- for (n = 0; prolog_size < desired_align - align; n++)
- {
- if (issetmem)
- emit_move_insn (destmem, mode_value);
- else
- {
- emit_move_insn (destmem, srcmem);
- srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
- }
- destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
- prolog_size += GET_MODE_SIZE (mode);
- }
-
-
- /* Copy last SIZE bytes. */
- destmem = offset_address (destmem, *count, 1);
- destmem = offset_address (destmem,
- GEN_INT (-size - prolog_size),
- 1);
- if (issetmem)
- emit_move_insn (destmem, mode_value);
- else
- {
- srcmem = offset_address (srcmem, *count, 1);
- srcmem = offset_address (srcmem,
- GEN_INT (-size - prolog_size),
- 1);
- emit_move_insn (destmem, srcmem);
- }
- for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
- {
- destmem = offset_address (destmem, modesize, 1);
- if (issetmem)
- emit_move_insn (destmem, mode_value);
- else
- {
- srcmem = offset_address (srcmem, modesize, 1);
- emit_move_insn (destmem, srcmem);
- }
- }
-
- /* Align destination. */
- if (desired_align > 1 && desired_align > align)
- {
- rtx saveddest = *destptr;
-
- gcc_assert (desired_align <= size);
- /* Align destptr up, placing it in a new register. */
- *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
- GEN_INT (prolog_size),
- NULL_RTX, 1, OPTAB_DIRECT);
- if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
- REG_POINTER (*destptr) = 1;
- *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
- GEN_INT (-desired_align),
- *destptr, 1, OPTAB_DIRECT);
- /* See how many bytes we skipped. */
- saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
- *destptr,
- saveddest, 1, OPTAB_DIRECT);
- /* Adjust srcptr and count. */
- if (!issetmem)
- *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
- saveddest, *srcptr, 1, OPTAB_DIRECT);
- *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
- saveddest, *count, 1, OPTAB_DIRECT);
- /* We copied at most size + prolog_size. */
- if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
- *min_size
- = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
- else
- *min_size = 0;
-
- /* Our loops always round down the block size, but for dispatch to a
- library call we need the precise value. */
- if (dynamic_check)
- *count = expand_simple_binop (GET_MODE (*count), AND, *count,
- GEN_INT (-size), *count, 1, OPTAB_DIRECT);
- }
- else
- {
- gcc_assert (prolog_size == 0);
- /* Decrease count, so we won't end up copying the last word twice. */
- if (!CONST_INT_P (*count))
- *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
- constm1_rtx, *count, 1, OPTAB_DIRECT);
- else
- *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
- (unsigned HOST_WIDE_INT)size));
- if (*min_size)
- *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
- }
-}
-
-
-/* This function is like the previous one, except here we know how many bytes
- need to be copied. That allows us to update alignment not only of DST, which
- is returned, but also of SRC, which is passed as a pointer for that
- reason. */
-static rtx
-expand_set_or_cpymem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
- rtx srcreg, rtx value, rtx vec_value,
- int desired_align, int align_bytes,
- bool issetmem)
-{
- rtx src = NULL;
- rtx orig_dst = dst;
- rtx orig_src = NULL;
- int piece_size = 1;
- int copied_bytes = 0;
-
- if (!issetmem)
- {
- gcc_assert (srcp != NULL);
- src = *srcp;
- orig_src = src;
- }
-
- for (piece_size = 1;
- piece_size <= desired_align && copied_bytes < align_bytes;
- piece_size <<= 1)
- {
- if (align_bytes & piece_size)
- {
- if (issetmem)
- {
- if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
- dst = emit_memset (dst, destreg, vec_value, piece_size);
- else
- dst = emit_memset (dst, destreg, value, piece_size);
- }
- else
- dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
- copied_bytes += piece_size;
- }
- }
- if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
- set_mem_align (dst, desired_align * BITS_PER_UNIT);
- if (MEM_SIZE_KNOWN_P (orig_dst))
- set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
-
- if (!issetmem)
- {
- int src_align_bytes = get_mem_align_offset (src, desired_align
- * BITS_PER_UNIT);
- if (src_align_bytes >= 0)
- src_align_bytes = desired_align - src_align_bytes;
- if (src_align_bytes >= 0)
- {
- unsigned int src_align;
- for (src_align = desired_align; src_align >= 2; src_align >>= 1)
- {
- if ((src_align_bytes & (src_align - 1))
- == (align_bytes & (src_align - 1)))
- break;
- }
- if (src_align > (unsigned int) desired_align)
- src_align = desired_align;
- if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
- set_mem_align (src, src_align * BITS_PER_UNIT);
- }
- if (MEM_SIZE_KNOWN_P (orig_src))
- set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
- *srcp = src;
- }
-
- return dst;
-}
-
-/* Return true if ALG can be used in current context.
- Assume we expand memset if MEMSET is true. */
-static bool
-alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
-{
- if (alg == no_stringop)
- return false;
- if (alg == vector_loop)
- return TARGET_SSE || TARGET_AVX;
- /* Algorithms using the rep prefix want at least edi and ecx;
- additionally, memset wants eax and memcpy wants esi. Don't
- consider such algorithms if the user has appropriated those
- registers for their own purposes, or if we have a non-default
- address space, since some string insns cannot override the segment. */
- if (alg == rep_prefix_1_byte
- || alg == rep_prefix_4_byte
- || alg == rep_prefix_8_byte)
- {
- if (have_as)
- return false;
- if (fixed_regs[CX_REG]
- || fixed_regs[DI_REG]
- || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
- return false;
- }
- return true;
-}
-
-/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
-static enum stringop_alg
-decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
- unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
- bool memset, bool zero_memset, bool have_as,
- int *dynamic_check, bool *noalign, bool recur)
-{
- const struct stringop_algs *algs;
- bool optimize_for_speed;
- int max = 0;
- const struct processor_costs *cost;
- int i;
- bool any_alg_usable_p = false;
-
- *noalign = false;
- *dynamic_check = -1;
-
- /* Even if the string operation call is cold, we still might spend a lot
- of time processing large blocks. */
- if (optimize_function_for_size_p (cfun)
- || (optimize_insn_for_size_p ()
- && (max_size < 256
- || (expected_size != -1 && expected_size < 256))))
- optimize_for_speed = false;
- else
- optimize_for_speed = true;
-
- cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
- if (memset)
- algs = &cost->memset[TARGET_64BIT != 0];
- else
- algs = &cost->memcpy[TARGET_64BIT != 0];
-
- /* See maximal size for user defined algorithm. */
- for (i = 0; i < MAX_STRINGOP_ALGS; i++)
- {
- enum stringop_alg candidate = algs->size[i].alg;
- bool usable = alg_usable_p (candidate, memset, have_as);
- any_alg_usable_p |= usable;
-
- if (candidate != libcall && candidate && usable)
- max = algs->size[i].max;
- }
-
- /* If the expected size is not known but the max size is small enough
- that the inline version is a win, set the expected size into
- the range. */
- if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
- && expected_size == -1)
- expected_size = min_size / 2 + max_size / 2;
-
- /* If user specified the algorithm, honor it if possible. */
- if (ix86_stringop_alg != no_stringop
- && alg_usable_p (ix86_stringop_alg, memset, have_as))
- return ix86_stringop_alg;
- /* rep; movq or rep; movl is the smallest variant. */
- else if (!optimize_for_speed)
- {
- *noalign = true;
- if (!count || (count & 3) || (memset && !zero_memset))
- return alg_usable_p (rep_prefix_1_byte, memset, have_as)
- ? rep_prefix_1_byte : loop_1_byte;
- else
- return alg_usable_p (rep_prefix_4_byte, memset, have_as)
- ? rep_prefix_4_byte : loop;
- }
- /* Very tiny blocks are best handled via the loop; REP is expensive to
- set up. */
- else if (expected_size != -1 && expected_size < 4)
- return loop_1_byte;
- else if (expected_size != -1)
- {
- enum stringop_alg alg = libcall;
- bool alg_noalign = false;
- for (i = 0; i < MAX_STRINGOP_ALGS; i++)
- {
- /* We get here if the algorithms that were not libcall-based
- were rep-prefix based and we are unable to use rep prefixes
- based on global register usage. Break out of the loop and
- use the heuristic below. */
- if (algs->size[i].max == 0)
- break;
- if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
- {
- enum stringop_alg candidate = algs->size[i].alg;
-
- if (candidate != libcall
- && alg_usable_p (candidate, memset, have_as))
- {
- alg = candidate;
- alg_noalign = algs->size[i].noalign;
- }
- /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
- last non-libcall inline algorithm. */
- if (TARGET_INLINE_ALL_STRINGOPS)
- {
- /* When the current size is best copied by a libcall,
- but we are still forced to inline, run the heuristic below
- that will pick code for medium-sized blocks. */
- if (alg != libcall)
- {
- *noalign = alg_noalign;
- return alg;
- }
- else if (!any_alg_usable_p)
- break;
- }
- else if (alg_usable_p (candidate, memset, have_as))
- {
- *noalign = algs->size[i].noalign;
- return candidate;
- }
- }
- }
- }
- /* When asked to inline the call anyway, try to pick a meaningful choice.
- We look for the maximal size of block that is faster to copy by hand and
- take blocks of at most that size, guessing that the average size will
- be roughly half of the maximum.
-
- If this turns out to be bad, we might simply specify the preferred
- choice in ix86_costs. */
- if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
- && (algs->unknown_size == libcall
- || !alg_usable_p (algs->unknown_size, memset, have_as)))
- {
- enum stringop_alg alg;
- HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
-
- /* If there aren't any usable algorithms or if recursing already,
- then recursing on smaller sizes or same size isn't going to
- find anything. Just return the simple byte-at-a-time copy loop. */
- if (!any_alg_usable_p || recur)
- {
- /* Pick something reasonable. */
- if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
- *dynamic_check = 128;
- return loop_1_byte;
- }
- alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
- zero_memset, have_as, dynamic_check, noalign, true);
- gcc_assert (*dynamic_check == -1);
- if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
- *dynamic_check = max;
- else
- gcc_assert (alg != libcall);
- return alg;
- }
- return (alg_usable_p (algs->unknown_size, memset, have_as)
- ? algs->unknown_size : libcall);
-}
-
-/* Decide on alignment. We know that the operand is already aligned to ALIGN
- (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
-static int
-decide_alignment (int align,
- enum stringop_alg alg,
- int expected_size,
- machine_mode move_mode)
-{
- int desired_align = 0;
-
- gcc_assert (alg != no_stringop);
-
- if (alg == libcall)
- return 0;
- if (move_mode == VOIDmode)
- return 0;
-
- desired_align = GET_MODE_SIZE (move_mode);
- /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
- copying a whole cache line at once. */
- if (TARGET_PENTIUMPRO
- && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
- desired_align = 8;
-
- if (optimize_size)
- desired_align = 1;
- if (desired_align < align)
- desired_align = align;
- if (expected_size != -1 && expected_size < 4)
- desired_align = align;
-
- return desired_align;
-}
-
-
-/* Helper function for memset. For a QImode value 0xXY produce
- 0xXYXYXYXY of the width specified by MODE. This is essentially
- a * 0x01010101, but we can do slightly better than
- synth_mult by unwinding the sequence by hand on CPUs with
- slow multiply. */
-static rtx
-promote_duplicated_reg (machine_mode mode, rtx val)
-{
- machine_mode valmode = GET_MODE (val);
- rtx tmp;
- int nops = mode == DImode ? 3 : 2;
-
- gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
- if (val == const0_rtx)
- return copy_to_mode_reg (mode, CONST0_RTX (mode));
- if (CONST_INT_P (val))
- {
- HOST_WIDE_INT v = INTVAL (val) & 255;
-
- v |= v << 8;
- v |= v << 16;
- if (mode == DImode)
- v |= (v << 16) << 16;
- return copy_to_mode_reg (mode, gen_int_mode (v, mode));
- }
-
- if (valmode == VOIDmode)
- valmode = QImode;
- if (valmode != QImode)
- val = gen_lowpart (QImode, val);
- if (mode == QImode)
- return val;
- if (!TARGET_PARTIAL_REG_STALL)
- nops--;
- if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
- + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
- <= (ix86_cost->shift_const + ix86_cost->add) * nops
- + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
- {
- rtx reg = convert_modes (mode, QImode, val, true);
- tmp = promote_duplicated_reg (mode, const1_rtx);
- return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
- OPTAB_DIRECT);
- }
- else
- {
- rtx reg = convert_modes (mode, QImode, val, true);
-
- if (!TARGET_PARTIAL_REG_STALL)
- if (mode == SImode)
- emit_insn (gen_insvsi_1 (reg, reg));
- else
- emit_insn (gen_insvdi_1 (reg, reg));
- else
- {
- tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
- NULL, 1, OPTAB_DIRECT);
- reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1,
- OPTAB_DIRECT);
- }
- tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
- NULL, 1, OPTAB_DIRECT);
- reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
- if (mode == SImode)
- return reg;
- tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
- NULL, 1, OPTAB_DIRECT);
- reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
- return reg;
- }
-}
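
The multiply-free path above is the classic shift/or byte broadcast; a compact C rendering for the 32-bit case (hypothetical helper):

    #include <stdint.h>

    /* Replicate byte V across a 32-bit word, i.e. v * 0x01010101,
       using the same shift/or unwinding as the slow-multiply path.  */
    static uint32_t
    dup_byte32 (uint8_t v)
    {
      uint32_t r = v;
      r |= r << 8;     /* 0x0000XYXY */
      r |= r << 16;    /* 0xXYXYXYXY */
      return r;
    }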
-
-/* Duplicate value VAL using promote_duplicated_reg into the maximal size that
- will be needed by the main loop copying SIZE_NEEDED chunks and by the
- prologue getting the alignment from ALIGN to DESIRED_ALIGN. */
-static rtx
-promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
- int align)
-{
- rtx promoted_val;
-
- if (TARGET_64BIT
- && (size_needed > 4 || (desired_align > align && desired_align > 4)))
- promoted_val = promote_duplicated_reg (DImode, val);
- else if (size_needed > 2 || (desired_align > align && desired_align > 2))
- promoted_val = promote_duplicated_reg (SImode, val);
- else if (size_needed > 1 || (desired_align > align && desired_align > 1))
- promoted_val = promote_duplicated_reg (HImode, val);
- else
- promoted_val = val;
-
- return promoted_val;
-}
-
-/* Copy the address to a Pmode register. This is used for x32 to
- truncate DImode TLS address to a SImode register. */
-
-static rtx
-ix86_copy_addr_to_reg (rtx addr)
-{
- rtx reg;
- if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
- {
- reg = copy_addr_to_reg (addr);
- REG_POINTER (reg) = 1;
- return reg;
- }
- else
- {
- gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
- reg = copy_to_mode_reg (DImode, addr);
- REG_POINTER (reg) = 1;
- return gen_rtx_SUBREG (SImode, reg, 0);
- }
-}
-
-/* Expand string move (memcpy) or store (memset) operation. Use i386 string
- operations when profitable. The code depends upon architecture, block size
- and alignment, but always has one of the following overall structures:
-
- Aligned move sequence:
-
- 1) Prologue guard: Conditional that jumps ahead to the epilogue for small
- blocks that can be handled by the epilogue alone. This is faster
- but also needed for correctness, since the prologue assumes the block
- is larger than the desired alignment.
-
- Optional dynamic check for size and libcall for large
- blocks is emitted here too, with -minline-stringops-dynamically.
-
- 2) Prologue: copy the first few bytes in order to get the destination
- aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
- than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
- copied. We emit either a jump tree for power-of-two-sized
- blocks, or a byte loop.
-
- 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
- with specified algorithm.
-
- 4) Epilogue: code copying the tail of the block that is too small to be
- handled by the main body (or up to the size guarded by the prologue guard).
-
- Misaligned move sequence
-
- 1) Misaligned move prologue/epilogue containing:
- a) Prologue handling small memory blocks and jumping to done_label
- (skipped if blocks are known to be large enough)
- b) Single possibly misaligned move copying the first
- DESIRED_ALIGN-ALIGN bytes if alignment is needed
- (skipped if alignment is not needed)
- c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
-
- 2) Zero size guard dispatching to done_label, if needed
-
- 3) Dispatch to a library call, if needed,
-
- 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
- with specified algorithm. */
-bool
-ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
- rtx align_exp, rtx expected_align_exp,
- rtx expected_size_exp, rtx min_size_exp,
- rtx max_size_exp, rtx probable_max_size_exp,
- bool issetmem)
-{
- rtx destreg;
- rtx srcreg = NULL;
- rtx_code_label *label = NULL;
- rtx tmp;
- rtx_code_label *jump_around_label = NULL;
- HOST_WIDE_INT align = 1;
- unsigned HOST_WIDE_INT count = 0;
- HOST_WIDE_INT expected_size = -1;
- int size_needed = 0, epilogue_size_needed;
- int desired_align = 0, align_bytes = 0;
- enum stringop_alg alg;
- rtx promoted_val = NULL;
- rtx vec_promoted_val = NULL;
- bool force_loopy_epilogue = false;
- int dynamic_check;
- bool need_zero_guard = false;
- bool noalign;
- machine_mode move_mode = VOIDmode;
- machine_mode wider_mode;
- int unroll_factor = 1;
- /* TODO: Once value ranges are available, fill in proper data. */
- unsigned HOST_WIDE_INT min_size = 0;
- unsigned HOST_WIDE_INT max_size = -1;
- unsigned HOST_WIDE_INT probable_max_size = -1;
- bool misaligned_prologue_used = false;
- bool have_as;
-
- if (CONST_INT_P (align_exp))
- align = INTVAL (align_exp);
- /* i386 can do misaligned access at reasonably increased cost. */
- if (CONST_INT_P (expected_align_exp)
- && INTVAL (expected_align_exp) > align)
- align = INTVAL (expected_align_exp);
- /* ALIGN is the minimum of destination and source alignment, but we care here
- just about destination alignment. */
- else if (!issetmem
- && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
- align = MEM_ALIGN (dst) / BITS_PER_UNIT;
-
- if (CONST_INT_P (count_exp))
- {
- min_size = max_size = probable_max_size = count = expected_size
- = INTVAL (count_exp);
- /* When COUNT is 0, there is nothing to do. */
- if (!count)
- return true;
- }
- else
- {
- if (min_size_exp)
- min_size = INTVAL (min_size_exp);
- if (max_size_exp)
- max_size = INTVAL (max_size_exp);
- if (probable_max_size_exp)
- probable_max_size = INTVAL (probable_max_size_exp);
- if (CONST_INT_P (expected_size_exp))
- expected_size = INTVAL (expected_size_exp);
- }
-
- /* Make sure we don't need to care about overflow later on. */
- if (count > (HOST_WIDE_INT_1U << 30))
- return false;
-
- have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
- if (!issetmem)
- have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
-
- /* Step 0: Decide on preferred algorithm, desired alignment and
- size of chunks to be copied by main loop. */
- alg = decide_alg (count, expected_size, min_size, probable_max_size,
- issetmem,
- issetmem && val_exp == const0_rtx, have_as,
- &dynamic_check, &noalign, false);
-
- if (dump_file)
- fprintf (dump_file, "Selected stringop expansion strategy: %s\n",
- stringop_alg_names[alg]);
-
- if (alg == libcall)
- return false;
- gcc_assert (alg != no_stringop);
-
- /* For now the vector version of memset is generated only for memory zeroing,
- as creating a promoted vector value is very cheap in this case. */
- if (issetmem && alg == vector_loop && val_exp != const0_rtx)
- alg = unrolled_loop;
-
- if (!count)
- count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
- destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
- if (!issetmem)
- srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
-
- unroll_factor = 1;
- move_mode = word_mode;
- switch (alg)
- {
- case libcall:
- case no_stringop:
- case last_alg:
- gcc_unreachable ();
- case loop_1_byte:
- need_zero_guard = true;
- move_mode = QImode;
- break;
- case loop:
- need_zero_guard = true;
- break;
- case unrolled_loop:
- need_zero_guard = true;
- unroll_factor = (TARGET_64BIT ? 4 : 2);
- break;
- case vector_loop:
- need_zero_guard = true;
- unroll_factor = 4;
- /* Find the widest supported mode. */
- move_mode = word_mode;
- while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode)
- && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing)
- move_mode = wider_mode;
-
- if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 128)
- move_mode = TImode;
-
- /* Find the corresponding vector mode with the same size as MOVE_MODE.
- MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
- if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
- {
- int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
- if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
- || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
- move_mode = word_mode;
- }
- gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
- break;
- case rep_prefix_8_byte:
- move_mode = DImode;
- break;
- case rep_prefix_4_byte:
- move_mode = SImode;
- break;
- case rep_prefix_1_byte:
- move_mode = QImode;
- break;
- }
- size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
- epilogue_size_needed = size_needed;
-
- /* If we are going to emit any library calls conditionally, make sure any
- pending stack adjustments happen before the first conditional branch;
- otherwise they will be emitted before the library call only and won't
- happen on the other branches. */
- if (dynamic_check != -1)
- do_pending_stack_adjust ();
-
- desired_align = decide_alignment (align, alg, expected_size, move_mode);
- if (!TARGET_ALIGN_STRINGOPS || noalign)
- align = desired_align;
-
- /* Step 1: Prologue guard. */
-
- /* Alignment code needs count to be in a register. */
- if (CONST_INT_P (count_exp) && desired_align > align)
- {
- if (INTVAL (count_exp) > desired_align
- && INTVAL (count_exp) > size_needed)
- {
- align_bytes
- = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
- if (align_bytes <= 0)
- align_bytes = 0;
- else
- align_bytes = desired_align - align_bytes;
- }
- if (align_bytes == 0)
- count_exp = force_reg (counter_mode (count_exp), count_exp);
- }
- gcc_assert (desired_align >= 1 && align >= 1);
-
- /* Misaligned move sequences handle both the prologue and the epilogue at once.
- Default code generation results in smaller code for large alignments
- and also avoids redundant work when sizes are known precisely. */
- misaligned_prologue_used
- = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
- && MAX (desired_align, epilogue_size_needed) <= 32
- && desired_align <= epilogue_size_needed
- && ((desired_align > align && !align_bytes)
- || (!count && epilogue_size_needed > 1)));
-
- /* Do the cheap promotion to allow better CSE across the
- main loop and epilogue (i.e. one load of the big constant in
- front of all the code).
- For now the misaligned move sequences do not have a fast path
- without broadcasting. */
- if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
- {
- if (alg == vector_loop)
- {
- gcc_assert (val_exp == const0_rtx);
- vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
- promoted_val = promote_duplicated_reg_to_size (val_exp,
- GET_MODE_SIZE (word_mode),
- desired_align, align);
- }
- else
- {
- promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
- desired_align, align);
- }
- }
- /* Misaligned move sequences handle both prologues and epilogues at once.
- Default code generation results in smaller code for large alignments and
- also avoids redundant work when sizes are known precisely. */
- if (misaligned_prologue_used)
- {
- /* The misaligned move prologue handles small blocks by itself. */
- expand_set_or_cpymem_prologue_epilogue_by_misaligned_moves
- (dst, src, &destreg, &srcreg,
- move_mode, promoted_val, vec_promoted_val,
- &count_exp,
- &jump_around_label,
- desired_align < align
- ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
- desired_align, align, &min_size, dynamic_check, issetmem);
- if (!issetmem)
- src = change_address (src, BLKmode, srcreg);
- dst = change_address (dst, BLKmode, destreg);
- set_mem_align (dst, desired_align * BITS_PER_UNIT);
- epilogue_size_needed = 0;
- if (need_zero_guard
- && min_size < (unsigned HOST_WIDE_INT) size_needed)
- {
- /* It is possible that we copied enough that the main loop will not
- execute. */
- gcc_assert (size_needed > 1);
- if (jump_around_label == NULL_RTX)
- jump_around_label = gen_label_rtx ();
- emit_cmp_and_jump_insns (count_exp,
- GEN_INT (size_needed),
- LTU, 0, counter_mode (count_exp), 1, jump_around_label);
- if (expected_size == -1
- || expected_size < (desired_align - align) / 2 + size_needed)
- predict_jump (REG_BR_PROB_BASE * 20 / 100);
- else
- predict_jump (REG_BR_PROB_BASE * 60 / 100);
- }
- }
- /* Ensure that the alignment prologue won't copy past the end of the block. */
- else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
- {
- epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
- /* The epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
- Make sure it is a power of 2. */
- epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
-
- /* To improve performance of small blocks, we jump around the VAL
- promotion. This means that if the promoted VAL is not constant,
- we might not use it in the epilogue and have to fall back to the byte
- loop variant. */
- if (issetmem && epilogue_size_needed > 2 && !promoted_val)
- force_loopy_epilogue = true;
- if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
- || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
- {
- /* If the main algorithm works on QImode, no epilogue is needed.
- For small sizes just don't align anything. */
- if (size_needed == 1)
- desired_align = align;
- else
- goto epilogue;
- }
- else if (!count
- && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
- {
- label = gen_label_rtx ();
- emit_cmp_and_jump_insns (count_exp,
- GEN_INT (epilogue_size_needed),
- LTU, 0, counter_mode (count_exp), 1, label);
- if (expected_size == -1 || expected_size < epilogue_size_needed)
- predict_jump (REG_BR_PROB_BASE * 60 / 100);
- else
- predict_jump (REG_BR_PROB_BASE * 20 / 100);
- }
- }
-
- /* Emit code to decide at runtime whether a library call or inline code
- should be used. */
- if (dynamic_check != -1)
- {
- if (!issetmem && CONST_INT_P (count_exp))
- {
- if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
- {
- emit_block_copy_via_libcall (dst, src, count_exp);
- count_exp = const0_rtx;
- goto epilogue;
- }
- }
- else
- {
- rtx_code_label *hot_label = gen_label_rtx ();
- if (jump_around_label == NULL_RTX)
- jump_around_label = gen_label_rtx ();
- emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
- LEU, 0, counter_mode (count_exp),
- 1, hot_label);
- predict_jump (REG_BR_PROB_BASE * 90 / 100);
- if (issetmem)
- set_storage_via_libcall (dst, count_exp, val_exp);
- else
- emit_block_copy_via_libcall (dst, src, count_exp);
- emit_jump (jump_around_label);
- emit_label (hot_label);
- }
- }
-
- /* Step 2: Alignment prologue. */
- /* Do the expensive promotion once we have branched off the small blocks. */
- if (issetmem && !promoted_val)
- promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
- desired_align, align);
-
- if (desired_align > align && !misaligned_prologue_used)
- {
- if (align_bytes == 0)
- {
- /* Except for the first move in the prologue, we no longer know
- the constant offset in the aliasing info. It does not seem worth
- the pain to maintain it for the first move, so throw away
- the info early. */
- dst = change_address (dst, BLKmode, destreg);
- if (!issetmem)
- src = change_address (src, BLKmode, srcreg);
- dst = expand_set_or_cpymem_prologue (dst, src, destreg, srcreg,
- promoted_val, vec_promoted_val,
- count_exp, align, desired_align,
- issetmem);
- /* At most desired_align - align bytes are copied. */
- if (min_size < (unsigned)(desired_align - align))
- min_size = 0;
- else
- min_size -= desired_align - align;
- }
- else
- {
- /* If we know how many bytes need to be stored before dst is
- sufficiently aligned, maintain aliasing info accurately. */
- dst = expand_set_or_cpymem_constant_prologue (dst, &src, destreg,
- srcreg,
- promoted_val,
- vec_promoted_val,
- desired_align,
- align_bytes,
- issetmem);
-
- count_exp = plus_constant (counter_mode (count_exp),
- count_exp, -align_bytes);
- count -= align_bytes;
- min_size -= align_bytes;
- max_size -= align_bytes;
- }
- if (need_zero_guard
- && min_size < (unsigned HOST_WIDE_INT) size_needed
- && (count < (unsigned HOST_WIDE_INT) size_needed
- || (align_bytes == 0
- && count < ((unsigned HOST_WIDE_INT) size_needed
- + desired_align - align))))
- {
- /* It is possible that we copied enough that the main loop will not
- execute. */
- gcc_assert (size_needed > 1);
- if (label == NULL_RTX)
- label = gen_label_rtx ();
- emit_cmp_and_jump_insns (count_exp,
- GEN_INT (size_needed),
- LTU, 0, counter_mode (count_exp), 1, label);
- if (expected_size == -1
- || expected_size < (desired_align - align) / 2 + size_needed)
- predict_jump (REG_BR_PROB_BASE * 20 / 100);
- else
- predict_jump (REG_BR_PROB_BASE * 60 / 100);
- }
- }
- if (label && size_needed == 1)
- {
- emit_label (label);
- LABEL_NUSES (label) = 1;
- label = NULL;
- epilogue_size_needed = 1;
- if (issetmem)
- promoted_val = val_exp;
- }
- else if (label == NULL_RTX && !misaligned_prologue_used)
- epilogue_size_needed = size_needed;
-
- /* Step 3: Main loop. */
-
- switch (alg)
- {
- case libcall:
- case no_stringop:
- case last_alg:
- gcc_unreachable ();
- case loop_1_byte:
- case loop:
- case unrolled_loop:
- expand_set_or_cpymem_via_loop (dst, src, destreg, srcreg, promoted_val,
- count_exp, move_mode, unroll_factor,
- expected_size, issetmem);
- break;
- case vector_loop:
- expand_set_or_cpymem_via_loop (dst, src, destreg, srcreg,
- vec_promoted_val, count_exp, move_mode,
- unroll_factor, expected_size, issetmem);
- break;
- case rep_prefix_8_byte:
- case rep_prefix_4_byte:
- case rep_prefix_1_byte:
- expand_set_or_cpymem_via_rep (dst, src, destreg, srcreg, promoted_val,
- val_exp, count_exp, move_mode, issetmem);
- break;
- }
- /* Properly adjust the offsets of src and dest memory for aliasing. */
- if (CONST_INT_P (count_exp))
- {
- if (!issetmem)
- src = adjust_automodify_address_nv (src, BLKmode, srcreg,
- (count / size_needed) * size_needed);
- dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
- (count / size_needed) * size_needed);
- }
- else
- {
- if (!issetmem)
- src = change_address (src, BLKmode, srcreg);
- dst = change_address (dst, BLKmode, destreg);
- }
-
- /* Step 4: Epilogue to copy the remaining bytes. */
- epilogue:
- if (label)
- {
- /* When the main loop is done, COUNT_EXP might hold the original count,
- while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
- The epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
- bytes. Compensate if needed. */
-
- if (size_needed < epilogue_size_needed)
- {
- tmp = expand_simple_binop (counter_mode (count_exp), AND, count_exp,
- GEN_INT (size_needed - 1), count_exp, 1,
- OPTAB_DIRECT);
- if (tmp != count_exp)
- emit_move_insn (count_exp, tmp);
- }
- emit_label (label);
- LABEL_NUSES (label) = 1;
- }
-
- if (count_exp != const0_rtx && epilogue_size_needed > 1)
- {
- if (force_loopy_epilogue)
- expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
- epilogue_size_needed);
- else
- {
- if (issetmem)
- expand_setmem_epilogue (dst, destreg, promoted_val,
- vec_promoted_val, count_exp,
- epilogue_size_needed);
- else
- expand_cpymem_epilogue (dst, src, destreg, srcreg, count_exp,
- epilogue_size_needed);
- }
- }
- if (jump_around_label)
- emit_label (jump_around_label);
- return true;
-}
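-
-/* A minimal standalone sketch (not part of GCC) of the code shape the
- expander above produces: an alignment prologue, a main loop moving
- SIZE_NEEDED bytes per iteration, and an epilogue for the remaining
- COUNT & (SIZE_NEEDED - 1) bytes. All names here are illustrative. */
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
-
-static void
-sketch_inline_memcpy (char *dst, const char *src, size_t count)
-{
- const size_t size_needed = 16; /* GET_MODE_SIZE (move_mode) * unroll. */
-
- /* Prologue: byte copies until DST reaches the desired alignment. */
- while (((uintptr_t) dst % size_needed) != 0 && count)
- *dst++ = *src++, count--;
-
- /* Main loop: SIZE_NEEDED bytes per iteration. */
- while (count >= size_needed)
- {
- memcpy (dst, src, size_needed);
- dst += size_needed, src += size_needed, count -= size_needed;
- }
-
- /* Epilogue: since SIZE_NEEDED is a power of 2, what is left here is
- exactly COUNT & (SIZE_NEEDED - 1) bytes. */
- while (count--)
- *dst++ = *src++;
-}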
-
-
- /* Expand the appropriate insns for doing strlen if not just doing
- repnz; scasb
-
- out = result, initialized with the start address
- align_rtx = alignment of the address.
- scratch = scratch register, initialized with the start address when
- not aligned, otherwise undefined.
-
- This is just the body. It needs the initializations mentioned above and
- some address computation at the end. These things are done in i386.md. */
-
-static void
-ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
-{
- int align;
- rtx tmp;
- rtx_code_label *align_2_label = NULL;
- rtx_code_label *align_3_label = NULL;
- rtx_code_label *align_4_label = gen_label_rtx ();
- rtx_code_label *end_0_label = gen_label_rtx ();
- rtx mem;
- rtx tmpreg = gen_reg_rtx (SImode);
- rtx scratch = gen_reg_rtx (SImode);
- rtx cmp;
-
- align = 0;
- if (CONST_INT_P (align_rtx))
- align = INTVAL (align_rtx);
-
- /* Loop to check 1..3 bytes for null to get an aligned pointer. */
-
- /* Is there a known alignment and is it less than 4? */
- if (align < 4)
- {
- rtx scratch1 = gen_reg_rtx (Pmode);
- emit_move_insn (scratch1, out);
- /* Is there a known alignment and is it not 2? */
- if (align != 2)
- {
- align_3_label = gen_label_rtx (); /* Label when aligned to a 3-byte boundary */
- align_2_label = gen_label_rtx (); /* Label when aligned to a 2-byte boundary */
-
- /* Leave just the two lower bits. */
- align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
- NULL_RTX, 0, OPTAB_WIDEN);
-
- emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
- Pmode, 1, align_4_label);
- emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
- Pmode, 1, align_2_label);
- emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
- Pmode, 1, align_3_label);
- }
- else
- {
- /* Since the alignment is 2, we have to check 2 or 0 bytes;
- check whether it is aligned to a 4-byte boundary. */
-
- align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
- NULL_RTX, 0, OPTAB_WIDEN);
-
- emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
- Pmode, 1, align_4_label);
- }
-
- mem = change_address (src, QImode, out);
-
- /* Now compare the bytes. */
-
- /* Compare the first n unaligned bytes on a byte-by-byte basis. */
- emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
- QImode, 1, end_0_label);
-
- /* Increment the address. */
- emit_insn (gen_add2_insn (out, const1_rtx));
-
- /* Not needed with an alignment of 2. */
- if (align != 2)
- {
- emit_label (align_2_label);
-
- emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
- end_0_label);
-
- emit_insn (gen_add2_insn (out, const1_rtx));
-
- emit_label (align_3_label);
- }
-
- emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
- end_0_label);
-
- emit_insn (gen_add2_insn (out, const1_rtx));
- }
-
- /* Generate a loop to check 4 bytes at a time. It is not a good idea to
- align this loop: it only makes programs larger and does not speed
- them up. */
- emit_label (align_4_label);
-
- mem = change_address (src, SImode, out);
- emit_move_insn (scratch, mem);
- emit_insn (gen_add2_insn (out, GEN_INT (4)));
-
- /* This formula yields a nonzero result iff one of the bytes is zero.
- This saves three branches inside the loop and many cycles. */
-
- emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
- emit_insn (gen_one_cmplsi2 (scratch, scratch));
- emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
- emit_insn (gen_andsi3 (tmpreg, tmpreg,
- gen_int_mode (0x80808080, SImode)));
- emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
- align_4_label);
-
- if (TARGET_CMOVE)
- {
- rtx reg = gen_reg_rtx (SImode);
- rtx reg2 = gen_reg_rtx (Pmode);
- emit_move_insn (reg, tmpreg);
- emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
-
- /* If zero is not in the first two bytes, move two bytes forward. */
- emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
- tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
- tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
- emit_insn (gen_rtx_SET (tmpreg,
- gen_rtx_IF_THEN_ELSE (SImode, tmp,
- reg,
- tmpreg)));
- /* Emit lea manually to avoid clobbering the flags. */
- emit_insn (gen_rtx_SET (reg2, plus_constant (Pmode, out, 2)));
-
- tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
- tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
- emit_insn (gen_rtx_SET (out,
- gen_rtx_IF_THEN_ELSE (Pmode, tmp,
- reg2,
- out)));
- }
- else
- {
- rtx_code_label *end_2_label = gen_label_rtx ();
- /* Is zero in the first two bytes? */
-
- emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
- tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
- tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
- gen_rtx_LABEL_REF (VOIDmode, end_2_label),
- pc_rtx);
- tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
- JUMP_LABEL (tmp) = end_2_label;
-
- /* Not in the first two. Move two bytes forward. */
- emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
- emit_insn (gen_add2_insn (out, const2_rtx));
-
- emit_label (end_2_label);
-
- }
-
- /* Avoid a branch in fixing up the byte. */
- tmpreg = gen_lowpart (QImode, tmpreg);
- emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
- tmp = gen_rtx_REG (CCmode, FLAGS_REG);
- cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
- emit_insn (gen_sub3_carry (Pmode, out, out, GEN_INT (3), tmp, cmp));
-
- emit_label (end_0_label);
-}
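-
-/* A standalone illustration (not part of GCC) of the zero-byte test the
- loop above emits: (v - 0x01010101) & ~v & 0x80808080 is nonzero iff
- some byte of V is zero. The subtraction borrows into a byte's top bit
- when that byte is zero, and ANDing with ~v filters out bytes whose top
- bit was already set, so the result is nonzero exactly when V contains
- a zero byte. */
-#include <stdint.h>
-
-static int
-sketch_has_zero_byte (uint32_t v)
-{
- return ((v - 0x01010101u) & ~v & 0x80808080u) != 0;
-}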
-
-/* Expand strlen. */
-
-bool
-ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
-{
- if (TARGET_UNROLL_STRLEN
- && TARGET_INLINE_ALL_STRINGOPS
- && eoschar == const0_rtx
- && optimize > 1)
- {
- /* The generic case of the strlen expander is long. Avoid
- expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
- rtx addr = force_reg (Pmode, XEXP (src, 0));
- /* It seems that some optimizers do not combine a call like
- foo(strlen(bar), strlen(bar));
- when the move and the subtraction are done here. The length is
- calculated just once when these instructions are done inside
- output_strlen_unroll(). But since &bar[strlen(bar)] is
- often used and this uses one fewer register for the lifetime of
- output_strlen_unroll(), this is better. */
-
- emit_move_insn (out, addr);
-
- ix86_expand_strlensi_unroll_1 (out, src, align);
-
- /* strlensi_unroll_1 returns the address of the zero at the end of
- the string, like memchr(), so compute the length by subtracting
- the start address. */
- emit_insn (gen_sub2_insn (out, addr));
- return true;
- }
- else
- return false;
-}
-
- /* For a given symbol (function), construct code to compute the address of
- its PLT entry in the large x86-64 PIC model. */
-
-static rtx
-construct_plt_address (rtx symbol)
-{
- rtx tmp, unspec;
-
- gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
- gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
- gcc_assert (Pmode == DImode);
-
- tmp = gen_reg_rtx (Pmode);
- unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
-
- emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
- emit_insn (gen_add2_insn (tmp, pic_offset_table_rtx));
- return tmp;
-}
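-
-/* For illustration: with -mcmodel=large -fPIC the sequence built above
- corresponds to assembly along the lines of
- movabs $func@PLTOFF, %r11
- add %r15, %r11
- where %r15 stands for the PIC base; the actual registers are chosen by
- the register allocator. */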
-
- /* Additional registers that are clobbered by SysV calls but preserved
- by the MS ABI. */
-
-static int const x86_64_ms_sysv_extra_clobbered_registers
- [NUM_X86_64_MS_CLOBBERED_REGS] =
-{
- SI_REG, DI_REG,
- XMM6_REG, XMM7_REG,
- XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
- XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
-};
-
-rtx_insn *
-ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
- rtx callarg2,
- rtx pop, bool sibcall)
-{
- rtx vec[3];
- rtx use = NULL, call;
- unsigned int vec_len = 0;
- tree fndecl;
-
- if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
- {
- fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
- if (fndecl
- && (lookup_attribute ("interrupt",
- TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))
- error ("interrupt service routine cannot be called directly");
- }
- else
- fndecl = NULL_TREE;
-
- if (pop == const0_rtx)
- pop = NULL;
- gcc_assert (!TARGET_64BIT || !pop);
-
- if (TARGET_MACHO && !TARGET_64BIT)
- {
-#if TARGET_MACHO
- if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
- fnaddr = machopic_indirect_call_target (fnaddr);
-#endif
- }
- else
- {
- /* Static functions and indirect calls don't need the PIC register. Also
- check whether the PLT was explicitly avoided via no-plt or the "noplt"
- attribute, making it an indirect call. */
- rtx addr = XEXP (fnaddr, 0);
- if (flag_pic
- && GET_CODE (addr) == SYMBOL_REF
- && !SYMBOL_REF_LOCAL_P (addr))
- {
- if (flag_plt
- && (SYMBOL_REF_DECL (addr) == NULL_TREE
- || !lookup_attribute ("noplt",
- DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
- {
- if (!TARGET_64BIT
- || (ix86_cmodel == CM_LARGE_PIC
- && DEFAULT_ABI != MS_ABI))
- {
- use_reg (&use, gen_rtx_REG (Pmode,
- REAL_PIC_OFFSET_TABLE_REGNUM));
- if (ix86_use_pseudo_pic_reg ())
- emit_move_insn (gen_rtx_REG (Pmode,
- REAL_PIC_OFFSET_TABLE_REGNUM),
- pic_offset_table_rtx);
- }
- }
- else if (!TARGET_PECOFF && !TARGET_MACHO)
- {
- if (TARGET_64BIT)
- {
- fnaddr = gen_rtx_UNSPEC (Pmode,
- gen_rtvec (1, addr),
- UNSPEC_GOTPCREL);
- fnaddr = gen_rtx_CONST (Pmode, fnaddr);
- }
- else
- {
- fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
- UNSPEC_GOT);
- fnaddr = gen_rtx_CONST (Pmode, fnaddr);
- fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
- fnaddr);
- }
- fnaddr = gen_const_mem (Pmode, fnaddr);
- /* Pmode may not be the same as word_mode for x32, which
- doesn't support indirect branches via a 32-bit memory slot.
- Since the x32 GOT slot is 64 bits with the upper 32 bits zero,
- an indirect branch via the x32 GOT slot is OK. */
- if (GET_MODE (fnaddr) != word_mode)
- fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
- fnaddr = gen_rtx_MEM (QImode, fnaddr);
- }
- }
- }
-
- /* Skip setting up the RAX register for -mskip-rax-setup when there are no
- parameters passed in vector registers. */
- if (TARGET_64BIT
- && (INTVAL (callarg2) > 0
- || (INTVAL (callarg2) == 0
- && (TARGET_SSE || !flag_skip_rax_setup))))
- {
- rtx al = gen_rtx_REG (QImode, AX_REG);
- emit_move_insn (al, callarg2);
- use_reg (&use, al);
- }
-
- if (ix86_cmodel == CM_LARGE_PIC
- && !TARGET_PECOFF
- && MEM_P (fnaddr)
- && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
- && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
- fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
- /* Since the x32 GOT slot is 64 bits with the upper 32 bits zero, an
- indirect branch via the x32 GOT slot is OK. */
- else if (!(TARGET_X32
- && MEM_P (fnaddr)
- && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
- && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
- && (sibcall
- ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
- : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
- {
- fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
- fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
- }
-
- call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
-
- if (retval)
- call = gen_rtx_SET (retval, call);
- vec[vec_len++] = call;
-
- if (pop)
- {
- pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
- pop = gen_rtx_SET (stack_pointer_rtx, pop);
- vec[vec_len++] = pop;
- }
-
- if (cfun->machine->no_caller_saved_registers
- && (!fndecl
- || (!TREE_THIS_VOLATILE (fndecl)
- && !lookup_attribute ("no_caller_saved_registers",
- TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))))
- {
- static const char ix86_call_used_regs[] = CALL_USED_REGISTERS;
- bool is_64bit_ms_abi = (TARGET_64BIT
- && ix86_function_abi (fndecl) == MS_ABI);
- char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi);
-
- /* If there are no caller-saved registers, add all registers
- that are clobbered by the call which returns. */
- for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
- if (!fixed_regs[i]
- && (ix86_call_used_regs[i] == 1
- || (ix86_call_used_regs[i] & c_mask))
- && !STACK_REGNO_P (i)
- && !MMX_REGNO_P (i))
- clobber_reg (&use,
- gen_rtx_REG (GET_MODE (regno_reg_rtx[i]), i));
- }
- else if (TARGET_64BIT_MS_ABI
- && (!callarg2 || INTVAL (callarg2) != -2))
- {
- unsigned i;
-
- for (i = 0; i < NUM_X86_64_MS_CLOBBERED_REGS; i++)
- {
- int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
- machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
-
- clobber_reg (&use, gen_rtx_REG (mode, regno));
- }
-
- /* Set here, but it may get cleared later. */
- if (TARGET_CALL_MS2SYSV_XLOGUES)
- {
- if (!TARGET_SSE)
- ;
-
- /* Don't break hot-patched functions. */
- else if (ix86_function_ms_hook_prologue (current_function_decl))
- ;
-
- /* TODO: Cases not yet examined. */
- else if (flag_split_stack)
- warn_once_call_ms2sysv_xlogues ("-fsplit-stack");
-
- else
- {
- gcc_assert (!reload_completed);
- cfun->machine->call_ms2sysv = true;
- }
- }
- }
-
- if (vec_len > 1)
- call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
- rtx_insn *call_insn = emit_call_insn (call);
- if (use)
- CALL_INSN_FUNCTION_USAGE (call_insn) = use;
-
- return call_insn;
-}
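-
-/* For illustration, the no-PLT GOT indirection built above corresponds to
- call sequences roughly like (AT&T syntax)
- call *func@GOTPCREL(%rip) (64-bit)
- call *func@GOT(%ebx) (32-bit, %ebx holding the GOT pointer)
- instead of a direct call through the PLT stub. */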
-
- /* Split a simple return that pops POPC bytes from the stack into an
- indirect branch with a stack adjustment. */
-
-void
-ix86_split_simple_return_pop_internal (rtx popc)
-{
- struct machine_function *m = cfun->machine;
- rtx ecx = gen_rtx_REG (SImode, CX_REG);
- rtx_insn *insn;
-
- /* There is no "pascal" calling convention in any 64bit ABI. */
- gcc_assert (!TARGET_64BIT);
-
- insn = emit_insn (gen_pop (ecx));
- m->fs.cfa_offset -= UNITS_PER_WORD;
- m->fs.sp_offset -= UNITS_PER_WORD;
-
- rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
- x = gen_rtx_SET (stack_pointer_rtx, x);
- add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
- add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
- RTX_FRAME_RELATED_P (insn) = 1;
-
- x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, popc);
- x = gen_rtx_SET (stack_pointer_rtx, x);
- insn = emit_insn (x);
- add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
- RTX_FRAME_RELATED_P (insn) = 1;
-
- /* Now the return address is in ECX. */
- emit_jump_insn (gen_simple_return_indirect_internal (ecx));
-}
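-
-/* The split above replaces "ret $N" with the equivalent sequence
- pop %ecx
- add $N, %esp
- jmp *%ecx
- with REG_CFA notes keeping the unwind information consistent. */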
-
-/* Errors in the source file can cause expand_expr to return const0_rtx
- where we expect a vector. To avoid crashing, use one of the vector
- clear instructions. */
-
-static rtx
-safe_vector_operand (rtx x, machine_mode mode)
-{
- if (x == const0_rtx)
- x = CONST0_RTX (mode);
- return x;
-}
-
-/* Subroutine of ix86_expand_builtin to take care of binop insns. */
-
-static rtx
-ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- machine_mode tmode = insn_data[icode].operand[0].mode;
- machine_mode mode0 = insn_data[icode].operand[1].mode;
- machine_mode mode1 = insn_data[icode].operand[2].mode;
-
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
- if (VECTOR_MODE_P (mode1))
- op1 = safe_vector_operand (op1, mode1);
-
- if (optimize || !target
- || GET_MODE (target) != tmode
- || !insn_data[icode].operand[0].predicate (target, tmode))
- target = gen_reg_rtx (tmode);
-
- if (GET_MODE (op1) == SImode && mode1 == TImode)
- {
- rtx x = gen_reg_rtx (V4SImode);
- emit_insn (gen_sse2_loadd (x, op1));
- op1 = gen_lowpart (TImode, x);
- }
-
- if (!insn_data[icode].operand[1].predicate (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (!insn_data[icode].operand[2].predicate (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
-
- pat = GEN_FCN (icode) (target, op0, op1);
- if (! pat)
- return 0;
-
- emit_insn (pat);
-
- return target;
-}
-
-/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
-
-static rtx
-ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
- enum ix86_builtin_func_type m_type,
- enum rtx_code sub_code)
-{
- rtx pat;
- int i;
- int nargs;
- bool comparison_p = false;
- bool tf_p = false;
- bool last_arg_constant = false;
- int num_memory = 0;
- struct {
- rtx op;
- machine_mode mode;
- } args[4];
-
- machine_mode tmode = insn_data[icode].operand[0].mode;
-
- switch (m_type)
- {
- case MULTI_ARG_4_DF2_DI_I:
- case MULTI_ARG_4_DF2_DI_I1:
- case MULTI_ARG_4_SF2_SI_I:
- case MULTI_ARG_4_SF2_SI_I1:
- nargs = 4;
- last_arg_constant = true;
- break;
-
- case MULTI_ARG_3_SF:
- case MULTI_ARG_3_DF:
- case MULTI_ARG_3_SF2:
- case MULTI_ARG_3_DF2:
- case MULTI_ARG_3_DI:
- case MULTI_ARG_3_SI:
- case MULTI_ARG_3_SI_DI:
- case MULTI_ARG_3_HI:
- case MULTI_ARG_3_HI_SI:
- case MULTI_ARG_3_QI:
- case MULTI_ARG_3_DI2:
- case MULTI_ARG_3_SI2:
- case MULTI_ARG_3_HI2:
- case MULTI_ARG_3_QI2:
- nargs = 3;
- break;
-
- case MULTI_ARG_2_SF:
- case MULTI_ARG_2_DF:
- case MULTI_ARG_2_DI:
- case MULTI_ARG_2_SI:
- case MULTI_ARG_2_HI:
- case MULTI_ARG_2_QI:
- nargs = 2;
- break;
-
- case MULTI_ARG_2_DI_IMM:
- case MULTI_ARG_2_SI_IMM:
- case MULTI_ARG_2_HI_IMM:
- case MULTI_ARG_2_QI_IMM:
- nargs = 2;
- last_arg_constant = true;
- break;
-
- case MULTI_ARG_1_SF:
- case MULTI_ARG_1_DF:
- case MULTI_ARG_1_SF2:
- case MULTI_ARG_1_DF2:
- case MULTI_ARG_1_DI:
- case MULTI_ARG_1_SI:
- case MULTI_ARG_1_HI:
- case MULTI_ARG_1_QI:
- case MULTI_ARG_1_SI_DI:
- case MULTI_ARG_1_HI_DI:
- case MULTI_ARG_1_HI_SI:
- case MULTI_ARG_1_QI_DI:
- case MULTI_ARG_1_QI_SI:
- case MULTI_ARG_1_QI_HI:
- nargs = 1;
- break;
-
- case MULTI_ARG_2_DI_CMP:
- case MULTI_ARG_2_SI_CMP:
- case MULTI_ARG_2_HI_CMP:
- case MULTI_ARG_2_QI_CMP:
- nargs = 2;
- comparison_p = true;
- break;
-
- case MULTI_ARG_2_SF_TF:
- case MULTI_ARG_2_DF_TF:
- case MULTI_ARG_2_DI_TF:
- case MULTI_ARG_2_SI_TF:
- case MULTI_ARG_2_HI_TF:
- case MULTI_ARG_2_QI_TF:
- nargs = 2;
- tf_p = true;
- break;
-
- default:
- gcc_unreachable ();
- }
-
- if (optimize || !target
- || GET_MODE (target) != tmode
- || !insn_data[icode].operand[0].predicate (target, tmode))
- target = gen_reg_rtx (tmode);
- else if (memory_operand (target, tmode))
- num_memory++;
-
- gcc_assert (nargs <= 4);
-
- for (i = 0; i < nargs; i++)
- {
- tree arg = CALL_EXPR_ARG (exp, i);
- rtx op = expand_normal (arg);
- int adjust = (comparison_p) ? 1 : 0;
- machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
-
- if (last_arg_constant && i == nargs - 1)
- {
- if (!insn_data[icode].operand[i + 1].predicate (op, mode))
- {
- enum insn_code new_icode = icode;
- switch (icode)
- {
- case CODE_FOR_xop_vpermil2v2df3:
- case CODE_FOR_xop_vpermil2v4sf3:
- case CODE_FOR_xop_vpermil2v4df3:
- case CODE_FOR_xop_vpermil2v8sf3:
- error ("the last argument must be a 2-bit immediate");
- return gen_reg_rtx (tmode);
- case CODE_FOR_xop_rotlv2di3:
- new_icode = CODE_FOR_rotlv2di3;
- goto xop_rotl;
- case CODE_FOR_xop_rotlv4si3:
- new_icode = CODE_FOR_rotlv4si3;
- goto xop_rotl;
- case CODE_FOR_xop_rotlv8hi3:
- new_icode = CODE_FOR_rotlv8hi3;
- goto xop_rotl;
- case CODE_FOR_xop_rotlv16qi3:
- new_icode = CODE_FOR_rotlv16qi3;
- xop_rotl:
- if (CONST_INT_P (op))
- {
- int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
- op = GEN_INT (INTVAL (op) & mask);
- gcc_checking_assert
- (insn_data[icode].operand[i + 1].predicate (op, mode));
- }
- else
- {
- gcc_checking_assert
- (nargs == 2
- && insn_data[new_icode].operand[0].mode == tmode
- && insn_data[new_icode].operand[1].mode == tmode
- && insn_data[new_icode].operand[2].mode == mode
- && insn_data[new_icode].operand[0].predicate
- == insn_data[icode].operand[0].predicate
- && insn_data[new_icode].operand[1].predicate
- == insn_data[icode].operand[1].predicate);
- icode = new_icode;
- goto non_constant;
- }
- break;
- default:
- gcc_unreachable ();
- }
- }
- }
- else
- {
- non_constant:
- if (VECTOR_MODE_P (mode))
- op = safe_vector_operand (op, mode);
-
- /* If we aren't optimizing, only allow one memory operand to be
- generated. */
- if (memory_operand (op, mode))
- num_memory++;
-
- gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
-
- if (optimize
- || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
- || num_memory > 1)
- op = force_reg (mode, op);
- }
-
- args[i].op = op;
- args[i].mode = mode;
- }
-
- switch (nargs)
- {
- case 1:
- pat = GEN_FCN (icode) (target, args[0].op);
- break;
-
- case 2:
- if (tf_p)
- pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
- GEN_INT ((int)sub_code));
- else if (! comparison_p)
- pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
- else
- {
- rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
- args[0].op,
- args[1].op);
-
- pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
- }
- break;
-
- case 3:
- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
- break;
-
- case 4:
- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
- break;
-
- default:
- gcc_unreachable ();
- }
-
- if (! pat)
- return 0;
-
- emit_insn (pat);
- return target;
-}
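-
-/* A standalone illustration (not part of GCC) of the constant-rotate
- fixup in the XOP cases above: an out-of-range immediate count is
- masked to the element bit width rather than rejected. */
-#include <stdint.h>
-
-static uint32_t
-sketch_rotl32 (uint32_t x, int count)
-{
- count &= 31; /* GET_MODE_UNIT_BITSIZE (tmode) - 1. */
- return count ? (x << count) | (x >> (32 - count)) : x;
-}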
-
-/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
- insns with vec_merge. */
-
-static rtx
-ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
- rtx target)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- rtx op1, op0 = expand_normal (arg0);
- machine_mode tmode = insn_data[icode].operand[0].mode;
- machine_mode mode0 = insn_data[icode].operand[1].mode;
-
- if (optimize || !target
- || GET_MODE (target) != tmode
- || !insn_data[icode].operand[0].predicate (target, tmode))
- target = gen_reg_rtx (tmode);
-
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
-
- if ((optimize && !register_operand (op0, mode0))
- || !insn_data[icode].operand[1].predicate (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
-
- op1 = op0;
- if (!insn_data[icode].operand[2].predicate (op1, mode0))
- op1 = copy_to_mode_reg (mode0, op1);
-
- pat = GEN_FCN (icode) (target, op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-}
-
-/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
-
-static rtx
-ix86_expand_sse_compare (const struct builtin_description *d,
- tree exp, rtx target, bool swap)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- rtx op2;
- machine_mode tmode = insn_data[d->icode].operand[0].mode;
- machine_mode mode0 = insn_data[d->icode].operand[1].mode;
- machine_mode mode1 = insn_data[d->icode].operand[2].mode;
- enum rtx_code comparison = d->comparison;
-
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
- if (VECTOR_MODE_P (mode1))
- op1 = safe_vector_operand (op1, mode1);
-
- /* Swap operands if we have a comparison that isn't available in
- hardware. */
- if (swap)
- std::swap (op0, op1);
-
- if (optimize || !target
- || GET_MODE (target) != tmode
- || !insn_data[d->icode].operand[0].predicate (target, tmode))
- target = gen_reg_rtx (tmode);
-
- if ((optimize && !register_operand (op0, mode0))
- || !insn_data[d->icode].operand[1].predicate (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if ((optimize && !register_operand (op1, mode1))
- || !insn_data[d->icode].operand[2].predicate (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
-
- op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
- pat = GEN_FCN (d->icode) (target, op0, op1, op2);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-}
-
-/* Subroutine of ix86_expand_builtin to take care of comi insns. */
-
-static rtx
-ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
- rtx target)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- machine_mode mode0 = insn_data[d->icode].operand[0].mode;
- machine_mode mode1 = insn_data[d->icode].operand[1].mode;
- enum rtx_code comparison = d->comparison;
-
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
- if (VECTOR_MODE_P (mode1))
- op1 = safe_vector_operand (op1, mode1);
-
- /* Swap operands if we have a comparison that isn't available in
- hardware. */
- if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
- std::swap (op0, op1);
-
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
- target = gen_rtx_SUBREG (QImode, target, 0);
-
- if ((optimize && !register_operand (op0, mode0))
- || !insn_data[d->icode].operand[0].predicate (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if ((optimize && !register_operand (op1, mode1))
- || !insn_data[d->icode].operand[1].predicate (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
-
- pat = GEN_FCN (d->icode) (op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (comparison, QImode,
- SET_DEST (pat),
- const0_rtx)));
-
- return SUBREG_REG (target);
-}
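-
-/* For illustration, the comi expansion above amounts to a flag-setting
- compare plus a setcc into the low byte of a zeroed register, e.g.
- xor %eax, %eax
- comisd %xmm1, %xmm0
- setcc %al (condition taken from d->comparison)
- so the SImode result is already zero-extended. */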
-
-/* Subroutines of ix86_expand_args_builtin to take care of round insns. */
-
-static rtx
-ix86_expand_sse_round (const struct builtin_description *d, tree exp,
- rtx target)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- rtx op1, op0 = expand_normal (arg0);
- machine_mode tmode = insn_data[d->icode].operand[0].mode;
- machine_mode mode0 = insn_data[d->icode].operand[1].mode;
-
- if (optimize || target == 0
- || GET_MODE (target) != tmode
- || !insn_data[d->icode].operand[0].predicate (target, tmode))
- target = gen_reg_rtx (tmode);
-
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
-
- if ((optimize && !register_operand (op0, mode0))
- || !insn_data[d->icode].operand[0].predicate (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
-
- op1 = GEN_INT (d->comparison);
-
- pat = GEN_FCN (d->icode) (target, op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-}
-
-static rtx
-ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
- tree exp, rtx target)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- rtx op2;
- machine_mode tmode = insn_data[d->icode].operand[0].mode;
- machine_mode mode0 = insn_data[d->icode].operand[1].mode;
- machine_mode mode1 = insn_data[d->icode].operand[2].mode;
-
- if (optimize || target == 0
- || GET_MODE (target) != tmode
- || !insn_data[d->icode].operand[0].predicate (target, tmode))
- target = gen_reg_rtx (tmode);
-
- op0 = safe_vector_operand (op0, mode0);
- op1 = safe_vector_operand (op1, mode1);
-
- if ((optimize && !register_operand (op0, mode0))
- || !insn_data[d->icode].operand[0].predicate (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if ((optimize && !register_operand (op1, mode1))
- || !insn_data[d->icode].operand[1].predicate (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
-
- op2 = GEN_INT (d->comparison);
-
- pat = GEN_FCN (d->icode) (target, op0, op1, op2);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-}
-
-/* Subroutine of ix86_expand_builtin to take care of ptest insns. */
-
-static rtx
-ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
- rtx target)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- machine_mode mode0 = insn_data[d->icode].operand[0].mode;
- machine_mode mode1 = insn_data[d->icode].operand[1].mode;
- enum rtx_code comparison = d->comparison;
-
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
- if (VECTOR_MODE_P (mode1))
- op1 = safe_vector_operand (op1, mode1);
-
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
- target = gen_rtx_SUBREG (QImode, target, 0);
-
- if ((optimize && !register_operand (op0, mode0))
- || !insn_data[d->icode].operand[0].predicate (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if ((optimize && !register_operand (op1, mode1))
- || !insn_data[d->icode].operand[1].predicate (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
-
- pat = GEN_FCN (d->icode) (op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (comparison, QImode,
- SET_DEST (pat),
- const0_rtx)));
-
- return SUBREG_REG (target);
-}
-
-/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
-
-static rtx
-ix86_expand_sse_pcmpestr (const struct builtin_description *d,
- tree exp, rtx target)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- tree arg2 = CALL_EXPR_ARG (exp, 2);
- tree arg3 = CALL_EXPR_ARG (exp, 3);
- tree arg4 = CALL_EXPR_ARG (exp, 4);
- rtx scratch0, scratch1;
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- rtx op2 = expand_normal (arg2);
- rtx op3 = expand_normal (arg3);
- rtx op4 = expand_normal (arg4);
- machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
-
- tmode0 = insn_data[d->icode].operand[0].mode;
- tmode1 = insn_data[d->icode].operand[1].mode;
- modev2 = insn_data[d->icode].operand[2].mode;
- modei3 = insn_data[d->icode].operand[3].mode;
- modev4 = insn_data[d->icode].operand[4].mode;
- modei5 = insn_data[d->icode].operand[5].mode;
- modeimm = insn_data[d->icode].operand[6].mode;
-
- if (VECTOR_MODE_P (modev2))
- op0 = safe_vector_operand (op0, modev2);
- if (VECTOR_MODE_P (modev4))
- op2 = safe_vector_operand (op2, modev4);
-
- if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
- op0 = copy_to_mode_reg (modev2, op0);
- if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
- op1 = copy_to_mode_reg (modei3, op1);
- if ((optimize && !register_operand (op2, modev4))
- || !insn_data[d->icode].operand[4].predicate (op2, modev4))
- op2 = copy_to_mode_reg (modev4, op2);
- if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
- op3 = copy_to_mode_reg (modei5, op3);
-
- if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
- {
- error ("the fifth argument must be an 8-bit immediate");
- return const0_rtx;
- }
-
- if (d->code == IX86_BUILTIN_PCMPESTRI128)
- {
- if (optimize || !target
- || GET_MODE (target) != tmode0
- || !insn_data[d->icode].operand[0].predicate (target, tmode0))
- target = gen_reg_rtx (tmode0);
-
- scratch1 = gen_reg_rtx (tmode1);
-
- pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
- }
- else if (d->code == IX86_BUILTIN_PCMPESTRM128)
- {
- if (optimize || !target
- || GET_MODE (target) != tmode1
- || !insn_data[d->icode].operand[1].predicate (target, tmode1))
- target = gen_reg_rtx (tmode1);
-
- scratch0 = gen_reg_rtx (tmode0);
-
- pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
- }
- else
- {
- gcc_assert (d->flag);
-
- scratch0 = gen_reg_rtx (tmode0);
- scratch1 = gen_reg_rtx (tmode1);
-
- pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
- }
-
- if (! pat)
- return 0;
-
- emit_insn (pat);
-
- if (d->flag)
- {
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
- target = gen_rtx_SUBREG (QImode, target, 0);
-
- emit_insn
- (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (EQ, QImode,
- gen_rtx_REG ((machine_mode) d->flag,
- FLAGS_REG),
- const0_rtx)));
- return SUBREG_REG (target);
- }
- else
- return target;
-}
-
-
-/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
-
-static rtx
-ix86_expand_sse_pcmpistr (const struct builtin_description *d,
- tree exp, rtx target)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- tree arg2 = CALL_EXPR_ARG (exp, 2);
- rtx scratch0, scratch1;
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- rtx op2 = expand_normal (arg2);
- machine_mode tmode0, tmode1, modev2, modev3, modeimm;
-
- tmode0 = insn_data[d->icode].operand[0].mode;
- tmode1 = insn_data[d->icode].operand[1].mode;
- modev2 = insn_data[d->icode].operand[2].mode;
- modev3 = insn_data[d->icode].operand[3].mode;
- modeimm = insn_data[d->icode].operand[4].mode;
-
- if (VECTOR_MODE_P (modev2))
- op0 = safe_vector_operand (op0, modev2);
- if (VECTOR_MODE_P (modev3))
- op1 = safe_vector_operand (op1, modev3);
-
- if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
- op0 = copy_to_mode_reg (modev2, op0);
- if ((optimize && !register_operand (op1, modev3))
- || !insn_data[d->icode].operand[3].predicate (op1, modev3))
- op1 = copy_to_mode_reg (modev3, op1);
-
- if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
- {
- error ("the third argument must be an 8-bit immediate");
- return const0_rtx;
- }
-
- if (d->code == IX86_BUILTIN_PCMPISTRI128)
- {
- if (optimize || !target
- || GET_MODE (target) != tmode0
- || !insn_data[d->icode].operand[0].predicate (target, tmode0))
- target = gen_reg_rtx (tmode0);
-
- scratch1 = gen_reg_rtx (tmode1);
-
- pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
- }
- else if (d->code == IX86_BUILTIN_PCMPISTRM128)
- {
- if (optimize || !target
- || GET_MODE (target) != tmode1
- || !insn_data[d->icode].operand[1].predicate (target, tmode1))
- target = gen_reg_rtx (tmode1);
-
- scratch0 = gen_reg_rtx (tmode0);
-
- pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
- }
- else
- {
- gcc_assert (d->flag);
-
- scratch0 = gen_reg_rtx (tmode0);
- scratch1 = gen_reg_rtx (tmode1);
-
- pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
- }
-
- if (! pat)
- return 0;
-
- emit_insn (pat);
-
- if (d->flag)
- {
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
- target = gen_rtx_SUBREG (QImode, target, 0);
-
- emit_insn
- (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (EQ, QImode,
- gen_rtx_REG ((machine_mode) d->flag,
- FLAGS_REG),
- const0_rtx)));
- return SUBREG_REG (target);
- }
- else
- return target;
-}
-
- /* Fix up modeless constants to fit the required mode. */
-
-static rtx
-fixup_modeless_constant (rtx x, machine_mode mode)
-{
- if (GET_MODE (x) == VOIDmode)
- x = convert_to_mode (mode, x, 1);
- return x;
-}
-
- /* Subroutine of ix86_expand_builtin to take care of insns with a
- variable number of operands. */
-
-static rtx
-ix86_expand_args_builtin (const struct builtin_description *d,
- tree exp, rtx target)
-{
- rtx pat, real_target;
- unsigned int i, nargs;
- unsigned int nargs_constant = 0;
- unsigned int mask_pos = 0;
- int num_memory = 0;
- struct
- {
- rtx op;
- machine_mode mode;
- } args[6];
- bool second_arg_count = false;
- enum insn_code icode = d->icode;
- const struct insn_data_d *insn_p = &insn_data[icode];
- machine_mode tmode = insn_p->operand[0].mode;
- machine_mode rmode = VOIDmode;
- bool swap = false;
- enum rtx_code comparison = d->comparison;
-
- switch ((enum ix86_builtin_func_type) d->flag)
- {
- case V2DF_FTYPE_V2DF_ROUND:
- case V4DF_FTYPE_V4DF_ROUND:
- case V8DF_FTYPE_V8DF_ROUND:
- case V4SF_FTYPE_V4SF_ROUND:
- case V8SF_FTYPE_V8SF_ROUND:
- case V16SF_FTYPE_V16SF_ROUND:
- case V4SI_FTYPE_V4SF_ROUND:
- case V8SI_FTYPE_V8SF_ROUND:
- case V16SI_FTYPE_V16SF_ROUND:
- return ix86_expand_sse_round (d, exp, target);
- case V4SI_FTYPE_V2DF_V2DF_ROUND:
- case V8SI_FTYPE_V4DF_V4DF_ROUND:
- case V16SI_FTYPE_V8DF_V8DF_ROUND:
- return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
- case INT_FTYPE_V8SF_V8SF_PTEST:
- case INT_FTYPE_V4DI_V4DI_PTEST:
- case INT_FTYPE_V4DF_V4DF_PTEST:
- case INT_FTYPE_V4SF_V4SF_PTEST:
- case INT_FTYPE_V2DI_V2DI_PTEST:
- case INT_FTYPE_V2DF_V2DF_PTEST:
- return ix86_expand_sse_ptest (d, exp, target);
- case FLOAT128_FTYPE_FLOAT128:
- case FLOAT_FTYPE_FLOAT:
- case INT_FTYPE_INT:
- case UINT_FTYPE_UINT:
- case UINT16_FTYPE_UINT16:
- case UINT64_FTYPE_INT:
- case UINT64_FTYPE_UINT64:
- case INT64_FTYPE_INT64:
- case INT64_FTYPE_V4SF:
- case INT64_FTYPE_V2DF:
- case INT_FTYPE_V16QI:
- case INT_FTYPE_V8QI:
- case INT_FTYPE_V8SF:
- case INT_FTYPE_V4DF:
- case INT_FTYPE_V4SF:
- case INT_FTYPE_V2DF:
- case INT_FTYPE_V32QI:
- case V16QI_FTYPE_V16QI:
- case V8SI_FTYPE_V8SF:
- case V8SI_FTYPE_V4SI:
- case V8HI_FTYPE_V8HI:
- case V8HI_FTYPE_V16QI:
- case V8QI_FTYPE_V8QI:
- case V8SF_FTYPE_V8SF:
- case V8SF_FTYPE_V8SI:
- case V8SF_FTYPE_V4SF:
- case V8SF_FTYPE_V8HI:
- case V4SI_FTYPE_V4SI:
- case V4SI_FTYPE_V16QI:
- case V4SI_FTYPE_V4SF:
- case V4SI_FTYPE_V8SI:
- case V4SI_FTYPE_V8HI:
- case V4SI_FTYPE_V4DF:
- case V4SI_FTYPE_V2DF:
- case V4HI_FTYPE_V4HI:
- case V4DF_FTYPE_V4DF:
- case V4DF_FTYPE_V4SI:
- case V4DF_FTYPE_V4SF:
- case V4DF_FTYPE_V2DF:
- case V4SF_FTYPE_V4SF:
- case V4SF_FTYPE_V4SI:
- case V4SF_FTYPE_V8SF:
- case V4SF_FTYPE_V4DF:
- case V4SF_FTYPE_V8HI:
- case V4SF_FTYPE_V2DF:
- case V2DI_FTYPE_V2DI:
- case V2DI_FTYPE_V16QI:
- case V2DI_FTYPE_V8HI:
- case V2DI_FTYPE_V4SI:
- case V2DF_FTYPE_V2DF:
- case V2DF_FTYPE_V4SI:
- case V2DF_FTYPE_V4DF:
- case V2DF_FTYPE_V4SF:
- case V2DF_FTYPE_V2SI:
- case V2SI_FTYPE_V2SI:
- case V2SI_FTYPE_V4SF:
- case V2SI_FTYPE_V2SF:
- case V2SI_FTYPE_V2DF:
- case V2SF_FTYPE_V2SF:
- case V2SF_FTYPE_V2SI:
- case V32QI_FTYPE_V32QI:
- case V32QI_FTYPE_V16QI:
- case V16HI_FTYPE_V16HI:
- case V16HI_FTYPE_V8HI:
- case V8SI_FTYPE_V8SI:
- case V16HI_FTYPE_V16QI:
- case V8SI_FTYPE_V16QI:
- case V4DI_FTYPE_V16QI:
- case V8SI_FTYPE_V8HI:
- case V4DI_FTYPE_V8HI:
- case V4DI_FTYPE_V4SI:
- case V4DI_FTYPE_V2DI:
- case UQI_FTYPE_UQI:
- case UHI_FTYPE_UHI:
- case USI_FTYPE_USI:
- case USI_FTYPE_UQI:
- case USI_FTYPE_UHI:
- case UDI_FTYPE_UDI:
- case UHI_FTYPE_V16QI:
- case USI_FTYPE_V32QI:
- case UDI_FTYPE_V64QI:
- case V16QI_FTYPE_UHI:
- case V32QI_FTYPE_USI:
- case V64QI_FTYPE_UDI:
- case V8HI_FTYPE_UQI:
- case V16HI_FTYPE_UHI:
- case V32HI_FTYPE_USI:
- case V4SI_FTYPE_UQI:
- case V8SI_FTYPE_UQI:
- case V4SI_FTYPE_UHI:
- case V8SI_FTYPE_UHI:
- case UQI_FTYPE_V8HI:
- case UHI_FTYPE_V16HI:
- case USI_FTYPE_V32HI:
- case UQI_FTYPE_V4SI:
- case UQI_FTYPE_V8SI:
- case UHI_FTYPE_V16SI:
- case UQI_FTYPE_V2DI:
- case UQI_FTYPE_V4DI:
- case UQI_FTYPE_V8DI:
- case V16SI_FTYPE_UHI:
- case V2DI_FTYPE_UQI:
- case V4DI_FTYPE_UQI:
- case V16SI_FTYPE_INT:
- case V16SF_FTYPE_V8SF:
- case V16SI_FTYPE_V8SI:
- case V16SF_FTYPE_V4SF:
- case V16SI_FTYPE_V4SI:
- case V16SI_FTYPE_V16SF:
- case V16SI_FTYPE_V16SI:
- case V64QI_FTYPE_V64QI:
- case V32HI_FTYPE_V32HI:
- case V16SF_FTYPE_V16SF:
- case V8DI_FTYPE_UQI:
- case V8DI_FTYPE_V8DI:
- case V8DF_FTYPE_V4DF:
- case V8DF_FTYPE_V2DF:
- case V8DF_FTYPE_V8DF:
- case V4DI_FTYPE_V4DI:
- case V16HI_FTYPE_V16SF:
- case V8HI_FTYPE_V8SF:
- case V8HI_FTYPE_V4SF:
- nargs = 1;
- break;
- case V4SF_FTYPE_V4SF_VEC_MERGE:
- case V2DF_FTYPE_V2DF_VEC_MERGE:
- return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
- case FLOAT128_FTYPE_FLOAT128_FLOAT128:
- case V16QI_FTYPE_V16QI_V16QI:
- case V16QI_FTYPE_V8HI_V8HI:
- case V16SF_FTYPE_V16SF_V16SF:
- case V8QI_FTYPE_V8QI_V8QI:
- case V8QI_FTYPE_V4HI_V4HI:
- case V8HI_FTYPE_V8HI_V8HI:
- case V8HI_FTYPE_V16QI_V16QI:
- case V8HI_FTYPE_V4SI_V4SI:
- case V8SF_FTYPE_V8SF_V8SF:
- case V8SF_FTYPE_V8SF_V8SI:
- case V8DF_FTYPE_V8DF_V8DF:
- case V4SI_FTYPE_V4SI_V4SI:
- case V4SI_FTYPE_V8HI_V8HI:
- case V4SI_FTYPE_V2DF_V2DF:
- case V4HI_FTYPE_V4HI_V4HI:
- case V4HI_FTYPE_V8QI_V8QI:
- case V4HI_FTYPE_V2SI_V2SI:
- case V4DF_FTYPE_V4DF_V4DF:
- case V4DF_FTYPE_V4DF_V4DI:
- case V4SF_FTYPE_V4SF_V4SF:
- case V4SF_FTYPE_V4SF_V4SI:
- case V4SF_FTYPE_V4SF_V2SI:
- case V4SF_FTYPE_V4SF_V2DF:
- case V4SF_FTYPE_V4SF_UINT:
- case V4SF_FTYPE_V4SF_DI:
- case V4SF_FTYPE_V4SF_SI:
- case V2DI_FTYPE_V2DI_V2DI:
- case V2DI_FTYPE_V16QI_V16QI:
- case V2DI_FTYPE_V4SI_V4SI:
- case V2DI_FTYPE_V2DI_V16QI:
- case V2SI_FTYPE_V2SI_V2SI:
- case V2SI_FTYPE_V4HI_V4HI:
- case V2SI_FTYPE_V2SF_V2SF:
- case V2DF_FTYPE_V2DF_V2DF:
- case V2DF_FTYPE_V2DF_V4SF:
- case V2DF_FTYPE_V2DF_V2DI:
- case V2DF_FTYPE_V2DF_DI:
- case V2DF_FTYPE_V2DF_SI:
- case V2DF_FTYPE_V2DF_UINT:
- case V2SF_FTYPE_V2SF_V2SF:
- case V1DI_FTYPE_V1DI_V1DI:
- case V1DI_FTYPE_V8QI_V8QI:
- case V1DI_FTYPE_V2SI_V2SI:
- case V32QI_FTYPE_V16HI_V16HI:
- case V16HI_FTYPE_V8SI_V8SI:
- case V64QI_FTYPE_V64QI_V64QI:
- case V32QI_FTYPE_V32QI_V32QI:
- case V16HI_FTYPE_V32QI_V32QI:
- case V16HI_FTYPE_V16HI_V16HI:
- case V8SI_FTYPE_V4DF_V4DF:
- case V8SI_FTYPE_V8SI_V8SI:
- case V8SI_FTYPE_V16HI_V16HI:
- case V4DI_FTYPE_V4DI_V4DI:
- case V4DI_FTYPE_V8SI_V8SI:
- case V8DI_FTYPE_V64QI_V64QI:
- if (comparison == UNKNOWN)
- return ix86_expand_binop_builtin (icode, exp, target);
- nargs = 2;
- break;
- case V4SF_FTYPE_V4SF_V4SF_SWAP:
- case V2DF_FTYPE_V2DF_V2DF_SWAP:
- gcc_assert (comparison != UNKNOWN);
- nargs = 2;
- swap = true;
- break;
- case V16HI_FTYPE_V16HI_V8HI_COUNT:
- case V16HI_FTYPE_V16HI_SI_COUNT:
- case V8SI_FTYPE_V8SI_V4SI_COUNT:
- case V8SI_FTYPE_V8SI_SI_COUNT:
- case V4DI_FTYPE_V4DI_V2DI_COUNT:
- case V4DI_FTYPE_V4DI_INT_COUNT:
- case V8HI_FTYPE_V8HI_V8HI_COUNT:
- case V8HI_FTYPE_V8HI_SI_COUNT:
- case V4SI_FTYPE_V4SI_V4SI_COUNT:
- case V4SI_FTYPE_V4SI_SI_COUNT:
- case V4HI_FTYPE_V4HI_V4HI_COUNT:
- case V4HI_FTYPE_V4HI_SI_COUNT:
- case V2DI_FTYPE_V2DI_V2DI_COUNT:
- case V2DI_FTYPE_V2DI_SI_COUNT:
- case V2SI_FTYPE_V2SI_V2SI_COUNT:
- case V2SI_FTYPE_V2SI_SI_COUNT:
- case V1DI_FTYPE_V1DI_V1DI_COUNT:
- case V1DI_FTYPE_V1DI_SI_COUNT:
- nargs = 2;
- second_arg_count = true;
- break;
- case V16HI_FTYPE_V16HI_INT_V16HI_UHI_COUNT:
- case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI_COUNT:
- case V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT:
- case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT:
- case V2DI_FTYPE_V2DI_INT_V2DI_UQI_COUNT:
- case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI_COUNT:
- case V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT:
- case V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT:
- case V4DI_FTYPE_V4DI_INT_V4DI_UQI_COUNT:
- case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI_COUNT:
- case V4SI_FTYPE_V4SI_INT_V4SI_UQI_COUNT:
- case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI_COUNT:
- case V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT:
- case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT:
- case V8HI_FTYPE_V8HI_INT_V8HI_UQI_COUNT:
- case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI_COUNT:
- case V8SI_FTYPE_V8SI_INT_V8SI_UQI_COUNT:
- case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI_COUNT:
- nargs = 4;
- second_arg_count = true;
- break;
- case UINT64_FTYPE_UINT64_UINT64:
- case UINT_FTYPE_UINT_UINT:
- case UINT_FTYPE_UINT_USHORT:
- case UINT_FTYPE_UINT_UCHAR:
- case UINT16_FTYPE_UINT16_INT:
- case UINT8_FTYPE_UINT8_INT:
- case UQI_FTYPE_UQI_UQI:
- case UHI_FTYPE_UHI_UHI:
- case USI_FTYPE_USI_USI:
- case UDI_FTYPE_UDI_UDI:
- case V16SI_FTYPE_V8DF_V8DF:
- case V32HI_FTYPE_V16SF_V16SF:
- case V16HI_FTYPE_V8SF_V8SF:
- case V8HI_FTYPE_V4SF_V4SF:
- case V16HI_FTYPE_V16SF_UHI:
- case V8HI_FTYPE_V8SF_UQI:
- case V8HI_FTYPE_V4SF_UQI:
- nargs = 2;
- break;
- case V2DI_FTYPE_V2DI_INT_CONVERT:
- nargs = 2;
- rmode = V1TImode;
- nargs_constant = 1;
- break;
- case V4DI_FTYPE_V4DI_INT_CONVERT:
- nargs = 2;
- rmode = V2TImode;
- nargs_constant = 1;
- break;
- case V8DI_FTYPE_V8DI_INT_CONVERT:
- nargs = 2;
- rmode = V4TImode;
- nargs_constant = 1;
- break;
- case V8HI_FTYPE_V8HI_INT:
- case V8HI_FTYPE_V8SF_INT:
- case V16HI_FTYPE_V16SF_INT:
- case V8HI_FTYPE_V4SF_INT:
- case V8SF_FTYPE_V8SF_INT:
- case V4SF_FTYPE_V16SF_INT:
- case V16SF_FTYPE_V16SF_INT:
- case V4SI_FTYPE_V4SI_INT:
- case V4SI_FTYPE_V8SI_INT:
- case V4HI_FTYPE_V4HI_INT:
- case V4DF_FTYPE_V4DF_INT:
- case V4DF_FTYPE_V8DF_INT:
- case V4SF_FTYPE_V4SF_INT:
- case V4SF_FTYPE_V8SF_INT:
- case V2DI_FTYPE_V2DI_INT:
- case V2DF_FTYPE_V2DF_INT:
- case V2DF_FTYPE_V4DF_INT:
- case V16HI_FTYPE_V16HI_INT:
- case V8SI_FTYPE_V8SI_INT:
- case V16SI_FTYPE_V16SI_INT:
- case V4SI_FTYPE_V16SI_INT:
- case V4DI_FTYPE_V4DI_INT:
- case V2DI_FTYPE_V4DI_INT:
- case V4DI_FTYPE_V8DI_INT:
- case UQI_FTYPE_UQI_UQI_CONST:
- case UHI_FTYPE_UHI_UQI:
- case USI_FTYPE_USI_UQI:
- case UDI_FTYPE_UDI_UQI:
- nargs = 2;
- nargs_constant = 1;
- break;
- case V16QI_FTYPE_V16QI_V16QI_V16QI:
- case V8SF_FTYPE_V8SF_V8SF_V8SF:
- case V4DF_FTYPE_V4DF_V4DF_V4DF:
- case V4SF_FTYPE_V4SF_V4SF_V4SF:
- case V2DF_FTYPE_V2DF_V2DF_V2DF:
- case V32QI_FTYPE_V32QI_V32QI_V32QI:
- case UHI_FTYPE_V16SI_V16SI_UHI:
- case UQI_FTYPE_V8DI_V8DI_UQI:
- case V16HI_FTYPE_V16SI_V16HI_UHI:
- case V16QI_FTYPE_V16SI_V16QI_UHI:
- case V16QI_FTYPE_V8DI_V16QI_UQI:
- case V16SF_FTYPE_V16SF_V16SF_UHI:
- case V16SF_FTYPE_V4SF_V16SF_UHI:
- case V16SI_FTYPE_SI_V16SI_UHI:
- case V16SI_FTYPE_V16HI_V16SI_UHI:
- case V16SI_FTYPE_V16QI_V16SI_UHI:
- case V8SF_FTYPE_V4SF_V8SF_UQI:
- case V4DF_FTYPE_V2DF_V4DF_UQI:
- case V8SI_FTYPE_V4SI_V8SI_UQI:
- case V8SI_FTYPE_SI_V8SI_UQI:
- case V4SI_FTYPE_V4SI_V4SI_UQI:
- case V4SI_FTYPE_SI_V4SI_UQI:
- case V4DI_FTYPE_V2DI_V4DI_UQI:
- case V4DI_FTYPE_DI_V4DI_UQI:
- case V2DI_FTYPE_V2DI_V2DI_UQI:
- case V2DI_FTYPE_DI_V2DI_UQI:
- case V64QI_FTYPE_V64QI_V64QI_UDI:
- case V64QI_FTYPE_V16QI_V64QI_UDI:
- case V64QI_FTYPE_QI_V64QI_UDI:
- case V32QI_FTYPE_V32QI_V32QI_USI:
- case V32QI_FTYPE_V16QI_V32QI_USI:
- case V32QI_FTYPE_QI_V32QI_USI:
- case V16QI_FTYPE_V16QI_V16QI_UHI:
- case V16QI_FTYPE_QI_V16QI_UHI:
- case V32HI_FTYPE_V8HI_V32HI_USI:
- case V32HI_FTYPE_HI_V32HI_USI:
- case V16HI_FTYPE_V8HI_V16HI_UHI:
- case V16HI_FTYPE_HI_V16HI_UHI:
- case V8HI_FTYPE_V8HI_V8HI_UQI:
- case V8HI_FTYPE_HI_V8HI_UQI:
- case V8SF_FTYPE_V8HI_V8SF_UQI:
- case V4SF_FTYPE_V8HI_V4SF_UQI:
- case V8SI_FTYPE_V8SF_V8SI_UQI:
- case V4SI_FTYPE_V4SF_V4SI_UQI:
- case V4DI_FTYPE_V4SF_V4DI_UQI:
- case V2DI_FTYPE_V4SF_V2DI_UQI:
- case V4SF_FTYPE_V4DI_V4SF_UQI:
- case V4SF_FTYPE_V2DI_V4SF_UQI:
- case V4DF_FTYPE_V4DI_V4DF_UQI:
- case V2DF_FTYPE_V2DI_V2DF_UQI:
- case V16QI_FTYPE_V8HI_V16QI_UQI:
- case V16QI_FTYPE_V16HI_V16QI_UHI:
- case V16QI_FTYPE_V4SI_V16QI_UQI:
- case V16QI_FTYPE_V8SI_V16QI_UQI:
- case V8HI_FTYPE_V4SI_V8HI_UQI:
- case V8HI_FTYPE_V8SI_V8HI_UQI:
- case V16QI_FTYPE_V2DI_V16QI_UQI:
- case V16QI_FTYPE_V4DI_V16QI_UQI:
- case V8HI_FTYPE_V2DI_V8HI_UQI:
- case V8HI_FTYPE_V4DI_V8HI_UQI:
- case V4SI_FTYPE_V2DI_V4SI_UQI:
- case V4SI_FTYPE_V4DI_V4SI_UQI:
- case V32QI_FTYPE_V32HI_V32QI_USI:
- case UHI_FTYPE_V16QI_V16QI_UHI:
- case USI_FTYPE_V32QI_V32QI_USI:
- case UDI_FTYPE_V64QI_V64QI_UDI:
- case UQI_FTYPE_V8HI_V8HI_UQI:
- case UHI_FTYPE_V16HI_V16HI_UHI:
- case USI_FTYPE_V32HI_V32HI_USI:
- case UQI_FTYPE_V4SI_V4SI_UQI:
- case UQI_FTYPE_V8SI_V8SI_UQI:
- case UQI_FTYPE_V2DI_V2DI_UQI:
- case UQI_FTYPE_V4DI_V4DI_UQI:
- case V4SF_FTYPE_V2DF_V4SF_UQI:
- case V4SF_FTYPE_V4DF_V4SF_UQI:
- case V16SI_FTYPE_V16SI_V16SI_UHI:
- case V16SI_FTYPE_V4SI_V16SI_UHI:
- case V2DI_FTYPE_V4SI_V2DI_UQI:
- case V2DI_FTYPE_V8HI_V2DI_UQI:
- case V2DI_FTYPE_V16QI_V2DI_UQI:
- case V4DI_FTYPE_V4DI_V4DI_UQI:
- case V4DI_FTYPE_V4SI_V4DI_UQI:
- case V4DI_FTYPE_V8HI_V4DI_UQI:
- case V4DI_FTYPE_V16QI_V4DI_UQI:
- case V4DI_FTYPE_V4DF_V4DI_UQI:
- case V2DI_FTYPE_V2DF_V2DI_UQI:
- case V4SI_FTYPE_V4DF_V4SI_UQI:
- case V4SI_FTYPE_V2DF_V4SI_UQI:
- case V4SI_FTYPE_V8HI_V4SI_UQI:
- case V4SI_FTYPE_V16QI_V4SI_UQI:
- case V4DI_FTYPE_V4DI_V4DI_V4DI:
- case V8DF_FTYPE_V2DF_V8DF_UQI:
- case V8DF_FTYPE_V4DF_V8DF_UQI:
- case V8DF_FTYPE_V8DF_V8DF_UQI:
- case V8SF_FTYPE_V8SF_V8SF_UQI:
- case V8SF_FTYPE_V8SI_V8SF_UQI:
- case V4DF_FTYPE_V4DF_V4DF_UQI:
- case V4SF_FTYPE_V4SF_V4SF_UQI:
- case V2DF_FTYPE_V2DF_V2DF_UQI:
- case V2DF_FTYPE_V4SF_V2DF_UQI:
- case V2DF_FTYPE_V4SI_V2DF_UQI:
- case V4SF_FTYPE_V4SI_V4SF_UQI:
- case V4DF_FTYPE_V4SF_V4DF_UQI:
- case V4DF_FTYPE_V4SI_V4DF_UQI:
- case V8SI_FTYPE_V8SI_V8SI_UQI:
- case V8SI_FTYPE_V8HI_V8SI_UQI:
- case V8SI_FTYPE_V16QI_V8SI_UQI:
- case V8DF_FTYPE_V8SI_V8DF_UQI:
- case V8DI_FTYPE_DI_V8DI_UQI:
- case V16SF_FTYPE_V8SF_V16SF_UHI:
- case V16SI_FTYPE_V8SI_V16SI_UHI:
- case V16HI_FTYPE_V16HI_V16HI_UHI:
- case V8HI_FTYPE_V16QI_V8HI_UQI:
- case V16HI_FTYPE_V16QI_V16HI_UHI:
- case V32HI_FTYPE_V32HI_V32HI_USI:
- case V32HI_FTYPE_V32QI_V32HI_USI:
- case V8DI_FTYPE_V16QI_V8DI_UQI:
- case V8DI_FTYPE_V2DI_V8DI_UQI:
- case V8DI_FTYPE_V4DI_V8DI_UQI:
- case V8DI_FTYPE_V8DI_V8DI_UQI:
- case V8DI_FTYPE_V8HI_V8DI_UQI:
- case V8DI_FTYPE_V8SI_V8DI_UQI:
- case V8HI_FTYPE_V8DI_V8HI_UQI:
- case V8SI_FTYPE_V8DI_V8SI_UQI:
- case V4SI_FTYPE_V4SI_V4SI_V4SI:
- case V16SI_FTYPE_V16SI_V16SI_V16SI:
- case V8DI_FTYPE_V8DI_V8DI_V8DI:
- case V32HI_FTYPE_V32HI_V32HI_V32HI:
- case V2DI_FTYPE_V2DI_V2DI_V2DI:
- case V16HI_FTYPE_V16HI_V16HI_V16HI:
- case V8SI_FTYPE_V8SI_V8SI_V8SI:
- case V8HI_FTYPE_V8HI_V8HI_V8HI:
- case V32HI_FTYPE_V16SF_V16SF_USI:
- case V16HI_FTYPE_V8SF_V8SF_UHI:
- case V8HI_FTYPE_V4SF_V4SF_UQI:
- case V16HI_FTYPE_V16SF_V16HI_UHI:
- case V8HI_FTYPE_V8SF_V8HI_UQI:
- case V8HI_FTYPE_V4SF_V8HI_UQI:
- case V16SF_FTYPE_V16SF_V32HI_V32HI:
- case V8SF_FTYPE_V8SF_V16HI_V16HI:
- case V4SF_FTYPE_V4SF_V8HI_V8HI:
- nargs = 3;
- break;
- case V32QI_FTYPE_V32QI_V32QI_INT:
- case V16HI_FTYPE_V16HI_V16HI_INT:
- case V16QI_FTYPE_V16QI_V16QI_INT:
- case V4DI_FTYPE_V4DI_V4DI_INT:
- case V8HI_FTYPE_V8HI_V8HI_INT:
- case V8SI_FTYPE_V8SI_V8SI_INT:
- case V8SI_FTYPE_V8SI_V4SI_INT:
- case V8SF_FTYPE_V8SF_V8SF_INT:
- case V8SF_FTYPE_V8SF_V4SF_INT:
- case V4SI_FTYPE_V4SI_V4SI_INT:
- case V4DF_FTYPE_V4DF_V4DF_INT:
- case V16SF_FTYPE_V16SF_V16SF_INT:
- case V16SF_FTYPE_V16SF_V4SF_INT:
- case V16SI_FTYPE_V16SI_V4SI_INT:
- case V4DF_FTYPE_V4DF_V2DF_INT:
- case V4SF_FTYPE_V4SF_V4SF_INT:
- case V2DI_FTYPE_V2DI_V2DI_INT:
- case V4DI_FTYPE_V4DI_V2DI_INT:
- case V2DF_FTYPE_V2DF_V2DF_INT:
- case UQI_FTYPE_V8DI_V8UDI_INT:
- case UQI_FTYPE_V8DF_V8DF_INT:
- case UQI_FTYPE_V2DF_V2DF_INT:
- case UQI_FTYPE_V4SF_V4SF_INT:
- case UHI_FTYPE_V16SI_V16SI_INT:
- case UHI_FTYPE_V16SF_V16SF_INT:
- case V64QI_FTYPE_V64QI_V64QI_INT:
- case V32HI_FTYPE_V32HI_V32HI_INT:
- case V16SI_FTYPE_V16SI_V16SI_INT:
- case V8DI_FTYPE_V8DI_V8DI_INT:
- nargs = 3;
- nargs_constant = 1;
- break;
- case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
- nargs = 3;
- rmode = V4DImode;
- nargs_constant = 1;
- break;
- case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
- nargs = 3;
- rmode = V2DImode;
- nargs_constant = 1;
- break;
- case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
- nargs = 3;
- rmode = DImode;
- nargs_constant = 1;
- break;
- case V2DI_FTYPE_V2DI_UINT_UINT:
- nargs = 3;
- nargs_constant = 2;
- break;
- case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
- nargs = 3;
- rmode = V8DImode;
- nargs_constant = 1;
- break;
- case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
- nargs = 5;
- rmode = V8DImode;
- mask_pos = 2;
- nargs_constant = 1;
- break;
- case QI_FTYPE_V8DF_INT_UQI:
- case QI_FTYPE_V4DF_INT_UQI:
- case QI_FTYPE_V2DF_INT_UQI:
- case HI_FTYPE_V16SF_INT_UHI:
- case QI_FTYPE_V8SF_INT_UQI:
- case QI_FTYPE_V4SF_INT_UQI:
- case V4SI_FTYPE_V4SI_V4SI_UHI:
- case V8SI_FTYPE_V8SI_V8SI_UHI:
- nargs = 3;
- mask_pos = 1;
- nargs_constant = 1;
- break;
- case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
- nargs = 5;
- rmode = V4DImode;
- mask_pos = 2;
- nargs_constant = 1;
- break;
- case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
- nargs = 5;
- rmode = V2DImode;
- mask_pos = 2;
- nargs_constant = 1;
- break;
- case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
- case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
- case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
- case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
- case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
- case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
- case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
- case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
- case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
- case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
- case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
- case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
- case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
- case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
- case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
- case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
- case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
- case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
- case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
- case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
- case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
- case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
- case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
- case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
- case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
- case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
- case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
- case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
- case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
- case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
- case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
- case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
- case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
- case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
- case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
- case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
- case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
- case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
- case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
- case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
- case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
- case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
- case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
- case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
- case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
- case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
- case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
- case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
- case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
- case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
- case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
- case V32HI_FTYPE_V16SF_V16SF_V32HI_USI:
- case V16HI_FTYPE_V8SF_V8SF_V16HI_UHI:
- case V8HI_FTYPE_V4SF_V4SF_V8HI_UQI:
- nargs = 4;
- break;
- case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
- case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
- case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
- case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
- case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
- nargs = 4;
- nargs_constant = 1;
- break;
- case UQI_FTYPE_V4DI_V4DI_INT_UQI:
- case UQI_FTYPE_V8SI_V8SI_INT_UQI:
- case QI_FTYPE_V4DF_V4DF_INT_UQI:
- case QI_FTYPE_V8SF_V8SF_INT_UQI:
- case UQI_FTYPE_V2DI_V2DI_INT_UQI:
- case UQI_FTYPE_V4SI_V4SI_INT_UQI:
- case UQI_FTYPE_V2DF_V2DF_INT_UQI:
- case UQI_FTYPE_V4SF_V4SF_INT_UQI:
- case UDI_FTYPE_V64QI_V64QI_INT_UDI:
- case USI_FTYPE_V32QI_V32QI_INT_USI:
- case UHI_FTYPE_V16QI_V16QI_INT_UHI:
- case USI_FTYPE_V32HI_V32HI_INT_USI:
- case UHI_FTYPE_V16HI_V16HI_INT_UHI:
- case UQI_FTYPE_V8HI_V8HI_INT_UQI:
- nargs = 4;
- mask_pos = 1;
- nargs_constant = 1;
- break;
- case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
- nargs = 4;
- nargs_constant = 2;
- break;
- case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
- case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
- case V16SF_FTYPE_V16SF_V32HI_V32HI_UHI:
- case V8SF_FTYPE_V8SF_V16HI_V16HI_UQI:
- case V4SF_FTYPE_V4SF_V8HI_V8HI_UQI:
- nargs = 4;
- break;
- case UQI_FTYPE_V8DI_V8DI_INT_UQI:
- case UHI_FTYPE_V16SI_V16SI_INT_UHI:
- mask_pos = 1;
- nargs = 4;
- nargs_constant = 1;
- break;
- case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
- case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
- case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
- case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
- case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
- case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
- case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
- case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
- case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
- case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
- case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
- case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
- case V32HI_FTYPE_V32HI_INT_V32HI_USI:
- case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
- case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
- case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
- case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
- case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
- case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
- case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
- case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
- case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
- case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
- case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
- case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
- case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
- case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
- case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
- case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
- case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
- nargs = 4;
- mask_pos = 2;
- nargs_constant = 1;
- break;
- case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
- case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
- case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
- case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
- case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
- case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
- case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
- case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
- case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
- case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
- case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
- case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
- case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
- case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
- case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
- case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
- case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
- case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
- case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
- case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
- case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
- case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
- case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
- case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
- case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
- case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
- case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
- nargs = 5;
- mask_pos = 2;
- nargs_constant = 1;
- break;
- case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
- case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
- case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
- case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
- case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
- case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
- case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
- case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
- case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
- case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
- nargs = 5;
- mask_pos = 1;
- nargs_constant = 1;
- break;
- case V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI:
- case V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI:
- case V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI:
- case V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT:
- case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT:
- case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT:
- case V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT:
- case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT:
- case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT:
- case V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT:
- case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT:
- case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT:
- nargs = 5;
- mask_pos = 1;
- nargs_constant = 2;
- break;
-
- default:
- gcc_unreachable ();
- }
-
- gcc_assert (nargs <= ARRAY_SIZE (args));
-
- if (comparison != UNKNOWN)
- {
- gcc_assert (nargs == 2);
- return ix86_expand_sse_compare (d, exp, target, swap);
- }
-
- if (rmode == VOIDmode || rmode == tmode)
- {
- if (optimize
- || target == 0
- || GET_MODE (target) != tmode
- || !insn_p->operand[0].predicate (target, tmode))
- target = gen_reg_rtx (tmode);
- else if (memory_operand (target, tmode))
- num_memory++;
- real_target = target;
- }
- else
- {
- real_target = gen_reg_rtx (tmode);
- target = lowpart_subreg (rmode, real_target, tmode);
- }
-
- for (i = 0; i < nargs; i++)
- {
- tree arg = CALL_EXPR_ARG (exp, i);
- rtx op = expand_normal (arg);
- machine_mode mode = insn_p->operand[i + 1].mode;
- bool match = insn_p->operand[i + 1].predicate (op, mode);
-
- if (second_arg_count && i == 1)
- {
- /* SIMD shift insns take either an 8-bit immediate or a
- register as the count.  But the builtin functions take an
- int as the count.  If the count doesn't match, we put it in
- a register.  The instructions use a 64-bit count; if op is
- only 32-bit, zero-extend it, since negative shift counts are
- undefined behavior and zero-extension is more efficient.  */
- if (!match)
- {
- if (SCALAR_INT_MODE_P (GET_MODE (op)))
- op = convert_modes (mode, GET_MODE (op), op, 1);
- else
- op = lowpart_subreg (mode, op, GET_MODE (op));
- if (!insn_p->operand[i + 1].predicate (op, mode))
- op = copy_to_reg (op);
- }
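- /* A sketch of the path above (illustrative only; the builtin
- name is merely an example): __builtin_ia32_pslldi128 (x, n)
- with a 32-bit n yields (zero_extend:DI (reg:SI n)) for the
- count operand rather than a sign extension, which is safe
- because a negative count is already undefined behavior. */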
- }
- else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
- (!mask_pos && (nargs - i) <= nargs_constant))
- {
- if (!match)
- switch (icode)
- {
- case CODE_FOR_avx_vinsertf128v4di:
- case CODE_FOR_avx_vextractf128v4di:
- error ("the last argument must be an 1-bit immediate");
- return const0_rtx;
-
- case CODE_FOR_avx512f_cmpv8di3_mask:
- case CODE_FOR_avx512f_cmpv16si3_mask:
- case CODE_FOR_avx512f_ucmpv8di3_mask:
- case CODE_FOR_avx512f_ucmpv16si3_mask:
- case CODE_FOR_avx512vl_cmpv4di3_mask:
- case CODE_FOR_avx512vl_cmpv8si3_mask:
- case CODE_FOR_avx512vl_ucmpv4di3_mask:
- case CODE_FOR_avx512vl_ucmpv8si3_mask:
- case CODE_FOR_avx512vl_cmpv2di3_mask:
- case CODE_FOR_avx512vl_cmpv4si3_mask:
- case CODE_FOR_avx512vl_ucmpv2di3_mask:
- case CODE_FOR_avx512vl_ucmpv4si3_mask:
- error ("the last argument must be a 3-bit immediate");
- return const0_rtx;
-
- case CODE_FOR_sse4_1_roundsd:
- case CODE_FOR_sse4_1_roundss:
-
- case CODE_FOR_sse4_1_roundpd:
- case CODE_FOR_sse4_1_roundps:
- case CODE_FOR_avx_roundpd256:
- case CODE_FOR_avx_roundps256:
-
- case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
- case CODE_FOR_sse4_1_roundps_sfix:
- case CODE_FOR_avx_roundpd_vec_pack_sfix256:
- case CODE_FOR_avx_roundps_sfix256:
-
- case CODE_FOR_sse4_1_blendps:
- case CODE_FOR_avx_blendpd256:
- case CODE_FOR_avx_vpermilv4df:
- case CODE_FOR_avx_vpermilv4df_mask:
- case CODE_FOR_avx512f_getmantv8df_mask:
- case CODE_FOR_avx512f_getmantv16sf_mask:
- case CODE_FOR_avx512vl_getmantv8sf_mask:
- case CODE_FOR_avx512vl_getmantv4df_mask:
- case CODE_FOR_avx512vl_getmantv4sf_mask:
- case CODE_FOR_avx512vl_getmantv2df_mask:
- case CODE_FOR_avx512dq_rangepv8df_mask_round:
- case CODE_FOR_avx512dq_rangepv16sf_mask_round:
- case CODE_FOR_avx512dq_rangepv4df_mask:
- case CODE_FOR_avx512dq_rangepv8sf_mask:
- case CODE_FOR_avx512dq_rangepv2df_mask:
- case CODE_FOR_avx512dq_rangepv4sf_mask:
- case CODE_FOR_avx_shufpd256_mask:
- error ("the last argument must be a 4-bit immediate");
- return const0_rtx;
-
- case CODE_FOR_sha1rnds4:
- case CODE_FOR_sse4_1_blendpd:
- case CODE_FOR_avx_vpermilv2df:
- case CODE_FOR_avx_vpermilv2df_mask:
- case CODE_FOR_xop_vpermil2v2df3:
- case CODE_FOR_xop_vpermil2v4sf3:
- case CODE_FOR_xop_vpermil2v4df3:
- case CODE_FOR_xop_vpermil2v8sf3:
- case CODE_FOR_avx512f_vinsertf32x4_mask:
- case CODE_FOR_avx512f_vinserti32x4_mask:
- case CODE_FOR_avx512f_vextractf32x4_mask:
- case CODE_FOR_avx512f_vextracti32x4_mask:
- case CODE_FOR_sse2_shufpd:
- case CODE_FOR_sse2_shufpd_mask:
- case CODE_FOR_avx512dq_shuf_f64x2_mask:
- case CODE_FOR_avx512dq_shuf_i64x2_mask:
- case CODE_FOR_avx512vl_shuf_i32x4_mask:
- case CODE_FOR_avx512vl_shuf_f32x4_mask:
- error ("the last argument must be a 2-bit immediate");
- return const0_rtx;
-
- case CODE_FOR_avx_vextractf128v4df:
- case CODE_FOR_avx_vextractf128v8sf:
- case CODE_FOR_avx_vextractf128v8si:
- case CODE_FOR_avx_vinsertf128v4df:
- case CODE_FOR_avx_vinsertf128v8sf:
- case CODE_FOR_avx_vinsertf128v8si:
- case CODE_FOR_avx512f_vinsertf64x4_mask:
- case CODE_FOR_avx512f_vinserti64x4_mask:
- case CODE_FOR_avx512f_vextractf64x4_mask:
- case CODE_FOR_avx512f_vextracti64x4_mask:
- case CODE_FOR_avx512dq_vinsertf32x8_mask:
- case CODE_FOR_avx512dq_vinserti32x8_mask:
- case CODE_FOR_avx512vl_vinsertv4df:
- case CODE_FOR_avx512vl_vinsertv4di:
- case CODE_FOR_avx512vl_vinsertv8sf:
- case CODE_FOR_avx512vl_vinsertv8si:
- error ("the last argument must be a 1-bit immediate");
- return const0_rtx;
-
- case CODE_FOR_avx_vmcmpv2df3:
- case CODE_FOR_avx_vmcmpv4sf3:
- case CODE_FOR_avx_cmpv2df3:
- case CODE_FOR_avx_cmpv4sf3:
- case CODE_FOR_avx_cmpv4df3:
- case CODE_FOR_avx_cmpv8sf3:
- case CODE_FOR_avx512f_cmpv8df3_mask:
- case CODE_FOR_avx512f_cmpv16sf3_mask:
- case CODE_FOR_avx512f_vmcmpv2df3_mask:
- case CODE_FOR_avx512f_vmcmpv4sf3_mask:
- error ("the last argument must be a 5-bit immediate");
- return const0_rtx;
-
- default:
- switch (nargs_constant)
- {
- case 2:
- if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
- (!mask_pos && (nargs - i) == nargs_constant))
- {
- error ("the next to last argument must be an 8-bit immediate");
- break;
- }
- /* FALLTHRU */
- case 1:
- error ("the last argument must be an 8-bit immediate");
- break;
- default:
- gcc_unreachable ();
- }
- return const0_rtx;
- }
- }
- else
- {
- if (VECTOR_MODE_P (mode))
- op = safe_vector_operand (op, mode);
-
- /* If we aren't optimizing, only allow one memory operand to
- be generated. */
- if (memory_operand (op, mode))
- num_memory++;
-
- op = fixup_modeless_constant (op, mode);
-
- if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
- {
- if (optimize || !match || num_memory > 1)
- op = copy_to_mode_reg (mode, op);
- }
- else
- {
- op = copy_to_reg (op);
- op = lowpart_subreg (mode, op, GET_MODE (op));
- }
- }
-
- args[i].op = op;
- args[i].mode = mode;
- }
-
- switch (nargs)
- {
- case 1:
- pat = GEN_FCN (icode) (real_target, args[0].op);
- break;
- case 2:
- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
- break;
- case 3:
- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
- args[2].op);
- break;
- case 4:
- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
- args[2].op, args[3].op);
- break;
- case 5:
- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
- args[2].op, args[3].op, args[4].op);
- break;
- case 6:
- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
- args[2].op, args[3].op, args[4].op,
- args[5].op);
- break;
- default:
- gcc_unreachable ();
- }
-
- if (! pat)
- return 0;
-
- emit_insn (pat);
- return target;
-}
-
-/* Transform a pattern of the following layout:
- (set A
- (unspec [B C] UNSPEC_EMBEDDED_ROUNDING))
- into:
- (set A B) */
-
-static rtx
-ix86_erase_embedded_rounding (rtx pat)
-{
- if (GET_CODE (pat) == INSN)
- pat = PATTERN (pat);
-
- gcc_assert (GET_CODE (pat) == SET);
- rtx src = SET_SRC (pat);
- gcc_assert (XVECLEN (src, 0) == 2);
- rtx p0 = XVECEXP (src, 0, 0);
- gcc_assert (GET_CODE (src) == UNSPEC
- && XINT (src, 1) == UNSPEC_EMBEDDED_ROUNDING);
- rtx res = gen_rtx_SET (SET_DEST (pat), p0);
- return res;
-}
-
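-/* For example (an illustrative sketch; the operands are invented),
- a pattern such as
- (set (reg:CCFP flags)
- (unspec [(compare:CCFP (reg:V2DF a) (reg:V2DF b))
- (const_int NO_ROUND)] UNSPEC_EMBEDDED_ROUNDING))
- is rewritten to the plain
- (set (reg:CCFP flags)
- (compare:CCFP (reg:V2DF a) (reg:V2DF b))). */
-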
-/* Subroutine of ix86_expand_round_builtin to take care of comi insns
- with rounding. */
-static rtx
-ix86_expand_sse_comi_round (const struct builtin_description *d,
- tree exp, rtx target)
-{
- rtx pat, set_dst;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- tree arg2 = CALL_EXPR_ARG (exp, 2);
- tree arg3 = CALL_EXPR_ARG (exp, 3);
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- rtx op2 = expand_normal (arg2);
- rtx op3 = expand_normal (arg3);
- enum insn_code icode = d->icode;
- const struct insn_data_d *insn_p = &insn_data[icode];
- machine_mode mode0 = insn_p->operand[0].mode;
- machine_mode mode1 = insn_p->operand[1].mode;
-
- /* See avxintrin.h for values. */
- static const enum rtx_code comparisons[32] =
- {
- EQ, LT, LE, UNORDERED, NE, UNGE, UNGT, ORDERED,
- UNEQ, UNLT, UNLE, UNORDERED, LTGT, GE, GT, ORDERED,
- EQ, LT, LE, UNORDERED, NE, UNGE, UNGT, ORDERED,
- UNEQ, UNLT, UNLE, UNORDERED, LTGT, GE, GT, ORDERED
- };
- static const bool ordereds[32] =
- {
- true, true, true, false, false, false, false, true,
- false, false, false, true, true, true, true, false,
- true, true, true, false, false, false, false, true,
- false, false, false, true, true, true, true, false
- };
- static const bool non_signalings[32] =
- {
- true, false, false, true, true, false, false, true,
- true, false, false, true, true, false, false, true,
- false, true, true, false, false, true, true, false,
- false, true, true, false, false, true, true, false
- };
-
- if (!CONST_INT_P (op2))
- {
- error ("the third argument must be comparison constant");
- return const0_rtx;
- }
- if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
- {
- error ("incorrect comparison mode");
- return const0_rtx;
- }
-
- if (!insn_p->operand[2].predicate (op3, SImode))
- {
- error ("incorrect rounding operand");
- return const0_rtx;
- }
-
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
- if (VECTOR_MODE_P (mode1))
- op1 = safe_vector_operand (op1, mode1);
-
- enum rtx_code comparison = comparisons[INTVAL (op2)];
- bool ordered = ordereds[INTVAL (op2)];
- bool non_signaling = non_signalings[INTVAL (op2)];
- rtx const_val = const0_rtx;
-
- bool check_unordered = false;
- machine_mode mode = CCFPmode;
- switch (comparison)
- {
- case ORDERED:
- if (!ordered)
- {
- /* NB: Use CCSmode/NE for _CMP_TRUE_UQ/_CMP_TRUE_US. */
- if (!non_signaling)
- ordered = true;
- mode = CCSmode;
- }
- else
- {
- /* NB: Use CCPmode/NE for _CMP_ORD_Q/_CMP_ORD_S. */
- if (non_signaling)
- ordered = false;
- mode = CCPmode;
- }
- comparison = NE;
- break;
- case UNORDERED:
- if (ordered)
- {
- /* NB: Use CCSmode/EQ for _CMP_FALSE_OQ/_CMP_FALSE_OS. */
- if (non_signaling)
- ordered = false;
- mode = CCSmode;
- }
- else
- {
- /* NB: Use CCPmode/NE for _CMP_UNORD_Q/_CMP_UNORD_S. */
- if (!non_signaling)
- ordered = true;
- mode = CCPmode;
- }
- comparison = EQ;
- break;
-
- case LE: /* -> GE */
- case LT: /* -> GT */
- case UNGE: /* -> UNLE */
- case UNGT: /* -> UNLT */
- std::swap (op0, op1);
- comparison = swap_condition (comparison);
- /* FALLTHRU */
- case GT:
- case GE:
- case UNEQ:
- case UNLT:
- case UNLE:
- case LTGT:
- /* These are supported by CCFPmode. NB: Use ordered/signaling
- COMI or unordered/non-signaling UCOMI. Both set ZF, PF, CF
- with NAN operands. */
- if (ordered == non_signaling)
- ordered = !ordered;
- break;
- case EQ:
- /* NB: COMI/UCOMI will set ZF with NAN operands. Use CCZmode for
- _CMP_EQ_OQ/_CMP_EQ_OS. */
- check_unordered = true;
- mode = CCZmode;
- break;
- case NE:
- /* NB: COMI/UCOMI will set ZF with NAN operands. Use CCZmode for
- _CMP_NEQ_UQ/_CMP_NEQ_US. */
- gcc_assert (!ordered);
- check_unordered = true;
- mode = CCZmode;
- const_val = const1_rtx;
- break;
- default:
- gcc_unreachable ();
- }
-
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const_val);
- target = gen_rtx_SUBREG (QImode, target, 0);
-
- if ((optimize && !register_operand (op0, mode0))
- || !insn_p->operand[0].predicate (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if ((optimize && !register_operand (op1, mode1))
- || !insn_p->operand[1].predicate (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
-
- /*
- 1. COMI: ordered and signaling.
- 2. UCOMI: unordered and non-signaling.
- */
- if (non_signaling)
- icode = (icode == CODE_FOR_sse_comi_round
- ? CODE_FOR_sse_ucomi_round
- : CODE_FOR_sse2_ucomi_round);
-
- pat = GEN_FCN (icode) (op0, op1, op3);
- if (! pat)
- return 0;
-
- /* The rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
- if (INTVAL (op3) == NO_ROUND)
- {
- pat = ix86_erase_embedded_rounding (pat);
- if (! pat)
- return 0;
-
- set_dst = SET_DEST (pat);
- }
- else
- {
- gcc_assert (GET_CODE (pat) == SET);
- set_dst = SET_DEST (pat);
- }
-
- emit_insn (pat);
-
- rtx_code_label *label = NULL;
-
- /* NB: For ordered EQ or unordered NE, checking ZF alone isn't
- sufficient with NAN operands. */
- if (check_unordered)
- {
- gcc_assert (comparison == EQ || comparison == NE);
-
- rtx flag = gen_rtx_REG (CCFPmode, FLAGS_REG);
- label = gen_label_rtx ();
- rtx tmp = gen_rtx_fmt_ee (UNORDERED, VOIDmode, flag, const0_rtx);
- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
- gen_rtx_LABEL_REF (VOIDmode, label),
- pc_rtx);
- emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
- }
-
- /* NB: Set CCFPmode and check a different CCmode which is a subset
- of CCFPmode. */
- if (GET_MODE (set_dst) != mode)
- {
- gcc_assert (mode == CCAmode || mode == CCCmode
- || mode == CCOmode || mode == CCPmode
- || mode == CCSmode || mode == CCZmode);
- set_dst = gen_rtx_REG (mode, FLAGS_REG);
- }
-
- emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (comparison, QImode,
- set_dst,
- const0_rtx)));
-
- if (label)
- emit_label (label);
-
- return SUBREG_REG (target);
-}
-
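-/* Usage sketch (assuming the avx512fintrin.h intrinsic spelling):
- int eq = _mm_comi_round_sd (a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
- flows through the code above: _CMP_EQ_OQ selects EQ with CCZmode,
- the non-signaling UCOMI form of the insn is chosen, and the extra
- jump guards the unordered case so that a NAN operand produces 0
- rather than a spurious 1. */
-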
-static rtx
-ix86_expand_round_builtin (const struct builtin_description *d,
- tree exp, rtx target)
-{
- rtx pat;
- unsigned int i, nargs;
- struct
- {
- rtx op;
- machine_mode mode;
- } args[6];
- enum insn_code icode = d->icode;
- const struct insn_data_d *insn_p = &insn_data[icode];
- machine_mode tmode = insn_p->operand[0].mode;
- unsigned int nargs_constant = 0;
- unsigned int redundant_embed_rnd = 0;
-
- switch ((enum ix86_builtin_func_type) d->flag)
- {
- case UINT64_FTYPE_V2DF_INT:
- case UINT64_FTYPE_V4SF_INT:
- case UINT_FTYPE_V2DF_INT:
- case UINT_FTYPE_V4SF_INT:
- case INT64_FTYPE_V2DF_INT:
- case INT64_FTYPE_V4SF_INT:
- case INT_FTYPE_V2DF_INT:
- case INT_FTYPE_V4SF_INT:
- nargs = 2;
- break;
- case V4SF_FTYPE_V4SF_UINT_INT:
- case V4SF_FTYPE_V4SF_UINT64_INT:
- case V2DF_FTYPE_V2DF_UINT64_INT:
- case V4SF_FTYPE_V4SF_INT_INT:
- case V4SF_FTYPE_V4SF_INT64_INT:
- case V2DF_FTYPE_V2DF_INT64_INT:
- case V4SF_FTYPE_V4SF_V4SF_INT:
- case V2DF_FTYPE_V2DF_V2DF_INT:
- case V4SF_FTYPE_V4SF_V2DF_INT:
- case V2DF_FTYPE_V2DF_V4SF_INT:
- nargs = 3;
- break;
- case V8SF_FTYPE_V8DF_V8SF_QI_INT:
- case V8DF_FTYPE_V8DF_V8DF_QI_INT:
- case V8SI_FTYPE_V8DF_V8SI_QI_INT:
- case V8DI_FTYPE_V8DF_V8DI_QI_INT:
- case V8SF_FTYPE_V8DI_V8SF_QI_INT:
- case V8DF_FTYPE_V8DI_V8DF_QI_INT:
- case V16SF_FTYPE_V16SF_V16SF_HI_INT:
- case V8DI_FTYPE_V8SF_V8DI_QI_INT:
- case V16SF_FTYPE_V16SI_V16SF_HI_INT:
- case V16SI_FTYPE_V16SF_V16SI_HI_INT:
- case V8DF_FTYPE_V8SF_V8DF_QI_INT:
- case V16SF_FTYPE_V16HI_V16SF_HI_INT:
- case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
- case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
- nargs = 4;
- break;
- case V4SF_FTYPE_V4SF_V4SF_INT_INT:
- case V2DF_FTYPE_V2DF_V2DF_INT_INT:
- nargs_constant = 2;
- nargs = 4;
- break;
- case INT_FTYPE_V4SF_V4SF_INT_INT:
- case INT_FTYPE_V2DF_V2DF_INT_INT:
- return ix86_expand_sse_comi_round (d, exp, target);
- case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
- case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
- case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT:
- case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
- case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
- case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
- case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
- case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
- nargs = 5;
- break;
- case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
- case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
- nargs_constant = 4;
- nargs = 5;
- break;
- case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
- case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
- case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
- case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
- nargs_constant = 3;
- nargs = 5;
- break;
- case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
- case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
- case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
- case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
- case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT:
- case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT:
- nargs = 6;
- nargs_constant = 4;
- break;
- case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
- case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
- case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
- case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
- nargs = 6;
- nargs_constant = 3;
- break;
- default:
- gcc_unreachable ();
- }
- gcc_assert (nargs <= ARRAY_SIZE (args));
-
- if (optimize
- || target == 0
- || GET_MODE (target) != tmode
- || !insn_p->operand[0].predicate (target, tmode))
- target = gen_reg_rtx (tmode);
-
- for (i = 0; i < nargs; i++)
- {
- tree arg = CALL_EXPR_ARG (exp, i);
- rtx op = expand_normal (arg);
- machine_mode mode = insn_p->operand[i + 1].mode;
- bool match = insn_p->operand[i + 1].predicate (op, mode);
-
- if (i == nargs - nargs_constant)
- {
- if (!match)
- {
- switch (icode)
- {
- case CODE_FOR_avx512f_getmantv8df_mask_round:
- case CODE_FOR_avx512f_getmantv16sf_mask_round:
- case CODE_FOR_avx512f_vgetmantv2df_round:
- case CODE_FOR_avx512f_vgetmantv2df_mask_round:
- case CODE_FOR_avx512f_vgetmantv4sf_round:
- case CODE_FOR_avx512f_vgetmantv4sf_mask_round:
- error ("the immediate argument must be a 4-bit immediate");
- return const0_rtx;
- case CODE_FOR_avx512f_cmpv8df3_mask_round:
- case CODE_FOR_avx512f_cmpv16sf3_mask_round:
- case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
- case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
- error ("the immediate argument must be a 5-bit immediate");
- return const0_rtx;
- default:
- error ("the immediate argument must be an 8-bit immediate");
- return const0_rtx;
- }
- }
- }
- else if (i == nargs - 1)
- {
- if (!insn_p->operand[nargs].predicate (op, SImode))
- {
- error ("incorrect rounding operand");
- return const0_rtx;
- }
-
- /* If there is no rounding, use the normal version of the pattern. */
- if (INTVAL (op) == NO_ROUND)
- redundant_embed_rnd = 1;
- }
- else
- {
- if (VECTOR_MODE_P (mode))
- op = safe_vector_operand (op, mode);
-
- op = fixup_modeless_constant (op, mode);
-
- if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
- {
- if (optimize || !match)
- op = copy_to_mode_reg (mode, op);
- }
- else
- {
- op = copy_to_reg (op);
- op = lowpart_subreg (mode, op, GET_MODE (op));
- }
- }
-
- args[i].op = op;
- args[i].mode = mode;
- }
-
- switch (nargs)
- {
- case 1:
- pat = GEN_FCN (icode) (target, args[0].op);
- break;
- case 2:
- pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
- break;
- case 3:
- pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
- args[2].op);
- break;
- case 4:
- pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
- args[2].op, args[3].op);
- break;
- case 5:
- pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
- args[2].op, args[3].op, args[4].op);
- break;
- case 6:
- pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
- args[2].op, args[3].op, args[4].op,
- args[5].op);
- break;
- default:
- gcc_unreachable ();
- }
-
- if (!pat)
- return 0;
-
- if (redundant_embed_rnd)
- pat = ix86_erase_embedded_rounding (pat);
-
- emit_insn (pat);
- return target;
-}
-
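-/* Shape sketch for one of the cases above (illustrative): with
- V4SF_FTYPE_V4SF_V4SF_INT_INT (nargs = 4, nargs_constant = 2) the
- call is (src1, src2, imm8, rounding); i == 2 hits the immediate
- check, i == 3 the rounding check, and a NO_ROUND rounding operand
- makes the expander strip the embedded-rounding unspec again via
- ix86_erase_embedded_rounding. */
-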
-/* Subroutine of ix86_expand_builtin to take care of special insns
- with variable number of operands. */
-
-static rtx
-ix86_expand_special_args_builtin (const struct builtin_description *d,
- tree exp, rtx target)
-{
- tree arg;
- rtx pat, op;
- unsigned int i, nargs, arg_adjust, memory;
- bool aligned_mem = false;
- struct
- {
- rtx op;
- machine_mode mode;
- } args[3];
- enum insn_code icode = d->icode;
- bool last_arg_constant = false;
- const struct insn_data_d *insn_p = &insn_data[icode];
- machine_mode tmode = insn_p->operand[0].mode;
- enum { load, store } klass;
-
- switch ((enum ix86_builtin_func_type) d->flag)
- {
- case VOID_FTYPE_VOID:
- emit_insn (GEN_FCN (icode) (target));
- return 0;
- case VOID_FTYPE_UINT64:
- case VOID_FTYPE_UNSIGNED:
- nargs = 0;
- klass = store;
- memory = 0;
- break;
-
- case INT_FTYPE_VOID:
- case USHORT_FTYPE_VOID:
- case UINT64_FTYPE_VOID:
- case UINT_FTYPE_VOID:
- case UNSIGNED_FTYPE_VOID:
- nargs = 0;
- klass = load;
- memory = 0;
- break;
- case UINT64_FTYPE_PUNSIGNED:
- case V2DI_FTYPE_PV2DI:
- case V4DI_FTYPE_PV4DI:
- case V32QI_FTYPE_PCCHAR:
- case V16QI_FTYPE_PCCHAR:
- case V8SF_FTYPE_PCV4SF:
- case V8SF_FTYPE_PCFLOAT:
- case V4SF_FTYPE_PCFLOAT:
- case V4DF_FTYPE_PCV2DF:
- case V4DF_FTYPE_PCDOUBLE:
- case V2DF_FTYPE_PCDOUBLE:
- case VOID_FTYPE_PVOID:
- case V8DI_FTYPE_PV8DI:
- nargs = 1;
- klass = load;
- memory = 0;
- switch (icode)
- {
- case CODE_FOR_sse4_1_movntdqa:
- case CODE_FOR_avx2_movntdqa:
- case CODE_FOR_avx512f_movntdqa:
- aligned_mem = true;
- break;
- default:
- break;
- }
- break;
- case VOID_FTYPE_PV2SF_V4SF:
- case VOID_FTYPE_PV8DI_V8DI:
- case VOID_FTYPE_PV4DI_V4DI:
- case VOID_FTYPE_PV2DI_V2DI:
- case VOID_FTYPE_PCHAR_V32QI:
- case VOID_FTYPE_PCHAR_V16QI:
- case VOID_FTYPE_PFLOAT_V16SF:
- case VOID_FTYPE_PFLOAT_V8SF:
- case VOID_FTYPE_PFLOAT_V4SF:
- case VOID_FTYPE_PDOUBLE_V8DF:
- case VOID_FTYPE_PDOUBLE_V4DF:
- case VOID_FTYPE_PDOUBLE_V2DF:
- case VOID_FTYPE_PLONGLONG_LONGLONG:
- case VOID_FTYPE_PULONGLONG_ULONGLONG:
- case VOID_FTYPE_PUNSIGNED_UNSIGNED:
- case VOID_FTYPE_PINT_INT:
- nargs = 1;
- klass = store;
- /* Reserve memory operand for target. */
- memory = ARRAY_SIZE (args);
- switch (icode)
- {
- /* These builtins and instructions require the memory
- to be properly aligned. */
- case CODE_FOR_avx_movntv4di:
- case CODE_FOR_sse2_movntv2di:
- case CODE_FOR_avx_movntv8sf:
- case CODE_FOR_sse_movntv4sf:
- case CODE_FOR_sse4a_vmmovntv4sf:
- case CODE_FOR_avx_movntv4df:
- case CODE_FOR_sse2_movntv2df:
- case CODE_FOR_sse4a_vmmovntv2df:
- case CODE_FOR_sse2_movntidi:
- case CODE_FOR_sse_movntq:
- case CODE_FOR_sse2_movntisi:
- case CODE_FOR_avx512f_movntv16sf:
- case CODE_FOR_avx512f_movntv8df:
- case CODE_FOR_avx512f_movntv8di:
- aligned_mem = true;
- break;
- default:
- break;
- }
- break;
- case VOID_FTYPE_PVOID_PCVOID:
- nargs = 1;
- klass = store;
- memory = 0;
-
- break;
- case V4SF_FTYPE_V4SF_PCV2SF:
- case V2DF_FTYPE_V2DF_PCDOUBLE:
- nargs = 2;
- klass = load;
- memory = 1;
- break;
- case V8SF_FTYPE_PCV8SF_V8SI:
- case V4DF_FTYPE_PCV4DF_V4DI:
- case V4SF_FTYPE_PCV4SF_V4SI:
- case V2DF_FTYPE_PCV2DF_V2DI:
- case V8SI_FTYPE_PCV8SI_V8SI:
- case V4DI_FTYPE_PCV4DI_V4DI:
- case V4SI_FTYPE_PCV4SI_V4SI:
- case V2DI_FTYPE_PCV2DI_V2DI:
- case VOID_FTYPE_INT_INT64:
- nargs = 2;
- klass = load;
- memory = 0;
- break;
- case VOID_FTYPE_PV8DF_V8DF_UQI:
- case VOID_FTYPE_PV4DF_V4DF_UQI:
- case VOID_FTYPE_PV2DF_V2DF_UQI:
- case VOID_FTYPE_PV16SF_V16SF_UHI:
- case VOID_FTYPE_PV8SF_V8SF_UQI:
- case VOID_FTYPE_PV4SF_V4SF_UQI:
- case VOID_FTYPE_PV8DI_V8DI_UQI:
- case VOID_FTYPE_PV4DI_V4DI_UQI:
- case VOID_FTYPE_PV2DI_V2DI_UQI:
- case VOID_FTYPE_PV16SI_V16SI_UHI:
- case VOID_FTYPE_PV8SI_V8SI_UQI:
- case VOID_FTYPE_PV4SI_V4SI_UQI:
- case VOID_FTYPE_PV64QI_V64QI_UDI:
- case VOID_FTYPE_PV32HI_V32HI_USI:
- case VOID_FTYPE_PV32QI_V32QI_USI:
- case VOID_FTYPE_PV16QI_V16QI_UHI:
- case VOID_FTYPE_PV16HI_V16HI_UHI:
- case VOID_FTYPE_PV8HI_V8HI_UQI:
- switch (icode)
- {
- /* These builtins and instructions require the memory
- to be properly aligned. */
- case CODE_FOR_avx512f_storev16sf_mask:
- case CODE_FOR_avx512f_storev16si_mask:
- case CODE_FOR_avx512f_storev8df_mask:
- case CODE_FOR_avx512f_storev8di_mask:
- case CODE_FOR_avx512vl_storev8sf_mask:
- case CODE_FOR_avx512vl_storev8si_mask:
- case CODE_FOR_avx512vl_storev4df_mask:
- case CODE_FOR_avx512vl_storev4di_mask:
- case CODE_FOR_avx512vl_storev4sf_mask:
- case CODE_FOR_avx512vl_storev4si_mask:
- case CODE_FOR_avx512vl_storev2df_mask:
- case CODE_FOR_avx512vl_storev2di_mask:
- aligned_mem = true;
- break;
- default:
- break;
- }
- /* FALLTHRU */
- case VOID_FTYPE_PV8SF_V8SI_V8SF:
- case VOID_FTYPE_PV4DF_V4DI_V4DF:
- case VOID_FTYPE_PV4SF_V4SI_V4SF:
- case VOID_FTYPE_PV2DF_V2DI_V2DF:
- case VOID_FTYPE_PV8SI_V8SI_V8SI:
- case VOID_FTYPE_PV4DI_V4DI_V4DI:
- case VOID_FTYPE_PV4SI_V4SI_V4SI:
- case VOID_FTYPE_PV2DI_V2DI_V2DI:
- case VOID_FTYPE_PV8SI_V8DI_UQI:
- case VOID_FTYPE_PV8HI_V8DI_UQI:
- case VOID_FTYPE_PV16HI_V16SI_UHI:
- case VOID_FTYPE_PUDI_V8DI_UQI:
- case VOID_FTYPE_PV16QI_V16SI_UHI:
- case VOID_FTYPE_PV4SI_V4DI_UQI:
- case VOID_FTYPE_PUDI_V2DI_UQI:
- case VOID_FTYPE_PUDI_V4DI_UQI:
- case VOID_FTYPE_PUSI_V2DI_UQI:
- case VOID_FTYPE_PV8HI_V8SI_UQI:
- case VOID_FTYPE_PUDI_V4SI_UQI:
- case VOID_FTYPE_PUSI_V4DI_UQI:
- case VOID_FTYPE_PUHI_V2DI_UQI:
- case VOID_FTYPE_PUDI_V8SI_UQI:
- case VOID_FTYPE_PUSI_V4SI_UQI:
- case VOID_FTYPE_PCHAR_V64QI_UDI:
- case VOID_FTYPE_PCHAR_V32QI_USI:
- case VOID_FTYPE_PCHAR_V16QI_UHI:
- case VOID_FTYPE_PSHORT_V32HI_USI:
- case VOID_FTYPE_PSHORT_V16HI_UHI:
- case VOID_FTYPE_PSHORT_V8HI_UQI:
- case VOID_FTYPE_PINT_V16SI_UHI:
- case VOID_FTYPE_PINT_V8SI_UQI:
- case VOID_FTYPE_PINT_V4SI_UQI:
- case VOID_FTYPE_PINT64_V8DI_UQI:
- case VOID_FTYPE_PINT64_V4DI_UQI:
- case VOID_FTYPE_PINT64_V2DI_UQI:
- case VOID_FTYPE_PDOUBLE_V8DF_UQI:
- case VOID_FTYPE_PDOUBLE_V4DF_UQI:
- case VOID_FTYPE_PDOUBLE_V2DF_UQI:
- case VOID_FTYPE_PFLOAT_V16SF_UHI:
- case VOID_FTYPE_PFLOAT_V8SF_UQI:
- case VOID_FTYPE_PFLOAT_V4SF_UQI:
- case VOID_FTYPE_PV32QI_V32HI_USI:
- case VOID_FTYPE_PV16QI_V16HI_UHI:
- case VOID_FTYPE_PUDI_V8HI_UQI:
- nargs = 2;
- klass = store;
- /* Reserve memory operand for target. */
- memory = ARRAY_SIZE (args);
- break;
- case V4SF_FTYPE_PCV4SF_V4SF_UQI:
- case V8SF_FTYPE_PCV8SF_V8SF_UQI:
- case V16SF_FTYPE_PCV16SF_V16SF_UHI:
- case V4SI_FTYPE_PCV4SI_V4SI_UQI:
- case V8SI_FTYPE_PCV8SI_V8SI_UQI:
- case V16SI_FTYPE_PCV16SI_V16SI_UHI:
- case V2DF_FTYPE_PCV2DF_V2DF_UQI:
- case V4DF_FTYPE_PCV4DF_V4DF_UQI:
- case V8DF_FTYPE_PCV8DF_V8DF_UQI:
- case V2DI_FTYPE_PCV2DI_V2DI_UQI:
- case V4DI_FTYPE_PCV4DI_V4DI_UQI:
- case V8DI_FTYPE_PCV8DI_V8DI_UQI:
- case V64QI_FTYPE_PCV64QI_V64QI_UDI:
- case V32HI_FTYPE_PCV32HI_V32HI_USI:
- case V32QI_FTYPE_PCV32QI_V32QI_USI:
- case V16QI_FTYPE_PCV16QI_V16QI_UHI:
- case V16HI_FTYPE_PCV16HI_V16HI_UHI:
- case V8HI_FTYPE_PCV8HI_V8HI_UQI:
- switch (icode)
- {
- /* These builtins and instructions require the memory
- to be properly aligned. */
- case CODE_FOR_avx512f_loadv16sf_mask:
- case CODE_FOR_avx512f_loadv16si_mask:
- case CODE_FOR_avx512f_loadv8df_mask:
- case CODE_FOR_avx512f_loadv8di_mask:
- case CODE_FOR_avx512vl_loadv8sf_mask:
- case CODE_FOR_avx512vl_loadv8si_mask:
- case CODE_FOR_avx512vl_loadv4df_mask:
- case CODE_FOR_avx512vl_loadv4di_mask:
- case CODE_FOR_avx512vl_loadv4sf_mask:
- case CODE_FOR_avx512vl_loadv4si_mask:
- case CODE_FOR_avx512vl_loadv2df_mask:
- case CODE_FOR_avx512vl_loadv2di_mask:
- case CODE_FOR_avx512bw_loadv64qi_mask:
- case CODE_FOR_avx512vl_loadv32qi_mask:
- case CODE_FOR_avx512vl_loadv16qi_mask:
- case CODE_FOR_avx512bw_loadv32hi_mask:
- case CODE_FOR_avx512vl_loadv16hi_mask:
- case CODE_FOR_avx512vl_loadv8hi_mask:
- aligned_mem = true;
- break;
- default:
- break;
- }
- /* FALLTHRU */
- case V64QI_FTYPE_PCCHAR_V64QI_UDI:
- case V32QI_FTYPE_PCCHAR_V32QI_USI:
- case V16QI_FTYPE_PCCHAR_V16QI_UHI:
- case V32HI_FTYPE_PCSHORT_V32HI_USI:
- case V16HI_FTYPE_PCSHORT_V16HI_UHI:
- case V8HI_FTYPE_PCSHORT_V8HI_UQI:
- case V16SI_FTYPE_PCINT_V16SI_UHI:
- case V8SI_FTYPE_PCINT_V8SI_UQI:
- case V4SI_FTYPE_PCINT_V4SI_UQI:
- case V8DI_FTYPE_PCINT64_V8DI_UQI:
- case V4DI_FTYPE_PCINT64_V4DI_UQI:
- case V2DI_FTYPE_PCINT64_V2DI_UQI:
- case V8DF_FTYPE_PCDOUBLE_V8DF_UQI:
- case V4DF_FTYPE_PCDOUBLE_V4DF_UQI:
- case V2DF_FTYPE_PCDOUBLE_V2DF_UQI:
- case V16SF_FTYPE_PCFLOAT_V16SF_UHI:
- case V8SF_FTYPE_PCFLOAT_V8SF_UQI:
- case V4SF_FTYPE_PCFLOAT_V4SF_UQI:
- nargs = 3;
- klass = load;
- memory = 0;
- break;
- case VOID_FTYPE_UINT_UINT_UINT:
- case VOID_FTYPE_UINT64_UINT_UINT:
- case UCHAR_FTYPE_UINT_UINT_UINT:
- case UCHAR_FTYPE_UINT64_UINT_UINT:
- nargs = 3;
- klass = load;
- memory = ARRAY_SIZE (args);
- last_arg_constant = true;
- break;
- default:
- gcc_unreachable ();
- }
-
- gcc_assert (nargs <= ARRAY_SIZE (args));
-
- if (klass == store)
- {
- arg = CALL_EXPR_ARG (exp, 0);
- op = expand_normal (arg);
- gcc_assert (target == 0);
- if (memory)
- {
- op = ix86_zero_extend_to_Pmode (op);
- target = gen_rtx_MEM (tmode, op);
- /* target at this point has just BITS_PER_UNIT MEM_ALIGN
- on it. Try to improve it using get_pointer_alignment,
- and if the special builtin is one that requires strict
- mode alignment, also from its GET_MODE_ALIGNMENT.
- Failure to do so could lead to ix86_legitimate_combined_insn
- rejecting all changes to such insns. */
- unsigned int align = get_pointer_alignment (arg);
- if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
- align = GET_MODE_ALIGNMENT (tmode);
- if (MEM_ALIGN (target) < align)
- set_mem_align (target, align);
- }
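- /* E.g. (a sketch; the intrinsic is only an example): for
- _mm_stream_ps the store insn demands 16-byte alignment, so even
- when get_pointer_alignment cannot prove it, MEM_ALIGN is raised
- to GET_MODE_ALIGNMENT (V4SFmode), since the builtin's contract
- already requires an aligned pointer. */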
- else
- target = force_reg (tmode, op);
- arg_adjust = 1;
- }
- else
- {
- arg_adjust = 0;
- if (optimize
- || target == 0
- || !register_operand (target, tmode)
- || GET_MODE (target) != tmode)
- target = gen_reg_rtx (tmode);
- }
-
- for (i = 0; i < nargs; i++)
- {
- machine_mode mode = insn_p->operand[i + 1].mode;
- bool match;
-
- arg = CALL_EXPR_ARG (exp, i + arg_adjust);
- op = expand_normal (arg);
- match = insn_p->operand[i + 1].predicate (op, mode);
-
- if (last_arg_constant && (i + 1) == nargs)
- {
- if (!match)
- {
- if (icode == CODE_FOR_lwp_lwpvalsi3
- || icode == CODE_FOR_lwp_lwpinssi3
- || icode == CODE_FOR_lwp_lwpvaldi3
- || icode == CODE_FOR_lwp_lwpinsdi3)
- error ("the last argument must be a 32-bit immediate");
- else
- error ("the last argument must be an 8-bit immediate");
- return const0_rtx;
- }
- }
- else
- {
- if (i == memory)
- {
- /* This must be the memory operand. */
- op = ix86_zero_extend_to_Pmode (op);
- op = gen_rtx_MEM (mode, op);
- /* op at this point has just BITS_PER_UNIT MEM_ALIGN
- on it. Try to improve it using get_pointer_alignment,
- and if the special builtin is one that requires strict
- mode alignment, also from its GET_MODE_ALIGNMENT.
- Failure to do so could lead to ix86_legitimate_combined_insn
- rejecting all changes to such insns. */
- unsigned int align = get_pointer_alignment (arg);
- if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
- align = GET_MODE_ALIGNMENT (mode);
- if (MEM_ALIGN (op) < align)
- set_mem_align (op, align);
- }
- else
- {
- /* This must be a register. */
- if (VECTOR_MODE_P (mode))
- op = safe_vector_operand (op, mode);
-
- op = fixup_modeless_constant (op, mode);
-
- if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
- op = copy_to_mode_reg (mode, op);
- else
- {
- op = copy_to_reg (op);
- op = lowpart_subreg (mode, op, GET_MODE (op));
- }
- }
- }
-
- args[i].op = op;
- args[i].mode = mode;
- }
-
- switch (nargs)
- {
- case 0:
- pat = GEN_FCN (icode) (target);
- break;
- case 1:
- pat = GEN_FCN (icode) (target, args[0].op);
- break;
- case 2:
- pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
- break;
- case 3:
- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
- break;
- default:
- gcc_unreachable ();
- }
-
- if (! pat)
- return 0;
- emit_insn (pat);
- return klass == store ? 0 : target;
-}
-
-/* Return the integer constant in ARG. Constrain it to be in the range
- of the subparts of VEC_TYPE; issue an error if not. */
-
-static int
-get_element_number (tree vec_type, tree arg)
-{
- unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
-
- if (!tree_fits_uhwi_p (arg)
- || (elt = tree_to_uhwi (arg), elt > max))
- {
- error ("selector must be an integer constant in the range "
- "[0, %wi]", max);
- return 0;
- }
-
- return elt;
-}
-
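-/* E.g. (illustrative): for a V4SF vector argument, max is 3, so a
- selector of 7, or one that is not a compile-time constant, is
- rejected with "selector must be an integer constant in the range
- [0, 3]". */
-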
-/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
- ix86_expand_vector_init. We DO have language-level syntax for this, in
- the form of (type){ init-list }. Except that since we can't place emms
- instructions from inside the compiler, we can't allow the use of MMX
- registers unless the user explicitly asks for it. So we do *not* define
- vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
- we have builtins invoked by mmintrin.h that give us license to emit
- these sorts of instructions. */
-
-static rtx
-ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
-{
- machine_mode tmode = TYPE_MODE (type);
- machine_mode inner_mode = GET_MODE_INNER (tmode);
- int i, n_elt = GET_MODE_NUNITS (tmode);
- rtvec v = rtvec_alloc (n_elt);
-
- gcc_assert (VECTOR_MODE_P (tmode));
- gcc_assert (call_expr_nargs (exp) == n_elt);
-
- for (i = 0; i < n_elt; ++i)
- {
- rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
- RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
- }
-
- if (!target || !register_operand (target, tmode))
- target = gen_reg_rtx (tmode);
-
- ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
- return target;
-}
-
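-/* Sketch (illustrative): __builtin_ia32_vec_init_v2si (1, 2)
- expands both arguments, takes the SImode lowpart of each, and
- hands (parallel [(const_int 1) (const_int 2)]) to
- ix86_expand_vector_init to build the V2SI value. */
-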
-/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
- ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
- had a language-level syntax for referencing vector elements. */
-
-static rtx
-ix86_expand_vec_ext_builtin (tree exp, rtx target)
-{
- machine_mode tmode, mode0;
- tree arg0, arg1;
- int elt;
- rtx op0;
-
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
-
- op0 = expand_normal (arg0);
- elt = get_element_number (TREE_TYPE (arg0), arg1);
-
- tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
- mode0 = TYPE_MODE (TREE_TYPE (arg0));
- gcc_assert (VECTOR_MODE_P (mode0));
-
- op0 = force_reg (mode0, op0);
-
- if (optimize || !target || !register_operand (target, tmode))
- target = gen_reg_rtx (tmode);
-
- ix86_expand_vector_extract (true, target, op0, elt);
-
- return target;
-}
-
-/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
- ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
- a language-level syntax for referencing vector elements. */
-
-static rtx
-ix86_expand_vec_set_builtin (tree exp)
-{
- machine_mode tmode, mode1;
- tree arg0, arg1, arg2;
- int elt;
- rtx op0, op1, target;
-
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- arg2 = CALL_EXPR_ARG (exp, 2);
-
- tmode = TYPE_MODE (TREE_TYPE (arg0));
- mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
- gcc_assert (VECTOR_MODE_P (tmode));
-
- op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
- op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
- elt = get_element_number (TREE_TYPE (arg0), arg2);
-
- if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
- op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
-
- op0 = force_reg (tmode, op0);
- op1 = force_reg (mode1, op1);
-
- /* OP0 is the source of these builtin functions and shouldn't be
- modified. Create a copy, use it and return it as target. */
- target = gen_reg_rtx (tmode);
- emit_move_insn (target, op0);
- ix86_expand_vector_set (true, target, op1, elt);
-
- return target;
-}
-
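-/* Sketch (assuming the usual emmintrin.h mapping):
- _mm_insert_epi16 (v, x, 3) arrives here as
- IX86_BUILTIN_VEC_SET_V8HI; v is first copied into a fresh
- register so the builtin's source operand is never clobbered,
- then element 3 is overwritten with x. */
-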
-/* Expand an expression EXP that calls a built-in function,
- with result going to TARGET if that's convenient
- (and in mode MODE if that's convenient).
- SUBTARGET may be used as the target for computing one of EXP's operands.
- IGNORE is nonzero if the value is to be ignored. */
-
-rtx
-ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
- machine_mode mode, int ignore)
-{
- size_t i;
- enum insn_code icode, icode2;
- tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
- tree arg0, arg1, arg2, arg3, arg4;
- rtx op0, op1, op2, op3, op4, pat, pat2, insn;
- machine_mode mode0, mode1, mode2, mode3, mode4;
- unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
-
- /* For CPU builtins that can be folded, fold first and expand the fold. */
- switch (fcode)
- {
- case IX86_BUILTIN_CPU_INIT:
- {
- /* Make it call __cpu_indicator_init in libgcc. */
- tree call_expr, fndecl, type;
- type = build_function_type_list (integer_type_node, NULL_TREE);
- fndecl = build_fn_decl ("__cpu_indicator_init", type);
- call_expr = build_call_expr (fndecl, 0);
- return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
- }
- case IX86_BUILTIN_CPU_IS:
- case IX86_BUILTIN_CPU_SUPPORTS:
- {
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
- gcc_assert (fold_expr != NULL_TREE);
- return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
- }
- }
-
- HOST_WIDE_INT isa = ix86_isa_flags;
- HOST_WIDE_INT isa2 = ix86_isa_flags2;
- HOST_WIDE_INT bisa = ix86_builtins_isa[fcode].isa;
- HOST_WIDE_INT bisa2 = ix86_builtins_isa[fcode].isa2;
- /* The general case is we require all the ISAs specified in bisa{,2}
- to be enabled.
- The exceptions are:
- OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
- OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32
- OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4
- where for each such pair it is sufficient if either of the ISAs is
- enabled; if the pair is ORed with other options, those other
- options must be enabled as well.
- OPTION_MASK_ISA_MMX in bisa is satisfied also if TARGET_MMX_WITH_SSE. */
- if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A))
- == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A))
- && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) != 0)
- isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A);
- if (((bisa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32))
- == (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32))
- && (isa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) != 0)
- isa |= (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32);
- if (((bisa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
- == (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4))
- && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0)
- isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4);
- if ((bisa & OPTION_MASK_ISA_MMX) && !TARGET_MMX && TARGET_MMX_WITH_SSE)
- {
- bisa &= ~OPTION_MASK_ISA_MMX;
- bisa |= OPTION_MASK_ISA_SSE2;
- }
- if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2)
- {
- bool add_abi_p = bisa & OPTION_MASK_ISA_64BIT;
- if (TARGET_ABI_X32)
- bisa |= OPTION_MASK_ABI_X32;
- else
- bisa |= OPTION_MASK_ABI_64;
- char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL,
- (enum fpmath_unit) 0,
- (enum prefer_vector_width) 0,
- false, add_abi_p);
- if (!opts)
- error ("%qE needs unknown isa option", fndecl);
- else
- {
- gcc_assert (opts != NULL);
- error ("%qE needs isa option %s", fndecl, opts);
- free (opts);
- }
- return expand_call (exp, target, ignore);
- }
-
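- /* Two consequences of the checks above (illustrative): a builtin
- recorded with OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 is
- usable when either -msse4.2 or -mcrc32 is in effect, while calling
- e.g. an AVX2-only builtin without -mavx2 is diagnosed with
- "needs isa option -mavx2" and then expanded as an ordinary
- external call. */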
- switch (fcode)
- {
- case IX86_BUILTIN_MASKMOVQ:
- case IX86_BUILTIN_MASKMOVDQU:
- icode = (fcode == IX86_BUILTIN_MASKMOVQ
- ? CODE_FOR_mmx_maskmovq
- : CODE_FOR_sse2_maskmovdqu);
- /* Note the arg order is different from the operand order. */
- arg1 = CALL_EXPR_ARG (exp, 0);
- arg2 = CALL_EXPR_ARG (exp, 1);
- arg0 = CALL_EXPR_ARG (exp, 2);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- mode0 = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
- mode2 = insn_data[icode].operand[2].mode;
-
- op0 = ix86_zero_extend_to_Pmode (op0);
- op0 = gen_rtx_MEM (mode1, op0);
-
- if (!insn_data[icode].operand[0].predicate (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (!insn_data[icode].operand[1].predicate (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- if (!insn_data[icode].operand[2].predicate (op2, mode2))
- op2 = copy_to_mode_reg (mode2, op2);
- pat = GEN_FCN (icode) (op0, op1, op2);
- if (! pat)
- return 0;
- emit_insn (pat);
- return 0;
-
- case IX86_BUILTIN_LDMXCSR:
- op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
- target = assign_386_stack_local (SImode, SLOT_TEMP);
- emit_move_insn (target, op0);
- emit_insn (gen_sse_ldmxcsr (target));
- return 0;
-
- case IX86_BUILTIN_STMXCSR:
- target = assign_386_stack_local (SImode, SLOT_TEMP);
- emit_insn (gen_sse_stmxcsr (target));
- return copy_to_mode_reg (SImode, target);
-
- case IX86_BUILTIN_CLFLUSH:
- arg0 = CALL_EXPR_ARG (exp, 0);
- op0 = expand_normal (arg0);
- icode = CODE_FOR_sse2_clflush;
- if (!insn_data[icode].operand[0].predicate (op0, Pmode))
- op0 = ix86_zero_extend_to_Pmode (op0);
-
- emit_insn (gen_sse2_clflush (op0));
- return 0;
-
- case IX86_BUILTIN_CLWB:
- arg0 = CALL_EXPR_ARG (exp, 0);
- op0 = expand_normal (arg0);
- icode = CODE_FOR_clwb;
- if (!insn_data[icode].operand[0].predicate (op0, Pmode))
- op0 = ix86_zero_extend_to_Pmode (op0);
-
- emit_insn (gen_clwb (op0));
- return 0;
-
- case IX86_BUILTIN_CLFLUSHOPT:
- arg0 = CALL_EXPR_ARG (exp, 0);
- op0 = expand_normal (arg0);
- icode = CODE_FOR_clflushopt;
- if (!insn_data[icode].operand[0].predicate (op0, Pmode))
- op0 = ix86_zero_extend_to_Pmode (op0);
-
- emit_insn (gen_clflushopt (op0));
- return 0;
-
- case IX86_BUILTIN_MONITOR:
- case IX86_BUILTIN_MONITORX:
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- arg2 = CALL_EXPR_ARG (exp, 2);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- if (!REG_P (op0))
- op0 = ix86_zero_extend_to_Pmode (op0);
- if (!REG_P (op1))
- op1 = copy_to_mode_reg (SImode, op1);
- if (!REG_P (op2))
- op2 = copy_to_mode_reg (SImode, op2);
-
- emit_insn (fcode == IX86_BUILTIN_MONITOR
- ? gen_sse3_monitor (Pmode, op0, op1, op2)
- : gen_monitorx (Pmode, op0, op1, op2));
- return 0;
-
- case IX86_BUILTIN_MWAIT:
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- if (!REG_P (op0))
- op0 = copy_to_mode_reg (SImode, op0);
- if (!REG_P (op1))
- op1 = copy_to_mode_reg (SImode, op1);
- emit_insn (gen_sse3_mwait (op0, op1));
- return 0;
-
- case IX86_BUILTIN_MWAITX:
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- arg2 = CALL_EXPR_ARG (exp, 2);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- if (!REG_P (op0))
- op0 = copy_to_mode_reg (SImode, op0);
- if (!REG_P (op1))
- op1 = copy_to_mode_reg (SImode, op1);
- if (!REG_P (op2))
- op2 = copy_to_mode_reg (SImode, op2);
- emit_insn (gen_mwaitx (op0, op1, op2));
- return 0;
-
- case IX86_BUILTIN_UMONITOR:
- arg0 = CALL_EXPR_ARG (exp, 0);
- op0 = expand_normal (arg0);
-
- op0 = ix86_zero_extend_to_Pmode (op0);
- emit_insn (gen_umonitor (Pmode, op0));
- return 0;
-
- case IX86_BUILTIN_UMWAIT:
- case IX86_BUILTIN_TPAUSE:
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
-
- if (!REG_P (op0))
- op0 = copy_to_mode_reg (SImode, op0);
-
- op1 = force_reg (DImode, op1);
-
- if (TARGET_64BIT)
- {
- op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
- NULL, 1, OPTAB_DIRECT);
- switch (fcode)
- {
- case IX86_BUILTIN_UMWAIT:
- icode = CODE_FOR_umwait_rex64;
- break;
- case IX86_BUILTIN_TPAUSE:
- icode = CODE_FOR_tpause_rex64;
- break;
- default:
- gcc_unreachable ();
- }
-
- op2 = gen_lowpart (SImode, op2);
- op1 = gen_lowpart (SImode, op1);
- pat = GEN_FCN (icode) (op0, op1, op2);
- }
- else
- {
- switch (fcode)
- {
- case IX86_BUILTIN_UMWAIT:
- icode = CODE_FOR_umwait;
- break;
- case IX86_BUILTIN_TPAUSE:
- icode = CODE_FOR_tpause;
- break;
- default:
- gcc_unreachable ();
- }
- pat = GEN_FCN (icode) (op0, op1);
- }
-
- if (!pat)
- return 0;
-
- emit_insn (pat);
-
- if (target == 0
- || !register_operand (target, QImode))
- target = gen_reg_rtx (QImode);
-
- pat = gen_rtx_EQ (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
- const0_rtx);
- emit_insn (gen_rtx_SET (target, pat));
-
- return target;
-
- case IX86_BUILTIN_CLZERO:
- arg0 = CALL_EXPR_ARG (exp, 0);
- op0 = expand_normal (arg0);
- if (!REG_P (op0))
- op0 = ix86_zero_extend_to_Pmode (op0);
- emit_insn (gen_clzero (Pmode, op0));
- return 0;
-
- case IX86_BUILTIN_CLDEMOTE:
- arg0 = CALL_EXPR_ARG (exp, 0);
- op0 = expand_normal (arg0);
- icode = CODE_FOR_cldemote;
- if (!insn_data[icode].operand[0].predicate (op0, Pmode))
- op0 = ix86_zero_extend_to_Pmode (op0);
-
- emit_insn (gen_cldemote (op0));
- return 0;
-
- case IX86_BUILTIN_VEC_INIT_V2SI:
- case IX86_BUILTIN_VEC_INIT_V4HI:
- case IX86_BUILTIN_VEC_INIT_V8QI:
- return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
-
- case IX86_BUILTIN_VEC_EXT_V2DF:
- case IX86_BUILTIN_VEC_EXT_V2DI:
- case IX86_BUILTIN_VEC_EXT_V4SF:
- case IX86_BUILTIN_VEC_EXT_V4SI:
- case IX86_BUILTIN_VEC_EXT_V8HI:
- case IX86_BUILTIN_VEC_EXT_V2SI:
- case IX86_BUILTIN_VEC_EXT_V4HI:
- case IX86_BUILTIN_VEC_EXT_V16QI:
- return ix86_expand_vec_ext_builtin (exp, target);
-
- case IX86_BUILTIN_VEC_SET_V2DI:
- case IX86_BUILTIN_VEC_SET_V4SF:
- case IX86_BUILTIN_VEC_SET_V4SI:
- case IX86_BUILTIN_VEC_SET_V8HI:
- case IX86_BUILTIN_VEC_SET_V4HI:
- case IX86_BUILTIN_VEC_SET_V16QI:
- return ix86_expand_vec_set_builtin (exp);
-
- case IX86_BUILTIN_NANQ:
- case IX86_BUILTIN_NANSQ:
- return expand_call (exp, target, ignore);
-
- case IX86_BUILTIN_RDPID:
-
- op0 = gen_reg_rtx (word_mode);
-
- if (TARGET_64BIT)
- {
- insn = gen_rdpid_rex64 (op0);
- op0 = convert_to_mode (SImode, op0, 1);
- }
- else
- insn = gen_rdpid (op0);
-
- emit_insn (insn);
-
- if (target == 0
- || !register_operand (target, SImode))
- target = gen_reg_rtx (SImode);
-
- emit_move_insn (target, op0);
- return target;
-
- case IX86_BUILTIN_2INTERSECTD512:
- case IX86_BUILTIN_2INTERSECTQ512:
- case IX86_BUILTIN_2INTERSECTD256:
- case IX86_BUILTIN_2INTERSECTQ256:
- case IX86_BUILTIN_2INTERSECTD128:
- case IX86_BUILTIN_2INTERSECTQ128:
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- arg2 = CALL_EXPR_ARG (exp, 2);
- arg3 = CALL_EXPR_ARG (exp, 3);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- op3 = expand_normal (arg3);
-
- if (!address_operand (op0, VOIDmode))
- {
- op0 = convert_memory_address (Pmode, op0);
- op0 = copy_addr_to_reg (op0);
- }
- if (!address_operand (op1, VOIDmode))
- {
- op1 = convert_memory_address (Pmode, op1);
- op1 = copy_addr_to_reg (op1);
- }
-
- switch (fcode)
- {
- case IX86_BUILTIN_2INTERSECTD512:
- mode4 = P2HImode;
- icode = CODE_FOR_avx512vp2intersect_2intersectv16si;
- break;
- case IX86_BUILTIN_2INTERSECTQ512:
- mode4 = P2QImode;
- icode = CODE_FOR_avx512vp2intersect_2intersectv8di;
- break;
- case IX86_BUILTIN_2INTERSECTD256:
- mode4 = P2QImode;
- icode = CODE_FOR_avx512vp2intersect_2intersectv8si;
- break;
- case IX86_BUILTIN_2INTERSECTQ256:
- mode4 = P2QImode;
- icode = CODE_FOR_avx512vp2intersect_2intersectv4di;
- break;
- case IX86_BUILTIN_2INTERSECTD128:
- mode4 = P2QImode;
- icode = CODE_FOR_avx512vp2intersect_2intersectv4si;
- break;
- case IX86_BUILTIN_2INTERSECTQ128:
- mode4 = P2QImode;
- icode = CODE_FOR_avx512vp2intersect_2intersectv2di;
- break;
- default:
- gcc_unreachable ();
- }
-
- mode2 = insn_data[icode].operand[1].mode;
- mode3 = insn_data[icode].operand[2].mode;
- if (!insn_data[icode].operand[1].predicate (op2, mode2))
- op2 = copy_to_mode_reg (mode2, op2);
- if (!insn_data[icode].operand[2].predicate (op3, mode3))
- op3 = copy_to_mode_reg (mode3, op3);
-
- op4 = gen_reg_rtx (mode4);
- emit_insn (GEN_FCN (icode) (op4, op2, op3));
- mode0 = mode4 == P2HImode ? HImode : QImode;
- emit_move_insn (gen_rtx_MEM (mode0, op0),
- gen_lowpart (mode0, op4));
- emit_move_insn (gen_rtx_MEM (mode0, op1),
- gen_highpart (mode0, op4));
-
- return 0;
-
- case IX86_BUILTIN_RDPMC:
- case IX86_BUILTIN_RDTSC:
- case IX86_BUILTIN_RDTSCP:
- case IX86_BUILTIN_XGETBV:
-
- op0 = gen_reg_rtx (DImode);
- op1 = gen_reg_rtx (DImode);
-
- if (fcode == IX86_BUILTIN_RDPMC)
- {
- arg0 = CALL_EXPR_ARG (exp, 0);
- op2 = expand_normal (arg0);
- if (!register_operand (op2, SImode))
- op2 = copy_to_mode_reg (SImode, op2);
-
- insn = (TARGET_64BIT
- ? gen_rdpmc_rex64 (op0, op1, op2)
- : gen_rdpmc (op0, op2));
- emit_insn (insn);
- }
- else if (fcode == IX86_BUILTIN_XGETBV)
- {
- arg0 = CALL_EXPR_ARG (exp, 0);
- op2 = expand_normal (arg0);
- if (!register_operand (op2, SImode))
- op2 = copy_to_mode_reg (SImode, op2);
-
- insn = (TARGET_64BIT
- ? gen_xgetbv_rex64 (op0, op1, op2)
- : gen_xgetbv (op0, op2));
- emit_insn (insn);
- }
- else if (fcode == IX86_BUILTIN_RDTSC)
- {
- insn = (TARGET_64BIT
- ? gen_rdtsc_rex64 (op0, op1)
- : gen_rdtsc (op0));
- emit_insn (insn);
- }
- else
- {
- op2 = gen_reg_rtx (SImode);
-
- insn = (TARGET_64BIT
- ? gen_rdtscp_rex64 (op0, op1, op2)
- : gen_rdtscp (op0, op2));
- emit_insn (insn);
-
- arg0 = CALL_EXPR_ARG (exp, 0);
- op4 = expand_normal (arg0);
- if (!address_operand (op4, VOIDmode))
- {
- op4 = convert_memory_address (Pmode, op4);
- op4 = copy_addr_to_reg (op4);
- }
- emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
- }
-
- if (target == 0
- || !register_operand (target, DImode))
- target = gen_reg_rtx (DImode);
-
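- /* In 64-bit mode the result comes back in two registers;
- combine the high half with the low half to form the full
- DImode value. */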
- if (TARGET_64BIT)
- {
- op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
- op1, 1, OPTAB_DIRECT);
- op0 = expand_simple_binop (DImode, IOR, op0, op1,
- op0, 1, OPTAB_DIRECT);
- }
-
- emit_move_insn (target, op0);
- return target;
-
- case IX86_BUILTIN_ENQCMD:
- case IX86_BUILTIN_ENQCMDS:
- case IX86_BUILTIN_MOVDIR64B:
-
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
-
- op0 = ix86_zero_extend_to_Pmode (op0);
- if (!address_operand (op1, VOIDmode))
- {
- op1 = convert_memory_address (Pmode, op1);
- op1 = copy_addr_to_reg (op1);
- }
- op1 = gen_rtx_MEM (XImode, op1);
-
- if (fcode == IX86_BUILTIN_MOVDIR64B)
- {
- emit_insn (gen_movdir64b (Pmode, op0, op1));
- return 0;
- }
- else
- {
- rtx pat;
-
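- /* Zero an SImode register and write the flag result into its
- low byte, so the int return value is already zero-extended. */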
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
- target = gen_rtx_SUBREG (QImode, target, 0);
-
- if (fcode == IX86_BUILTIN_ENQCMD)
- pat = gen_enqcmd (UNSPECV_ENQCMD, Pmode, op0, op1);
- else
- pat = gen_enqcmd (UNSPECV_ENQCMDS, Pmode, op0, op1);
-
- emit_insn (pat);
-
- emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (EQ, QImode,
- SET_DEST (pat),
- const0_rtx)));
-
- return SUBREG_REG (target);
- }
-
- case IX86_BUILTIN_FXSAVE:
- case IX86_BUILTIN_FXRSTOR:
- case IX86_BUILTIN_FXSAVE64:
- case IX86_BUILTIN_FXRSTOR64:
- case IX86_BUILTIN_FNSTENV:
- case IX86_BUILTIN_FLDENV:
- mode0 = BLKmode;
- switch (fcode)
- {
- case IX86_BUILTIN_FXSAVE:
- icode = CODE_FOR_fxsave;
- break;
- case IX86_BUILTIN_FXRSTOR:
- icode = CODE_FOR_fxrstor;
- break;
- case IX86_BUILTIN_FXSAVE64:
- icode = CODE_FOR_fxsave64;
- break;
- case IX86_BUILTIN_FXRSTOR64:
- icode = CODE_FOR_fxrstor64;
- break;
- case IX86_BUILTIN_FNSTENV:
- icode = CODE_FOR_fnstenv;
- break;
- case IX86_BUILTIN_FLDENV:
- icode = CODE_FOR_fldenv;
- break;
- default:
- gcc_unreachable ();
- }
-
- arg0 = CALL_EXPR_ARG (exp, 0);
- op0 = expand_normal (arg0);
-
- if (!address_operand (op0, VOIDmode))
- {
- op0 = convert_memory_address (Pmode, op0);
- op0 = copy_addr_to_reg (op0);
- }
- op0 = gen_rtx_MEM (mode0, op0);
-
- pat = GEN_FCN (icode) (op0);
- if (pat)
- emit_insn (pat);
- return 0;
-
- case IX86_BUILTIN_XSETBV:
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
-
- if (!REG_P (op0))
- op0 = copy_to_mode_reg (SImode, op0);
-
- op1 = force_reg (DImode, op1);
-
- if (TARGET_64BIT)
- {
- op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
- NULL, 1, OPTAB_DIRECT);
-
- icode = CODE_FOR_xsetbv_rex64;
-
- op2 = gen_lowpart (SImode, op2);
- op1 = gen_lowpart (SImode, op1);
- pat = GEN_FCN (icode) (op0, op1, op2);
- }
- else
- {
- icode = CODE_FOR_xsetbv;
-
- pat = GEN_FCN (icode) (op0, op1);
- }
- if (pat)
- emit_insn (pat);
- return 0;
-
- case IX86_BUILTIN_XSAVE:
- case IX86_BUILTIN_XRSTOR:
- case IX86_BUILTIN_XSAVE64:
- case IX86_BUILTIN_XRSTOR64:
- case IX86_BUILTIN_XSAVEOPT:
- case IX86_BUILTIN_XSAVEOPT64:
- case IX86_BUILTIN_XSAVES:
- case IX86_BUILTIN_XRSTORS:
- case IX86_BUILTIN_XSAVES64:
- case IX86_BUILTIN_XRSTORS64:
- case IX86_BUILTIN_XSAVEC:
- case IX86_BUILTIN_XSAVEC64:
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
-
- if (!address_operand (op0, VOIDmode))
- {
- op0 = convert_memory_address (Pmode, op0);
- op0 = copy_addr_to_reg (op0);
- }
- op0 = gen_rtx_MEM (BLKmode, op0);
-
- op1 = force_reg (DImode, op1);
-
- if (TARGET_64BIT)
- {
- op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
- NULL, 1, OPTAB_DIRECT);
- switch (fcode)
- {
- case IX86_BUILTIN_XSAVE:
- icode = CODE_FOR_xsave_rex64;
- break;
- case IX86_BUILTIN_XRSTOR:
- icode = CODE_FOR_xrstor_rex64;
- break;
- case IX86_BUILTIN_XSAVE64:
- icode = CODE_FOR_xsave64;
- break;
- case IX86_BUILTIN_XRSTOR64:
- icode = CODE_FOR_xrstor64;
- break;
- case IX86_BUILTIN_XSAVEOPT:
- icode = CODE_FOR_xsaveopt_rex64;
- break;
- case IX86_BUILTIN_XSAVEOPT64:
- icode = CODE_FOR_xsaveopt64;
- break;
- case IX86_BUILTIN_XSAVES:
- icode = CODE_FOR_xsaves_rex64;
- break;
- case IX86_BUILTIN_XRSTORS:
- icode = CODE_FOR_xrstors_rex64;
- break;
- case IX86_BUILTIN_XSAVES64:
- icode = CODE_FOR_xsaves64;
- break;
- case IX86_BUILTIN_XRSTORS64:
- icode = CODE_FOR_xrstors64;
- break;
- case IX86_BUILTIN_XSAVEC:
- icode = CODE_FOR_xsavec_rex64;
- break;
- case IX86_BUILTIN_XSAVEC64:
- icode = CODE_FOR_xsavec64;
- break;
- default:
- gcc_unreachable ();
- }
-
- op2 = gen_lowpart (SImode, op2);
- op1 = gen_lowpart (SImode, op1);
- pat = GEN_FCN (icode) (op0, op1, op2);
- }
- else
- {
- switch (fcode)
- {
- case IX86_BUILTIN_XSAVE:
- icode = CODE_FOR_xsave;
- break;
- case IX86_BUILTIN_XRSTOR:
- icode = CODE_FOR_xrstor;
- break;
- case IX86_BUILTIN_XSAVEOPT:
- icode = CODE_FOR_xsaveopt;
- break;
- case IX86_BUILTIN_XSAVES:
- icode = CODE_FOR_xsaves;
- break;
- case IX86_BUILTIN_XRSTORS:
- icode = CODE_FOR_xrstors;
- break;
- case IX86_BUILTIN_XSAVEC:
- icode = CODE_FOR_xsavec;
- break;
- default:
- gcc_unreachable ();
- }
- pat = GEN_FCN (icode) (op0, op1);
- }
-
- if (pat)
- emit_insn (pat);
- return 0;
-
- case IX86_BUILTIN_LLWPCB:
- arg0 = CALL_EXPR_ARG (exp, 0);
- op0 = expand_normal (arg0);
- icode = CODE_FOR_lwp_llwpcb;
- if (!insn_data[icode].operand[0].predicate (op0, Pmode))
- op0 = ix86_zero_extend_to_Pmode (op0);
- emit_insn (gen_lwp_llwpcb (op0));
- return 0;
-
- case IX86_BUILTIN_SLWPCB:
- icode = CODE_FOR_lwp_slwpcb;
- if (!target
- || !insn_data[icode].operand[0].predicate (target, Pmode))
- target = gen_reg_rtx (Pmode);
- emit_insn (gen_lwp_slwpcb (target));
- return target;
-
- case IX86_BUILTIN_BEXTRI32:
- case IX86_BUILTIN_BEXTRI64:
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- icode = (fcode == IX86_BUILTIN_BEXTRI32
- ? CODE_FOR_tbm_bextri_si
- : CODE_FOR_tbm_bextri_di);
- if (!CONST_INT_P (op1))
- {
- error ("last argument must be an immediate");
- return const0_rtx;
- }
- else
- {
- unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
- unsigned char lsb_index = INTVAL (op1) & 0xFF;
- op1 = GEN_INT (length);
- op2 = GEN_INT (lsb_index);
-
- mode1 = insn_data[icode].operand[1].mode;
- if (!insn_data[icode].operand[1].predicate (op0, mode1))
- op0 = copy_to_mode_reg (mode1, op0);
-
- mode0 = insn_data[icode].operand[0].mode;
- if (target == 0
- || !register_operand (target, mode0))
- target = gen_reg_rtx (mode0);
-
- pat = GEN_FCN (icode) (target, op0, op1, op2);
- if (pat)
- emit_insn (pat);
- return target;
- }
-
- case IX86_BUILTIN_RDRAND16_STEP:
- icode = CODE_FOR_rdrandhi_1;
- mode0 = HImode;
- goto rdrand_step;
-
- case IX86_BUILTIN_RDRAND32_STEP:
- icode = CODE_FOR_rdrandsi_1;
- mode0 = SImode;
- goto rdrand_step;
-
- case IX86_BUILTIN_RDRAND64_STEP:
- icode = CODE_FOR_rdranddi_1;
- mode0 = DImode;
-
-rdrand_step:
- arg0 = CALL_EXPR_ARG (exp, 0);
- op1 = expand_normal (arg0);
- if (!address_operand (op1, VOIDmode))
- {
- op1 = convert_memory_address (Pmode, op1);
- op1 = copy_addr_to_reg (op1);
- }
-
- op0 = gen_reg_rtx (mode0);
- emit_insn (GEN_FCN (icode) (op0));
-
- emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
-
- op1 = gen_reg_rtx (SImode);
- emit_move_insn (op1, CONST1_RTX (SImode));
-
- /* Emit SImode conditional move. */
- if (mode0 == HImode)
- {
- if (TARGET_ZERO_EXTEND_WITH_AND
- && optimize_function_for_speed_p (cfun))
- {
- op2 = force_reg (SImode, const0_rtx);
-
- emit_insn (gen_movstricthi
- (gen_lowpart (HImode, op2), op0));
- }
- else
- {
- op2 = gen_reg_rtx (SImode);
-
- emit_insn (gen_zero_extendhisi2 (op2, op0));
- }
- }
- else if (mode0 == SImode)
- op2 = op0;
- else
- op2 = gen_rtx_SUBREG (SImode, op0, 0);
-
- if (target == 0
- || !register_operand (target, SImode))
- target = gen_reg_rtx (SImode);
-
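- /* rdrand sets CF on success and zeroes the destination on
- failure, so selecting the stored value when the carry is
- clear and 1 when it is set yields the 0/1 status result. */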
- pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
- const0_rtx);
- emit_insn (gen_rtx_SET (target,
- gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
- return target;
-
- case IX86_BUILTIN_RDSEED16_STEP:
- icode = CODE_FOR_rdseedhi_1;
- mode0 = HImode;
- goto rdseed_step;
-
- case IX86_BUILTIN_RDSEED32_STEP:
- icode = CODE_FOR_rdseedsi_1;
- mode0 = SImode;
- goto rdseed_step;
-
- case IX86_BUILTIN_RDSEED64_STEP:
- icode = CODE_FOR_rdseeddi_1;
- mode0 = DImode;
-
-rdseed_step:
- arg0 = CALL_EXPR_ARG (exp, 0);
- op1 = expand_normal (arg0);
- if (!address_operand (op1, VOIDmode))
- {
- op1 = convert_memory_address (Pmode, op1);
- op1 = copy_addr_to_reg (op1);
- }
-
- op0 = gen_reg_rtx (mode0);
- emit_insn (GEN_FCN (icode) (op0));
-
- emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
-
- op2 = gen_reg_rtx (QImode);
-
- pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
- const0_rtx);
- emit_insn (gen_rtx_SET (op2, pat));
-
- if (target == 0
- || !register_operand (target, SImode))
- target = gen_reg_rtx (SImode);
-
- emit_insn (gen_zero_extendqisi2 (target, op2));
- return target;
-
- case IX86_BUILTIN_SBB32:
- icode = CODE_FOR_subborrowsi;
- icode2 = CODE_FOR_subborrowsi_0;
- mode0 = SImode;
- mode1 = DImode;
- mode2 = CCmode;
- goto handlecarry;
-
- case IX86_BUILTIN_SBB64:
- icode = CODE_FOR_subborrowdi;
- icode2 = CODE_FOR_subborrowdi_0;
- mode0 = DImode;
- mode1 = TImode;
- mode2 = CCmode;
- goto handlecarry;
-
- case IX86_BUILTIN_ADDCARRYX32:
- icode = CODE_FOR_addcarrysi;
- icode2 = CODE_FOR_addcarrysi_0;
- mode0 = SImode;
- mode1 = DImode;
- mode2 = CCCmode;
- goto handlecarry;
-
- case IX86_BUILTIN_ADDCARRYX64:
- icode = CODE_FOR_addcarrydi;
- icode2 = CODE_FOR_addcarrydi_0;
- mode0 = DImode;
- mode1 = TImode;
- mode2 = CCCmode;
-
- handlecarry:
- arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
- arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
- arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
- arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
-
- op1 = expand_normal (arg0);
- if (!integer_zerop (arg0))
- op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
-
- op2 = expand_normal (arg1);
- if (!register_operand (op2, mode0))
- op2 = copy_to_mode_reg (mode0, op2);
-
- op3 = expand_normal (arg2);
- if (!register_operand (op3, mode0))
- op3 = copy_to_mode_reg (mode0, op3);
-
- op4 = expand_normal (arg3);
- if (!address_operand (op4, VOIDmode))
- {
- op4 = convert_memory_address (Pmode, op4);
- op4 = copy_addr_to_reg (op4);
- }
-
- op0 = gen_reg_rtx (mode0);
- if (integer_zerop (arg0))
- {
- /* If arg0 is 0, optimize right away into an add or sub
- instruction that sets the CCCmode flags. */
- op1 = gen_rtx_REG (mode2, FLAGS_REG);
- emit_insn (GEN_FCN (icode2) (op0, op2, op3));
- }
- else
- {
- /* Generate CF from input operand. */
- emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
-
- /* Generate instruction that consumes CF. */
- op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
- pat = gen_rtx_LTU (mode1, op1, const0_rtx);
- pat2 = gen_rtx_LTU (mode0, op1, const0_rtx);
- emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat, pat2));
- }
-
- /* Return current CF value. */
- if (target == 0)
- target = gen_reg_rtx (QImode);
-
- pat = gen_rtx_LTU (QImode, op1, const0_rtx);
- emit_insn (gen_rtx_SET (target, pat));
-
- /* Store the result. */
- emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
-
- return target;
-
- case IX86_BUILTIN_READ_FLAGS:
- emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
-
- if (optimize
- || target == NULL_RTX
- || !nonimmediate_operand (target, word_mode)
- || GET_MODE (target) != word_mode)
- target = gen_reg_rtx (word_mode);
-
- emit_insn (gen_pop (target));
- return target;
-
- case IX86_BUILTIN_WRITE_FLAGS:
-
- arg0 = CALL_EXPR_ARG (exp, 0);
- op0 = expand_normal (arg0);
- if (!general_no_elim_operand (op0, word_mode))
- op0 = copy_to_mode_reg (word_mode, op0);
-
- emit_insn (gen_push (op0));
- emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
- return 0;
-
- case IX86_BUILTIN_KTESTC8:
- icode = CODE_FOR_ktestqi;
- mode3 = CCCmode;
- goto kortest;
-
- case IX86_BUILTIN_KTESTZ8:
- icode = CODE_FOR_ktestqi;
- mode3 = CCZmode;
- goto kortest;
-
- case IX86_BUILTIN_KTESTC16:
- icode = CODE_FOR_ktesthi;
- mode3 = CCCmode;
- goto kortest;
-
- case IX86_BUILTIN_KTESTZ16:
- icode = CODE_FOR_ktesthi;
- mode3 = CCZmode;
- goto kortest;
-
- case IX86_BUILTIN_KTESTC32:
- icode = CODE_FOR_ktestsi;
- mode3 = CCCmode;
- goto kortest;
-
- case IX86_BUILTIN_KTESTZ32:
- icode = CODE_FOR_ktestsi;
- mode3 = CCZmode;
- goto kortest;
-
- case IX86_BUILTIN_KTESTC64:
- icode = CODE_FOR_ktestdi;
- mode3 = CCCmode;
- goto kortest;
-
- case IX86_BUILTIN_KTESTZ64:
- icode = CODE_FOR_ktestdi;
- mode3 = CCZmode;
- goto kortest;
-
- case IX86_BUILTIN_KORTESTC8:
- icode = CODE_FOR_kortestqi;
- mode3 = CCCmode;
- goto kortest;
-
- case IX86_BUILTIN_KORTESTZ8:
- icode = CODE_FOR_kortestqi;
- mode3 = CCZmode;
- goto kortest;
-
- case IX86_BUILTIN_KORTESTC16:
- icode = CODE_FOR_kortesthi;
- mode3 = CCCmode;
- goto kortest;
-
- case IX86_BUILTIN_KORTESTZ16:
- icode = CODE_FOR_kortesthi;
- mode3 = CCZmode;
- goto kortest;
-
- case IX86_BUILTIN_KORTESTC32:
- icode = CODE_FOR_kortestsi;
- mode3 = CCCmode;
- goto kortest;
-
- case IX86_BUILTIN_KORTESTZ32:
- icode = CODE_FOR_kortestsi;
- mode3 = CCZmode;
- goto kortest;
-
- case IX86_BUILTIN_KORTESTC64:
- icode = CODE_FOR_kortestdi;
- mode3 = CCCmode;
- goto kortest;
-
- case IX86_BUILTIN_KORTESTZ64:
- icode = CODE_FOR_kortestdi;
- mode3 = CCZmode;
-
- kortest:
- arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
- arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
-
- mode0 = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
-
- if (GET_MODE (op0) != VOIDmode)
- op0 = force_reg (GET_MODE (op0), op0);
-
- op0 = gen_lowpart (mode0, op0);
-
- if (!insn_data[icode].operand[0].predicate (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
-
- if (GET_MODE (op1) != VOIDmode)
- op1 = force_reg (GET_MODE (op1), op1);
-
- op1 = gen_lowpart (mode1, op1);
-
- if (!insn_data[icode].operand[1].predicate (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
-
- target = gen_reg_rtx (QImode);
-
- /* Emit kortest. */
- emit_insn (GEN_FCN (icode) (op0, op1));
- /* And use setcc to return result from flags. */
- ix86_expand_setcc (target, EQ,
- gen_rtx_REG (mode3, FLAGS_REG), const0_rtx);
- return target;
-
- case IX86_BUILTIN_GATHERSIV2DF:
- icode = CODE_FOR_avx2_gathersiv2df;
- goto gather_gen;
- case IX86_BUILTIN_GATHERSIV4DF:
- icode = CODE_FOR_avx2_gathersiv4df;
- goto gather_gen;
- case IX86_BUILTIN_GATHERDIV2DF:
- icode = CODE_FOR_avx2_gatherdiv2df;
- goto gather_gen;
- case IX86_BUILTIN_GATHERDIV4DF:
- icode = CODE_FOR_avx2_gatherdiv4df;
- goto gather_gen;
- case IX86_BUILTIN_GATHERSIV4SF:
- icode = CODE_FOR_avx2_gathersiv4sf;
- goto gather_gen;
- case IX86_BUILTIN_GATHERSIV8SF:
- icode = CODE_FOR_avx2_gathersiv8sf;
- goto gather_gen;
- case IX86_BUILTIN_GATHERDIV4SF:
- icode = CODE_FOR_avx2_gatherdiv4sf;
- goto gather_gen;
- case IX86_BUILTIN_GATHERDIV8SF:
- icode = CODE_FOR_avx2_gatherdiv8sf;
- goto gather_gen;
- case IX86_BUILTIN_GATHERSIV2DI:
- icode = CODE_FOR_avx2_gathersiv2di;
- goto gather_gen;
- case IX86_BUILTIN_GATHERSIV4DI:
- icode = CODE_FOR_avx2_gathersiv4di;
- goto gather_gen;
- case IX86_BUILTIN_GATHERDIV2DI:
- icode = CODE_FOR_avx2_gatherdiv2di;
- goto gather_gen;
- case IX86_BUILTIN_GATHERDIV4DI:
- icode = CODE_FOR_avx2_gatherdiv4di;
- goto gather_gen;
- case IX86_BUILTIN_GATHERSIV4SI:
- icode = CODE_FOR_avx2_gathersiv4si;
- goto gather_gen;
- case IX86_BUILTIN_GATHERSIV8SI:
- icode = CODE_FOR_avx2_gathersiv8si;
- goto gather_gen;
- case IX86_BUILTIN_GATHERDIV4SI:
- icode = CODE_FOR_avx2_gatherdiv4si;
- goto gather_gen;
- case IX86_BUILTIN_GATHERDIV8SI:
- icode = CODE_FOR_avx2_gatherdiv8si;
- goto gather_gen;
- case IX86_BUILTIN_GATHERALTSIV4DF:
- icode = CODE_FOR_avx2_gathersiv4df;
- goto gather_gen;
- case IX86_BUILTIN_GATHERALTDIV8SF:
- icode = CODE_FOR_avx2_gatherdiv8sf;
- goto gather_gen;
- case IX86_BUILTIN_GATHERALTSIV4DI:
- icode = CODE_FOR_avx2_gathersiv4di;
- goto gather_gen;
- case IX86_BUILTIN_GATHERALTDIV8SI:
- icode = CODE_FOR_avx2_gatherdiv8si;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3SIV16SF:
- icode = CODE_FOR_avx512f_gathersiv16sf;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3SIV8DF:
- icode = CODE_FOR_avx512f_gathersiv8df;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3DIV16SF:
- icode = CODE_FOR_avx512f_gatherdiv16sf;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3DIV8DF:
- icode = CODE_FOR_avx512f_gatherdiv8df;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3SIV16SI:
- icode = CODE_FOR_avx512f_gathersiv16si;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3SIV8DI:
- icode = CODE_FOR_avx512f_gathersiv8di;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3DIV16SI:
- icode = CODE_FOR_avx512f_gatherdiv16si;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3DIV8DI:
- icode = CODE_FOR_avx512f_gatherdiv8di;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3ALTSIV8DF:
- icode = CODE_FOR_avx512f_gathersiv8df;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3ALTDIV16SF:
- icode = CODE_FOR_avx512f_gatherdiv16sf;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3ALTSIV8DI:
- icode = CODE_FOR_avx512f_gathersiv8di;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3ALTDIV16SI:
- icode = CODE_FOR_avx512f_gatherdiv16si;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3SIV2DF:
- icode = CODE_FOR_avx512vl_gathersiv2df;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3SIV4DF:
- icode = CODE_FOR_avx512vl_gathersiv4df;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3DIV2DF:
- icode = CODE_FOR_avx512vl_gatherdiv2df;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3DIV4DF:
- icode = CODE_FOR_avx512vl_gatherdiv4df;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3SIV4SF:
- icode = CODE_FOR_avx512vl_gathersiv4sf;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3SIV8SF:
- icode = CODE_FOR_avx512vl_gathersiv8sf;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3DIV4SF:
- icode = CODE_FOR_avx512vl_gatherdiv4sf;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3DIV8SF:
- icode = CODE_FOR_avx512vl_gatherdiv8sf;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3SIV2DI:
- icode = CODE_FOR_avx512vl_gathersiv2di;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3SIV4DI:
- icode = CODE_FOR_avx512vl_gathersiv4di;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3DIV2DI:
- icode = CODE_FOR_avx512vl_gatherdiv2di;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3DIV4DI:
- icode = CODE_FOR_avx512vl_gatherdiv4di;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3SIV4SI:
- icode = CODE_FOR_avx512vl_gathersiv4si;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3SIV8SI:
- icode = CODE_FOR_avx512vl_gathersiv8si;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3DIV4SI:
- icode = CODE_FOR_avx512vl_gatherdiv4si;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3DIV8SI:
- icode = CODE_FOR_avx512vl_gatherdiv8si;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3ALTSIV4DF:
- icode = CODE_FOR_avx512vl_gathersiv4df;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3ALTDIV8SF:
- icode = CODE_FOR_avx512vl_gatherdiv8sf;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3ALTSIV4DI:
- icode = CODE_FOR_avx512vl_gathersiv4di;
- goto gather_gen;
- case IX86_BUILTIN_GATHER3ALTDIV8SI:
- icode = CODE_FOR_avx512vl_gatherdiv8si;
- goto gather_gen;
- case IX86_BUILTIN_SCATTERSIV16SF:
- icode = CODE_FOR_avx512f_scattersiv16sf;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERSIV8DF:
- icode = CODE_FOR_avx512f_scattersiv8df;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERDIV16SF:
- icode = CODE_FOR_avx512f_scatterdiv16sf;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERDIV8DF:
- icode = CODE_FOR_avx512f_scatterdiv8df;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERSIV16SI:
- icode = CODE_FOR_avx512f_scattersiv16si;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERSIV8DI:
- icode = CODE_FOR_avx512f_scattersiv8di;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERDIV16SI:
- icode = CODE_FOR_avx512f_scatterdiv16si;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERDIV8DI:
- icode = CODE_FOR_avx512f_scatterdiv8di;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERSIV8SF:
- icode = CODE_FOR_avx512vl_scattersiv8sf;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERSIV4SF:
- icode = CODE_FOR_avx512vl_scattersiv4sf;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERSIV4DF:
- icode = CODE_FOR_avx512vl_scattersiv4df;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERSIV2DF:
- icode = CODE_FOR_avx512vl_scattersiv2df;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERDIV8SF:
- icode = CODE_FOR_avx512vl_scatterdiv8sf;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERDIV4SF:
- icode = CODE_FOR_avx512vl_scatterdiv4sf;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERDIV4DF:
- icode = CODE_FOR_avx512vl_scatterdiv4df;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERDIV2DF:
- icode = CODE_FOR_avx512vl_scatterdiv2df;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERSIV8SI:
- icode = CODE_FOR_avx512vl_scattersiv8si;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERSIV4SI:
- icode = CODE_FOR_avx512vl_scattersiv4si;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERSIV4DI:
- icode = CODE_FOR_avx512vl_scattersiv4di;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERSIV2DI:
- icode = CODE_FOR_avx512vl_scattersiv2di;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERDIV8SI:
- icode = CODE_FOR_avx512vl_scatterdiv8si;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERDIV4SI:
- icode = CODE_FOR_avx512vl_scatterdiv4si;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERDIV4DI:
- icode = CODE_FOR_avx512vl_scatterdiv4di;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERDIV2DI:
- icode = CODE_FOR_avx512vl_scatterdiv2di;
- goto scatter_gen;
- case IX86_BUILTIN_GATHERPFDPD:
- icode = CODE_FOR_avx512pf_gatherpfv8sidf;
- goto vec_prefetch_gen;
- case IX86_BUILTIN_SCATTERALTSIV8DF:
- icode = CODE_FOR_avx512f_scattersiv8df;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERALTDIV16SF:
- icode = CODE_FOR_avx512f_scatterdiv16sf;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERALTSIV8DI:
- icode = CODE_FOR_avx512f_scattersiv8di;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERALTDIV16SI:
- icode = CODE_FOR_avx512f_scatterdiv16si;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERALTSIV4DF:
- icode = CODE_FOR_avx512vl_scattersiv4df;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERALTDIV8SF:
- icode = CODE_FOR_avx512vl_scatterdiv8sf;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERALTSIV4DI:
- icode = CODE_FOR_avx512vl_scattersiv4di;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERALTDIV8SI:
- icode = CODE_FOR_avx512vl_scatterdiv8si;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERALTSIV2DF:
- icode = CODE_FOR_avx512vl_scattersiv2df;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERALTDIV4SF:
- icode = CODE_FOR_avx512vl_scatterdiv4sf;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERALTSIV2DI:
- icode = CODE_FOR_avx512vl_scattersiv2di;
- goto scatter_gen;
- case IX86_BUILTIN_SCATTERALTDIV4SI:
- icode = CODE_FOR_avx512vl_scatterdiv4si;
- goto scatter_gen;
- case IX86_BUILTIN_GATHERPFDPS:
- icode = CODE_FOR_avx512pf_gatherpfv16sisf;
- goto vec_prefetch_gen;
- case IX86_BUILTIN_GATHERPFQPD:
- icode = CODE_FOR_avx512pf_gatherpfv8didf;
- goto vec_prefetch_gen;
- case IX86_BUILTIN_GATHERPFQPS:
- icode = CODE_FOR_avx512pf_gatherpfv8disf;
- goto vec_prefetch_gen;
- case IX86_BUILTIN_SCATTERPFDPD:
- icode = CODE_FOR_avx512pf_scatterpfv8sidf;
- goto vec_prefetch_gen;
- case IX86_BUILTIN_SCATTERPFDPS:
- icode = CODE_FOR_avx512pf_scatterpfv16sisf;
- goto vec_prefetch_gen;
- case IX86_BUILTIN_SCATTERPFQPD:
- icode = CODE_FOR_avx512pf_scatterpfv8didf;
- goto vec_prefetch_gen;
- case IX86_BUILTIN_SCATTERPFQPS:
- icode = CODE_FOR_avx512pf_scatterpfv8disf;
- goto vec_prefetch_gen;
-
- gather_gen:
- rtx half;
- rtx (*gen) (rtx, rtx);
-
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- arg2 = CALL_EXPR_ARG (exp, 2);
- arg3 = CALL_EXPR_ARG (exp, 3);
- arg4 = CALL_EXPR_ARG (exp, 4);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- op3 = expand_normal (arg3);
- op4 = expand_normal (arg4);
- /* Note the arg order is different from the operand order. */
- mode0 = insn_data[icode].operand[1].mode;
- mode2 = insn_data[icode].operand[3].mode;
- mode3 = insn_data[icode].operand[4].mode;
- mode4 = insn_data[icode].operand[5].mode;
-
- if (target == NULL_RTX
- || GET_MODE (target) != insn_data[icode].operand[0].mode
- || !insn_data[icode].operand[0].predicate (target,
- GET_MODE (target)))
- subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
- else
- subtarget = target;
-
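- /* For the alternate gather variants the index vector and the
- source/destination vector have different element counts, so
- only the low half of the wider operand is used; extract it
- first. */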
- switch (fcode)
- {
- case IX86_BUILTIN_GATHER3ALTSIV8DF:
- case IX86_BUILTIN_GATHER3ALTSIV8DI:
- half = gen_reg_rtx (V8SImode);
- if (!nonimmediate_operand (op2, V16SImode))
- op2 = copy_to_mode_reg (V16SImode, op2);
- emit_insn (gen_vec_extract_lo_v16si (half, op2));
- op2 = half;
- break;
- case IX86_BUILTIN_GATHER3ALTSIV4DF:
- case IX86_BUILTIN_GATHER3ALTSIV4DI:
- case IX86_BUILTIN_GATHERALTSIV4DF:
- case IX86_BUILTIN_GATHERALTSIV4DI:
- half = gen_reg_rtx (V4SImode);
- if (!nonimmediate_operand (op2, V8SImode))
- op2 = copy_to_mode_reg (V8SImode, op2);
- emit_insn (gen_vec_extract_lo_v8si (half, op2));
- op2 = half;
- break;
- case IX86_BUILTIN_GATHER3ALTDIV16SF:
- case IX86_BUILTIN_GATHER3ALTDIV16SI:
- half = gen_reg_rtx (mode0);
- if (mode0 == V8SFmode)
- gen = gen_vec_extract_lo_v16sf;
- else
- gen = gen_vec_extract_lo_v16si;
- if (!nonimmediate_operand (op0, GET_MODE (op0)))
- op0 = copy_to_mode_reg (GET_MODE (op0), op0);
- emit_insn (gen (half, op0));
- op0 = half;
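- /* Only half of the elements remain, so narrow the HImode
- mask to QImode. */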
- op3 = lowpart_subreg (QImode, op3, HImode);
- break;
- case IX86_BUILTIN_GATHER3ALTDIV8SF:
- case IX86_BUILTIN_GATHER3ALTDIV8SI:
- case IX86_BUILTIN_GATHERALTDIV8SF:
- case IX86_BUILTIN_GATHERALTDIV8SI:
- half = gen_reg_rtx (mode0);
- if (mode0 == V4SFmode)
- gen = gen_vec_extract_lo_v8sf;
- else
- gen = gen_vec_extract_lo_v8si;
- if (!nonimmediate_operand (op0, GET_MODE (op0)))
- op0 = copy_to_mode_reg (GET_MODE (op0), op0);
- emit_insn (gen (half, op0));
- op0 = half;
- if (VECTOR_MODE_P (GET_MODE (op3)))
- {
- half = gen_reg_rtx (mode0);
- if (!nonimmediate_operand (op3, GET_MODE (op3)))
- op3 = copy_to_mode_reg (GET_MODE (op3), op3);
- emit_insn (gen (half, op3));
- op3 = half;
- }
- break;
- default:
- break;
- }
-
- /* Force the memory operand to use only a base register here;
- we don't want to do that to the memory operands of other
- builtin functions. */
- op1 = ix86_zero_extend_to_Pmode (op1);
-
- if (!insn_data[icode].operand[1].predicate (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (!insn_data[icode].operand[2].predicate (op1, Pmode))
- op1 = copy_to_mode_reg (Pmode, op1);
- if (!insn_data[icode].operand[3].predicate (op2, mode2))
- op2 = copy_to_mode_reg (mode2, op2);
-
- op3 = fixup_modeless_constant (op3, mode3);
-
- if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
- {
- if (!insn_data[icode].operand[4].predicate (op3, mode3))
- op3 = copy_to_mode_reg (mode3, op3);
- }
- else
- {
- op3 = copy_to_reg (op3);
- op3 = lowpart_subreg (mode3, op3, GET_MODE (op3));
- }
- if (!insn_data[icode].operand[5].predicate (op4, mode4))
- {
- error ("the last argument must be scale 1, 2, 4, 8");
- return const0_rtx;
- }
-
- /* Optimize. If mask is known to have all high bits set,
- replace op0 with pc_rtx to signal that the instruction
- overwrites the whole destination and doesn't use its
- previous contents. */
- if (optimize)
- {
- if (TREE_CODE (arg3) == INTEGER_CST)
- {
- if (integer_all_onesp (arg3))
- op0 = pc_rtx;
- }
- else if (TREE_CODE (arg3) == VECTOR_CST)
- {
- unsigned int negative = 0;
- for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
- {
- tree cst = VECTOR_CST_ELT (arg3, i);
- if (TREE_CODE (cst) == INTEGER_CST
- && tree_int_cst_sign_bit (cst))
- negative++;
- else if (TREE_CODE (cst) == REAL_CST
- && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
- negative++;
- }
- if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
- op0 = pc_rtx;
- }
- else if (TREE_CODE (arg3) == SSA_NAME
- && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
- {
- /* Also recognize when the mask is like:
- __v2df src = _mm_setzero_pd ();
- __v2df mask = _mm_cmpeq_pd (src, src);
- or
- __v8sf src = _mm256_setzero_ps ();
- __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
- as that is a cheaper way to load all ones into
- a register than having to load a constant from
- memory. */
- gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
- if (is_gimple_call (def_stmt))
- {
- tree fndecl = gimple_call_fndecl (def_stmt);
- if (fndecl
- && fndecl_built_in_p (fndecl, BUILT_IN_MD))
- switch (DECL_MD_FUNCTION_CODE (fndecl))
- {
- case IX86_BUILTIN_CMPPD:
- case IX86_BUILTIN_CMPPS:
- case IX86_BUILTIN_CMPPD256:
- case IX86_BUILTIN_CMPPS256:
- if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
- break;
- /* FALLTHRU */
- case IX86_BUILTIN_CMPEQPD:
- case IX86_BUILTIN_CMPEQPS:
- if (initializer_zerop (gimple_call_arg (def_stmt, 0))
- && initializer_zerop (gimple_call_arg (def_stmt,
- 1)))
- op0 = pc_rtx;
- break;
- default:
- break;
- }
- }
- }
- }
-
- pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
- if (! pat)
- return const0_rtx;
- emit_insn (pat);
-
- switch (fcode)
- {
- case IX86_BUILTIN_GATHER3DIV16SF:
- if (target == NULL_RTX)
- target = gen_reg_rtx (V8SFmode);
- emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
- break;
- case IX86_BUILTIN_GATHER3DIV16SI:
- if (target == NULL_RTX)
- target = gen_reg_rtx (V8SImode);
- emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
- break;
- case IX86_BUILTIN_GATHER3DIV8SF:
- case IX86_BUILTIN_GATHERDIV8SF:
- if (target == NULL_RTX)
- target = gen_reg_rtx (V4SFmode);
- emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
- break;
- case IX86_BUILTIN_GATHER3DIV8SI:
- case IX86_BUILTIN_GATHERDIV8SI:
- if (target == NULL_RTX)
- target = gen_reg_rtx (V4SImode);
- emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
- break;
- default:
- target = subtarget;
- break;
- }
- return target;
-
- scatter_gen:
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- arg2 = CALL_EXPR_ARG (exp, 2);
- arg3 = CALL_EXPR_ARG (exp, 3);
- arg4 = CALL_EXPR_ARG (exp, 4);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- op3 = expand_normal (arg3);
- op4 = expand_normal (arg4);
- mode1 = insn_data[icode].operand[1].mode;
- mode2 = insn_data[icode].operand[2].mode;
- mode3 = insn_data[icode].operand[3].mode;
- mode4 = insn_data[icode].operand[4].mode;
-
- /* A scatter instruction stores operand op3 to memory with
- indices from op2 and scale from op4 under writemask op1.
- If index operand op2 has more elements than source operand
- op3, only its low half is used, and vice versa. */
- switch (fcode)
- {
- case IX86_BUILTIN_SCATTERALTSIV8DF:
- case IX86_BUILTIN_SCATTERALTSIV8DI:
- half = gen_reg_rtx (V8SImode);
- if (!nonimmediate_operand (op2, V16SImode))
- op2 = copy_to_mode_reg (V16SImode, op2);
- emit_insn (gen_vec_extract_lo_v16si (half, op2));
- op2 = half;
- break;
- case IX86_BUILTIN_SCATTERALTDIV16SF:
- case IX86_BUILTIN_SCATTERALTDIV16SI:
- half = gen_reg_rtx (mode3);
- if (mode3 == V8SFmode)
- gen = gen_vec_extract_lo_v16sf;
- else
- gen = gen_vec_extract_lo_v16si;
- if (!nonimmediate_operand (op3, GET_MODE (op3)))
- op3 = copy_to_mode_reg (GET_MODE (op3), op3);
- emit_insn (gen (half, op3));
- op3 = half;
- break;
- case IX86_BUILTIN_SCATTERALTSIV4DF:
- case IX86_BUILTIN_SCATTERALTSIV4DI:
- half = gen_reg_rtx (V4SImode);
- if (!nonimmediate_operand (op2, V8SImode))
- op2 = copy_to_mode_reg (V8SImode, op2);
- emit_insn (gen_vec_extract_lo_v8si (half, op2));
- op2 = half;
- break;
- case IX86_BUILTIN_SCATTERALTDIV8SF:
- case IX86_BUILTIN_SCATTERALTDIV8SI:
- half = gen_reg_rtx (mode3);
- if (mode3 == V4SFmode)
- gen = gen_vec_extract_lo_v8sf;
- else
- gen = gen_vec_extract_lo_v8si;
- if (!nonimmediate_operand (op3, GET_MODE (op3)))
- op3 = copy_to_mode_reg (GET_MODE (op3), op3);
- emit_insn (gen (half, op3));
- op3 = half;
- break;
- case IX86_BUILTIN_SCATTERALTSIV2DF:
- case IX86_BUILTIN_SCATTERALTSIV2DI:
- if (!nonimmediate_operand (op2, V4SImode))
- op2 = copy_to_mode_reg (V4SImode, op2);
- break;
- case IX86_BUILTIN_SCATTERALTDIV4SF:
- case IX86_BUILTIN_SCATTERALTDIV4SI:
- if (!nonimmediate_operand (op3, GET_MODE (op3)))
- op3 = copy_to_mode_reg (GET_MODE (op3), op3);
- break;
- default:
- break;
- }
-
- /* Force the memory operand to use only a base register here;
- we don't want to do that to the memory operands of other
- builtin functions. */
- op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
-
- if (!insn_data[icode].operand[0].predicate (op0, Pmode))
- op0 = copy_to_mode_reg (Pmode, op0);
-
- op1 = fixup_modeless_constant (op1, mode1);
-
- if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
- {
- if (!insn_data[icode].operand[1].predicate (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- }
- else
- {
- op1 = copy_to_reg (op1);
- op1 = lowpart_subreg (mode1, op1, GET_MODE (op1));
- }
-
- if (!insn_data[icode].operand[2].predicate (op2, mode2))
- op2 = copy_to_mode_reg (mode2, op2);
-
- if (!insn_data[icode].operand[3].predicate (op3, mode3))
- op3 = copy_to_mode_reg (mode3, op3);
-
- if (!insn_data[icode].operand[4].predicate (op4, mode4))
- {
- error ("the last argument must be scale 1, 2, 4, 8");
- return const0_rtx;
- }
-
- pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
- if (! pat)
- return const0_rtx;
-
- emit_insn (pat);
- return 0;
-
- vec_prefetch_gen:
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- arg2 = CALL_EXPR_ARG (exp, 2);
- arg3 = CALL_EXPR_ARG (exp, 3);
- arg4 = CALL_EXPR_ARG (exp, 4);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- op3 = expand_normal (arg3);
- op4 = expand_normal (arg4);
- mode0 = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
- mode3 = insn_data[icode].operand[3].mode;
- mode4 = insn_data[icode].operand[4].mode;
-
- op0 = fixup_modeless_constant (op0, mode0);
-
- if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
- {
- if (!insn_data[icode].operand[0].predicate (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- }
- else
- {
- op0 = copy_to_reg (op0);
- op0 = lowpart_subreg (mode0, op0, GET_MODE (op0));
- }
-
- if (!insn_data[icode].operand[1].predicate (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
-
- /* Force the memory operand to use only a base register here;
- we don't want to do that to the memory operands of other
- builtin functions. */
- op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
-
- if (!insn_data[icode].operand[2].predicate (op2, Pmode))
- op2 = copy_to_mode_reg (Pmode, op2);
-
- if (!insn_data[icode].operand[3].predicate (op3, mode3))
- {
- error ("the forth argument must be scale 1, 2, 4, 8");
- return const0_rtx;
- }
-
- if (!insn_data[icode].operand[4].predicate (op4, mode4))
- {
- error ("incorrect hint operand");
- return const0_rtx;
- }
-
- pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
- if (! pat)
- return const0_rtx;
-
- emit_insn (pat);
-
- return 0;
-
- case IX86_BUILTIN_XABORT:
- icode = CODE_FOR_xabort;
- arg0 = CALL_EXPR_ARG (exp, 0);
- op0 = expand_normal (arg0);
- mode0 = insn_data[icode].operand[0].mode;
- if (!insn_data[icode].operand[0].predicate (op0, mode0))
- {
- error ("the argument to %<xabort%> intrinsic must "
- "be an 8-bit immediate");
- return const0_rtx;
- }
- emit_insn (gen_xabort (op0));
- return 0;
-
- case IX86_BUILTIN_RSTORSSP:
- case IX86_BUILTIN_CLRSSBSY:
- arg0 = CALL_EXPR_ARG (exp, 0);
- op0 = expand_normal (arg0);
- icode = (fcode == IX86_BUILTIN_RSTORSSP
- ? CODE_FOR_rstorssp
- : CODE_FOR_clrssbsy);
- if (!address_operand (op0, VOIDmode))
- {
- op1 = convert_memory_address (Pmode, op0);
- op0 = copy_addr_to_reg (op1);
- }
- emit_insn (GEN_FCN (icode) (gen_rtx_MEM (Pmode, op0)));
- return 0;
-
- case IX86_BUILTIN_WRSSD:
- case IX86_BUILTIN_WRSSQ:
- case IX86_BUILTIN_WRUSSD:
- case IX86_BUILTIN_WRUSSQ:
- arg0 = CALL_EXPR_ARG (exp, 0);
- op0 = expand_normal (arg0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op1 = expand_normal (arg1);
- switch (fcode)
- {
- case IX86_BUILTIN_WRSSD:
- icode = CODE_FOR_wrsssi;
- mode = SImode;
- break;
- case IX86_BUILTIN_WRSSQ:
- icode = CODE_FOR_wrssdi;
- mode = DImode;
- break;
- case IX86_BUILTIN_WRUSSD:
- icode = CODE_FOR_wrusssi;
- mode = SImode;
- break;
- case IX86_BUILTIN_WRUSSQ:
- icode = CODE_FOR_wrussdi;
- mode = DImode;
- break;
- }
- op0 = force_reg (mode, op0);
- if (!address_operand (op1, VOIDmode))
- {
- op2 = convert_memory_address (Pmode, op1);
- op1 = copy_addr_to_reg (op2);
- }
- emit_insn (GEN_FCN (icode) (op0, gen_rtx_MEM (mode, op1)));
- return 0;
-
- default:
- break;
- }
-
- if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST
- && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST)
- {
- i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST;
- return ix86_expand_special_args_builtin (bdesc_special_args + i, exp,
- target);
- }
-
- if (fcode >= IX86_BUILTIN__BDESC_ARGS_FIRST
- && fcode <= IX86_BUILTIN__BDESC_ARGS_LAST)
- {
- i = fcode - IX86_BUILTIN__BDESC_ARGS_FIRST;
- rtx (*fcn) (rtx, rtx, rtx, rtx) = NULL;
- rtx (*fcn_mask) (rtx, rtx, rtx, rtx, rtx);
- rtx (*fcn_maskz) (rtx, rtx, rtx, rtx, rtx, rtx);
- int masked = 1;
- machine_mode mode, wide_mode, nar_mode;
-
- nar_mode = V4SFmode;
- mode = V16SFmode;
- wide_mode = V64SFmode;
- fcn_mask = gen_avx5124fmaddps_4fmaddps_mask;
- fcn_maskz = gen_avx5124fmaddps_4fmaddps_maskz;
-
- switch (fcode)
- {
- case IX86_BUILTIN_4FMAPS:
- fcn = gen_avx5124fmaddps_4fmaddps;
- masked = 0;
- goto v4fma_expand;
-
- case IX86_BUILTIN_4DPWSSD:
- nar_mode = V4SImode;
- mode = V16SImode;
- wide_mode = V64SImode;
- fcn = gen_avx5124vnniw_vp4dpwssd;
- masked = 0;
- goto v4fma_expand;
-
- case IX86_BUILTIN_4DPWSSDS:
- nar_mode = V4SImode;
- mode = V16SImode;
- wide_mode = V64SImode;
- fcn = gen_avx5124vnniw_vp4dpwssds;
- masked = 0;
- goto v4fma_expand;
-
- case IX86_BUILTIN_4FNMAPS:
- fcn = gen_avx5124fmaddps_4fnmaddps;
- masked = 0;
- goto v4fma_expand;
-
- case IX86_BUILTIN_4FNMAPS_MASK:
- fcn_mask = gen_avx5124fmaddps_4fnmaddps_mask;
- fcn_maskz = gen_avx5124fmaddps_4fnmaddps_maskz;
- goto v4fma_expand;
-
- case IX86_BUILTIN_4DPWSSD_MASK:
- nar_mode = V4SImode;
- mode = V16SImode;
- wide_mode = V64SImode;
- fcn_mask = gen_avx5124vnniw_vp4dpwssd_mask;
- fcn_maskz = gen_avx5124vnniw_vp4dpwssd_maskz;
- goto v4fma_expand;
-
- case IX86_BUILTIN_4DPWSSDS_MASK:
- nar_mode = V4SImode;
- mode = V16SImode;
- wide_mode = V64SImode;
- fcn_mask = gen_avx5124vnniw_vp4dpwssds_mask;
- fcn_maskz = gen_avx5124vnniw_vp4dpwssds_maskz;
- goto v4fma_expand;
-
- case IX86_BUILTIN_4FMAPS_MASK:
- {
- tree args[4];
- rtx ops[4];
- rtx wide_reg;
- rtx accum;
- rtx addr;
- rtx mem;
-
-v4fma_expand:
- wide_reg = gen_reg_rtx (wide_mode);
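- /* Pack the four source vectors into consecutive 512-bit
- lanes of the wide register operand expected by these
- patterns. */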
- for (i = 0; i < 4; i++)
- {
- args[i] = CALL_EXPR_ARG (exp, i);
- ops[i] = expand_normal (args[i]);
-
- emit_move_insn (gen_rtx_SUBREG (mode, wide_reg, i * 64),
- ops[i]);
- }
-
- accum = expand_normal (CALL_EXPR_ARG (exp, 4));
- accum = force_reg (mode, accum);
-
- addr = expand_normal (CALL_EXPR_ARG (exp, 5));
- addr = force_reg (Pmode, addr);
-
- mem = gen_rtx_MEM (nar_mode, addr);
-
- target = gen_reg_rtx (mode);
-
- emit_move_insn (target, accum);
-
- if (! masked)
- emit_insn (fcn (target, accum, wide_reg, mem));
- else
- {
- rtx merge, mask;
- merge = expand_normal (CALL_EXPR_ARG (exp, 6));
-
- mask = expand_normal (CALL_EXPR_ARG (exp, 7));
-
- if (CONST_INT_P (mask))
- mask = fixup_modeless_constant (mask, HImode);
-
- mask = force_reg (HImode, mask);
-
- if (GET_MODE (mask) != HImode)
- mask = gen_rtx_SUBREG (HImode, mask, 0);
-
- /* If merge is 0 then we're about to emit the z-masked variant. */
- if (const0_operand (merge, mode))
- emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
- /* If merge is the same as accum then emit the merge-masked variant. */
- else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
- {
- merge = force_reg (mode, merge);
- emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
- }
- /* Merging with an unknown value can happen if we z-mask at -O0. */
- else
- {
- target = gen_reg_rtx (mode);
- emit_move_insn (target, merge);
- emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
- }
- }
- return target;
- }
-
- case IX86_BUILTIN_4FNMASS:
- fcn = gen_avx5124fmaddps_4fnmaddss;
- masked = 0;
- goto s4fma_expand;
-
- case IX86_BUILTIN_4FMASS:
- fcn = gen_avx5124fmaddps_4fmaddss;
- masked = 0;
- goto s4fma_expand;
-
- case IX86_BUILTIN_4FNMASS_MASK:
- fcn_mask = gen_avx5124fmaddps_4fnmaddss_mask;
- fcn_maskz = gen_avx5124fmaddps_4fnmaddss_maskz;
- goto s4fma_expand;
-
- case IX86_BUILTIN_4FMASS_MASK:
- {
- tree args[4];
- rtx ops[4];
- rtx wide_reg;
- rtx accum;
- rtx addr;
- rtx mem;
-
- fcn_mask = gen_avx5124fmaddps_4fmaddss_mask;
- fcn_maskz = gen_avx5124fmaddps_4fmaddss_maskz;
-
-s4fma_expand:
- mode = V4SFmode;
- wide_reg = gen_reg_rtx (V64SFmode);
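- /* Copy the low SFmode element of each source into the low
- element of successive 512-bit lanes of the wide register. */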
- for (i = 0; i < 4; i++)
- {
- rtx tmp;
- args[i] = CALL_EXPR_ARG (exp, i);
- ops[i] = expand_normal (args[i]);
-
- tmp = gen_reg_rtx (SFmode);
- emit_move_insn (tmp, gen_rtx_SUBREG (SFmode, ops[i], 0));
-
- emit_move_insn (gen_rtx_SUBREG (V16SFmode, wide_reg, i * 64),
- gen_rtx_SUBREG (V16SFmode, tmp, 0));
- }
-
- accum = expand_normal (CALL_EXPR_ARG (exp, 4));
- accum = force_reg (V4SFmode, accum);
-
- addr = expand_normal (CALL_EXPR_ARG (exp, 5));
- addr = force_reg (Pmode, addr);
-
- mem = gen_rtx_MEM (V4SFmode, addr);
-
- target = gen_reg_rtx (V4SFmode);
-
- emit_move_insn (target, accum);
-
- if (! masked)
- emit_insn (fcn (target, accum, wide_reg, mem));
- else
- {
- rtx merge, mask;
- merge = expand_normal (CALL_EXPR_ARG (exp, 6));
-
- mask = expand_normal (CALL_EXPR_ARG (exp, 7));
-
- if (CONST_INT_P (mask))
- mask = fixup_modeless_constant (mask, QImode);
-
- mask = force_reg (QImode, mask);
-
- if (GET_MODE (mask) != QImode)
- mask = gen_rtx_SUBREG (QImode, mask, 0);
-
- /* If merge is 0 then we're about to emit the z-masked variant. */
- if (const0_operand (merge, mode))
- emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask));
- /* If merge is the same as accum then emit the merge-masked
- variant. */
- else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4))
- {
- merge = force_reg (mode, merge);
- emit_insn (fcn_mask (target, wide_reg, mem, merge, mask));
- }
- /* Merging with an unknown value can happen if we z-mask
- at -O0. */
- else
- {
- target = gen_reg_rtx (mode);
- emit_move_insn (target, merge);
- emit_insn (fcn_mask (target, wide_reg, mem, target, mask));
- }
- }
- return target;
- }
- case IX86_BUILTIN_RDPID:
- return ix86_expand_special_args_builtin (bdesc_args + i, exp,
- target);
- case IX86_BUILTIN_FABSQ:
- case IX86_BUILTIN_COPYSIGNQ:
- if (!TARGET_SSE)
- /* Emit a normal call if SSE isn't available. */
- return expand_call (exp, target, ignore);
- /* FALLTHRU */
- default:
- return ix86_expand_args_builtin (bdesc_args + i, exp, target);
- }
- }
-
- if (fcode >= IX86_BUILTIN__BDESC_COMI_FIRST
- && fcode <= IX86_BUILTIN__BDESC_COMI_LAST)
- {
- i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST;
- return ix86_expand_sse_comi (bdesc_comi + i, exp, target);
- }
-
- if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST
- && fcode <= IX86_BUILTIN__BDESC_ROUND_ARGS_LAST)
- {
- i = fcode - IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST;
- return ix86_expand_round_builtin (bdesc_round_args + i, exp, target);
- }
-
- if (fcode >= IX86_BUILTIN__BDESC_PCMPESTR_FIRST
- && fcode <= IX86_BUILTIN__BDESC_PCMPESTR_LAST)
- {
- i = fcode - IX86_BUILTIN__BDESC_PCMPESTR_FIRST;
- return ix86_expand_sse_pcmpestr (bdesc_pcmpestr + i, exp, target);
- }
-
- if (fcode >= IX86_BUILTIN__BDESC_PCMPISTR_FIRST
- && fcode <= IX86_BUILTIN__BDESC_PCMPISTR_LAST)
- {
- i = fcode - IX86_BUILTIN__BDESC_PCMPISTR_FIRST;
- return ix86_expand_sse_pcmpistr (bdesc_pcmpistr + i, exp, target);
- }
-
- if (fcode >= IX86_BUILTIN__BDESC_MULTI_ARG_FIRST
- && fcode <= IX86_BUILTIN__BDESC_MULTI_ARG_LAST)
- {
- i = fcode - IX86_BUILTIN__BDESC_MULTI_ARG_FIRST;
- const struct builtin_description *d = bdesc_multi_arg + i;
- return ix86_expand_multi_arg_builtin (d->icode, exp, target,
- (enum ix86_builtin_func_type)
- d->flag, d->comparison);
- }
-
- if (fcode >= IX86_BUILTIN__BDESC_CET_FIRST
- && fcode <= IX86_BUILTIN__BDESC_CET_LAST)
- {
- i = fcode - IX86_BUILTIN__BDESC_CET_FIRST;
- return ix86_expand_special_args_builtin (bdesc_cet + i, exp,
- target);
- }
-
- if (fcode >= IX86_BUILTIN__BDESC_CET_NORMAL_FIRST
- && fcode <= IX86_BUILTIN__BDESC_CET_NORMAL_LAST)
- {
- i = fcode - IX86_BUILTIN__BDESC_CET_NORMAL_FIRST;
- return ix86_expand_special_args_builtin (bdesc_cet_rdssp + i, exp,
- target);
- }
-
- gcc_unreachable ();
-}
-
-/* A subroutine of ix86_expand_vector_init_duplicate. Tries to
- fill target with val via vec_duplicate. */
-
-static bool
-ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
-{
- bool ok;
- rtx_insn *insn;
- rtx dup;
-
- /* First attempt to recognize VAL as-is. */
- dup = gen_vec_duplicate (mode, val);
- insn = emit_insn (gen_rtx_SET (target, dup));
- if (recog_memoized (insn) < 0)
- {
- rtx_insn *seq;
- machine_mode innermode = GET_MODE_INNER (mode);
- rtx reg;
-
- /* If that fails, force VAL into a register. */
-
- start_sequence ();
- reg = force_reg (innermode, val);
- if (GET_MODE (reg) != innermode)
- reg = gen_lowpart (innermode, reg);
- SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg);
- seq = get_insns ();
- end_sequence ();
- if (seq)
- emit_insn_before (seq, insn);
-
- ok = recog_memoized (insn) >= 0;
- gcc_assert (ok);
- }
- return true;
-}
-
-/* Get a vector mode of the same size as the original but with elements
- twice as wide. This is only guaranteed to apply to integral vectors. */
-
-static machine_mode
-get_mode_wider_vector (machine_mode o)
-{
- /* ??? Rely on the ordering that genmodes.c gives to vectors. */
- machine_mode n = GET_MODE_WIDER_MODE (o).require ();
- gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
- gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
- return n;
-}
-
-static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
-static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
-
-/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
- with all elements equal to VAR. Return true if successful. */
-
-static bool
-ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
- rtx target, rtx val)
-{
- bool ok;
-
- switch (mode)
- {
- case E_V2SImode:
- case E_V2SFmode:
- if (!mmx_ok)
- return false;
- /* FALLTHRU */
-
- case E_V4DFmode:
- case E_V4DImode:
- case E_V8SFmode:
- case E_V8SImode:
- case E_V2DFmode:
- case E_V2DImode:
- case E_V4SFmode:
- case E_V4SImode:
- case E_V16SImode:
- case E_V8DImode:
- case E_V16SFmode:
- case E_V8DFmode:
- return ix86_vector_duplicate_value (mode, target, val);
-
- case E_V4HImode:
- if (!mmx_ok)
- return false;
- if (TARGET_SSE || TARGET_3DNOW_A)
- {
- rtx x;
-
- val = gen_lowpart (SImode, val);
- x = gen_rtx_TRUNCATE (HImode, val);
- x = gen_rtx_VEC_DUPLICATE (mode, x);
- emit_insn (gen_rtx_SET (target, x));
- return true;
- }
- goto widen;
-
- case E_V8QImode:
- if (!mmx_ok)
- return false;
- goto widen;
-
- case E_V8HImode:
- if (TARGET_AVX2)
- return ix86_vector_duplicate_value (mode, target, val);
-
- if (TARGET_SSE2)
- {
- struct expand_vec_perm_d dperm;
- rtx tmp1, tmp2;
-
- permute:
- memset (&dperm, 0, sizeof (dperm));
- dperm.target = target;
- dperm.vmode = mode;
- dperm.nelt = GET_MODE_NUNITS (mode);
- dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
- dperm.one_operand_p = true;
-
- /* Extend to SImode using a paradoxical SUBREG. */
- tmp1 = gen_reg_rtx (SImode);
- emit_move_insn (tmp1, gen_lowpart (SImode, val));
-
- /* Insert the SImode value as the low element of a V4SImode vector. */
- tmp2 = gen_reg_rtx (V4SImode);
- emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
- emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
-
- ok = (expand_vec_perm_1 (&dperm)
- || expand_vec_perm_broadcast_1 (&dperm));
- gcc_assert (ok);
- return ok;
- }
- goto widen;
-
- case E_V16QImode:
- if (TARGET_AVX2)
- return ix86_vector_duplicate_value (mode, target, val);
-
- if (TARGET_SSE2)
- goto permute;
- goto widen;
-
- widen:
- /* Replicate the value once into the next wider mode and recurse. */
- {
- machine_mode smode, wsmode, wvmode;
- rtx x;
-
- smode = GET_MODE_INNER (mode);
- wvmode = get_mode_wider_vector (mode);
- wsmode = GET_MODE_INNER (wvmode);
-
- val = convert_modes (wsmode, smode, val, true);
- x = expand_simple_binop (wsmode, ASHIFT, val,
- GEN_INT (GET_MODE_BITSIZE (smode)),
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
- val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
-
- x = gen_reg_rtx (wvmode);
- ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
- gcc_assert (ok);
- emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
- return ok;
- }
-
- case E_V16HImode:
- case E_V32QImode:
- if (TARGET_AVX2)
- return ix86_vector_duplicate_value (mode, target, val);
- else
- {
- machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
- rtx x = gen_reg_rtx (hvmode);
-
- ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
- gcc_assert (ok);
-
- x = gen_rtx_VEC_CONCAT (mode, x, x);
- emit_insn (gen_rtx_SET (target, x));
- }
- return true;
-
- case E_V64QImode:
- case E_V32HImode:
- if (TARGET_AVX512BW)
- return ix86_vector_duplicate_value (mode, target, val);
- else
- {
- machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
- rtx x = gen_reg_rtx (hvmode);
-
- ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
- gcc_assert (ok);
-
- x = gen_rtx_VEC_CONCAT (mode, x, x);
- emit_insn (gen_rtx_SET (target, x));
- }
- return true;
-
- default:
- return false;
- }
-}
-
-/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
- whose ONE_VAR element is VAR and whose other elements are zero. Return true
- if successful. */
-
-static bool
-ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
- rtx target, rtx var, int one_var)
-{
- machine_mode vsimode;
- rtx new_target;
- rtx x, tmp;
- bool use_vector_set = false;
- rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
-
- switch (mode)
- {
- case E_V2DImode:
- /* For SSE4.1, we normally use vector set. But if the second
- element is zero and inter-unit moves are OK, we use movq
- instead. */
- use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
- && !(TARGET_INTER_UNIT_MOVES_TO_VEC
- && one_var == 0));
- break;
- case E_V16QImode:
- case E_V4SImode:
- case E_V4SFmode:
- use_vector_set = TARGET_SSE4_1;
- break;
- case E_V8HImode:
- use_vector_set = TARGET_SSE2;
- break;
- case E_V8QImode:
- use_vector_set = TARGET_MMX_WITH_SSE && TARGET_SSE4_1;
- break;
- case E_V4HImode:
- use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
- break;
- case E_V32QImode:
- case E_V16HImode:
- use_vector_set = TARGET_AVX;
- break;
- case E_V8SImode:
- use_vector_set = TARGET_AVX;
- gen_vec_set_0 = gen_vec_setv8si_0;
- break;
- case E_V8SFmode:
- use_vector_set = TARGET_AVX;
- gen_vec_set_0 = gen_vec_setv8sf_0;
- break;
- case E_V4DFmode:
- use_vector_set = TARGET_AVX;
- gen_vec_set_0 = gen_vec_setv4df_0;
- break;
- case E_V4DImode:
- /* Use ix86_expand_vector_set in 64-bit mode only. */
- use_vector_set = TARGET_AVX && TARGET_64BIT;
- gen_vec_set_0 = gen_vec_setv4di_0;
- break;
- case E_V16SImode:
- use_vector_set = TARGET_AVX512F && one_var == 0;
- gen_vec_set_0 = gen_vec_setv16si_0;
- break;
- case E_V16SFmode:
- use_vector_set = TARGET_AVX512F && one_var == 0;
- gen_vec_set_0 = gen_vec_setv16sf_0;
- break;
- case E_V8DFmode:
- use_vector_set = TARGET_AVX512F && one_var == 0;
- gen_vec_set_0 = gen_vec_setv8df_0;
- break;
- case E_V8DImode:
- /* Use ix86_expand_vector_set in 64-bit mode only. */
- use_vector_set = TARGET_AVX512F && TARGET_64BIT && one_var == 0;
- gen_vec_set_0 = gen_vec_setv8di_0;
- break;
- default:
- break;
- }
-
- if (use_vector_set)
- {
- if (gen_vec_set_0 && one_var == 0)
- {
- var = force_reg (GET_MODE_INNER (mode), var);
- emit_insn (gen_vec_set_0 (target, CONST0_RTX (mode), var));
- return true;
- }
- emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
- var = force_reg (GET_MODE_INNER (mode), var);
- ix86_expand_vector_set (mmx_ok, target, var, one_var);
- return true;
- }
-
- switch (mode)
- {
- case E_V2SFmode:
- case E_V2SImode:
- if (!mmx_ok)
- return false;
- /* FALLTHRU */
-
- case E_V2DFmode:
- case E_V2DImode:
- if (one_var != 0)
- return false;
- var = force_reg (GET_MODE_INNER (mode), var);
- x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
- emit_insn (gen_rtx_SET (target, x));
- return true;
-
- case E_V4SFmode:
- case E_V4SImode:
- if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
- new_target = gen_reg_rtx (mode);
- else
- new_target = target;
- var = force_reg (GET_MODE_INNER (mode), var);
- x = gen_rtx_VEC_DUPLICATE (mode, var);
- x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
- emit_insn (gen_rtx_SET (new_target, x));
- if (one_var != 0)
- {
- /* We need to shuffle the value to the correct position, so
- create a new pseudo to store the intermediate result. */
-
- /* With SSE2, we can use the integer shuffle insns. */
- if (mode != V4SFmode && TARGET_SSE2)
- {
- emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
- const1_rtx,
- GEN_INT (one_var == 1 ? 0 : 1),
- GEN_INT (one_var == 2 ? 0 : 1),
- GEN_INT (one_var == 3 ? 0 : 1)));
- if (target != new_target)
- emit_move_insn (target, new_target);
- return true;
- }
-
- /* Otherwise convert the intermediate result to V4SFmode and
- use the SSE1 shuffle instructions. */
- if (mode != V4SFmode)
- {
- tmp = gen_reg_rtx (V4SFmode);
- emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
- }
- else
- tmp = new_target;
-
- emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
- const1_rtx,
- GEN_INT (one_var == 1 ? 0 : 1),
- GEN_INT (one_var == 2 ? 0+4 : 1+4),
- GEN_INT (one_var == 3 ? 0+4 : 1+4)));
-
- if (mode != V4SFmode)
- emit_move_insn (target, gen_lowpart (V4SImode, tmp));
- else if (tmp != target)
- emit_move_insn (target, tmp);
- }
- else if (target != new_target)
- emit_move_insn (target, new_target);
- return true;
-
- case E_V8HImode:
- case E_V16QImode:
- vsimode = V4SImode;
- goto widen;
- case E_V4HImode:
- case E_V8QImode:
- if (!mmx_ok)
- return false;
- vsimode = V2SImode;
- goto widen;
- widen:
- if (one_var != 0)
- return false;
-
- /* Zero extend the variable element to SImode and recurse. */
- var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
-
- x = gen_reg_rtx (vsimode);
- if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
- var, one_var))
- gcc_unreachable ();
-
- emit_move_insn (target, gen_lowpart (mode, x));
- return true;
-
- default:
- return false;
- }
-}
-
-/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
- consisting of the values in VALS. It is known that all elements
- except ONE_VAR are constants. Return true if successful. */
-
-static bool
-ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
- rtx target, rtx vals, int one_var)
-{
- rtx var = XVECEXP (vals, 0, one_var);
- machine_mode wmode;
- rtx const_vec, x;
-
- const_vec = copy_rtx (vals);
- XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
- const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
-
- switch (mode)
- {
- case E_V2DFmode:
- case E_V2DImode:
- case E_V2SFmode:
- case E_V2SImode:
- /* For the two element vectors, it's just as easy to use
- the general case. */
- return false;
-
- case E_V4DImode:
- /* Use ix86_expand_vector_set in 64bit mode only. */
- if (!TARGET_64BIT)
- return false;
- /* FALLTHRU */
- case E_V4DFmode:
- case E_V8SFmode:
- case E_V8SImode:
- case E_V16HImode:
- case E_V32QImode:
- case E_V4SFmode:
- case E_V4SImode:
- case E_V8HImode:
- case E_V4HImode:
- break;
-
- case E_V16QImode:
- if (TARGET_SSE4_1)
- break;
- wmode = V8HImode;
- goto widen;
- case E_V8QImode:
- if (TARGET_MMX_WITH_SSE && TARGET_SSE4_1)
- break;
- wmode = V4HImode;
- goto widen;
- widen:
- /* There's no way to set one QImode entry easily. Combine
- the variable value with its adjacent constant value, and
- promote to an HImode set. */
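- For example, ONE_VAR == 3 pairs the variable byte with constant
- element 2 and inserts the combined value as HImode element 1. */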
- x = XVECEXP (vals, 0, one_var ^ 1);
- if (one_var & 1)
- {
- var = convert_modes (HImode, QImode, var, true);
- var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
- x = GEN_INT (INTVAL (x) & 0xff);
- }
- else
- {
- var = convert_modes (HImode, QImode, var, true);
- x = gen_int_mode (UINTVAL (x) << 8, HImode);
- }
- if (x != const0_rtx)
- var = expand_simple_binop (HImode, IOR, var, x, var,
- 1, OPTAB_LIB_WIDEN);
-
- x = gen_reg_rtx (wmode);
- emit_move_insn (x, gen_lowpart (wmode, const_vec));
- ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
-
- emit_move_insn (target, gen_lowpart (mode, x));
- return true;
-
- default:
- return false;
- }
-
- emit_move_insn (target, const_vec);
- ix86_expand_vector_set (mmx_ok, target, var, one_var);
- return true;
-}
-
-/* A subroutine of ix86_expand_vector_init_general. Use vector
- concatenate to handle the most general case: all values variable,
- and none identical. */
-
-static void
-ix86_expand_vector_init_concat (machine_mode mode,
- rtx target, rtx *ops, int n)
-{
- machine_mode half_mode = VOIDmode;
- rtx half[2];
- rtvec v;
- int i, j;
-
- switch (n)
- {
- case 2:
- switch (mode)
- {
- case E_V16SImode:
- half_mode = V8SImode;
- break;
- case E_V16SFmode:
- half_mode = V8SFmode;
- break;
- case E_V8DImode:
- half_mode = V4DImode;
- break;
- case E_V8DFmode:
- half_mode = V4DFmode;
- break;
- case E_V8SImode:
- half_mode = V4SImode;
- break;
- case E_V8SFmode:
- half_mode = V4SFmode;
- break;
- case E_V4DImode:
- half_mode = V2DImode;
- break;
- case E_V4DFmode:
- half_mode = V2DFmode;
- break;
- case E_V4SImode:
- half_mode = V2SImode;
- break;
- case E_V4SFmode:
- half_mode = V2SFmode;
- break;
- case E_V2DImode:
- half_mode = DImode;
- break;
- case E_V2SImode:
- half_mode = SImode;
- break;
- case E_V2DFmode:
- half_mode = DFmode;
- break;
- case E_V2SFmode:
- half_mode = SFmode;
- break;
- default:
- gcc_unreachable ();
- }
-
- if (!register_operand (ops[1], half_mode))
- ops[1] = force_reg (half_mode, ops[1]);
- if (!register_operand (ops[0], half_mode))
- ops[0] = force_reg (half_mode, ops[0]);
- emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
- ops[1])));
- break;
-
- case 4:
- switch (mode)
- {
- case E_V4DImode:
- half_mode = V2DImode;
- break;
- case E_V4DFmode:
- half_mode = V2DFmode;
- break;
- case E_V4SImode:
- half_mode = V2SImode;
- break;
- case E_V4SFmode:
- half_mode = V2SFmode;
- break;
- default:
- gcc_unreachable ();
- }
- goto half;
-
- case 8:
- switch (mode)
- {
- case E_V8DImode:
- half_mode = V4DImode;
- break;
- case E_V8DFmode:
- half_mode = V4DFmode;
- break;
- case E_V8SImode:
- half_mode = V4SImode;
- break;
- case E_V8SFmode:
- half_mode = V4SFmode;
- break;
- default:
- gcc_unreachable ();
- }
- goto half;
-
- case 16:
- switch (mode)
- {
- case E_V16SImode:
- half_mode = V8SImode;
- break;
- case E_V16SFmode:
- half_mode = V8SFmode;
- break;
- default:
- gcc_unreachable ();
- }
- goto half;
-
-half:
- /* FIXME: We process inputs backward to help RA. PR 36222. */
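- /* The high half (j == 1) is assembled before the low half (j == 0). */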
- i = n - 1;
- for (j = 1; j != -1; j--)
- {
- half[j] = gen_reg_rtx (half_mode);
- switch (n >> 1)
- {
- case 2:
- v = gen_rtvec (2, ops[i-1], ops[i]);
- i -= 2;
- break;
- case 4:
- v = gen_rtvec (4, ops[i-3], ops[i-2], ops[i-1], ops[i]);
- i -= 4;
- break;
- case 8:
- v = gen_rtvec (8, ops[i-7], ops[i-6], ops[i-5], ops[i-4],
- ops[i-3], ops[i-2], ops[i-1], ops[i]);
- i -= 8;
- break;
- default:
- gcc_unreachable ();
- }
- ix86_expand_vector_init (false, half[j],
- gen_rtx_PARALLEL (half_mode, v));
- }
-
- ix86_expand_vector_init_concat (mode, target, half, 2);
- break;
-
- default:
- gcc_unreachable ();
- }
-}
-
-/* A subroutine of ix86_expand_vector_init_general. Use vector
- interleave to handle the most general case: all values variable,
- and none identical. */
-
-static void
-ix86_expand_vector_init_interleave (machine_mode mode,
- rtx target, rtx *ops, int n)
-{
- machine_mode first_imode, second_imode, third_imode, inner_mode;
- int i, j;
- rtx op0, op1;
- rtx (*gen_load_even) (rtx, rtx, rtx);
- rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
- rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
-
- switch (mode)
- {
- case E_V8HImode:
- gen_load_even = gen_vec_setv8hi;
- gen_interleave_first_low = gen_vec_interleave_lowv4si;
- gen_interleave_second_low = gen_vec_interleave_lowv2di;
- inner_mode = HImode;
- first_imode = V4SImode;
- second_imode = V2DImode;
- third_imode = VOIDmode;
- break;
- case E_V16QImode:
- gen_load_even = gen_vec_setv16qi;
- gen_interleave_first_low = gen_vec_interleave_lowv8hi;
- gen_interleave_second_low = gen_vec_interleave_lowv4si;
- inner_mode = QImode;
- first_imode = V8HImode;
- second_imode = V4SImode;
- third_imode = V2DImode;
- break;
- default:
- gcc_unreachable ();
- }
-
- for (i = 0; i < n; i++)
- {
- /* Extend the odd element to SImode using a paradoxical SUBREG. */
- op0 = gen_reg_rtx (SImode);
- emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
-
- /* Insert the SImode value as low element of V4SImode vector. */
- op1 = gen_reg_rtx (V4SImode);
- op0 = gen_rtx_VEC_MERGE (V4SImode,
- gen_rtx_VEC_DUPLICATE (V4SImode,
- op0),
- CONST0_RTX (V4SImode),
- const1_rtx);
- emit_insn (gen_rtx_SET (op1, op0));
-
- /* Cast the V4SImode vector back to a vector in the original mode. */
- op0 = gen_reg_rtx (mode);
- emit_move_insn (op0, gen_lowpart (mode, op1));
-
- /* Load even elements into the second position. */
- emit_insn (gen_load_even (op0,
- force_reg (inner_mode,
- ops [i + i + 1]),
- const1_rtx));
-
- /* Cast vector to FIRST_IMODE vector. */
- ops[i] = gen_reg_rtx (first_imode);
- emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
- }
-
- /* Interleave low FIRST_IMODE vectors. */
- for (i = j = 0; i < n; i += 2, j++)
- {
- op0 = gen_reg_rtx (first_imode);
- emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
-
- /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
- ops[j] = gen_reg_rtx (second_imode);
- emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
- }
-
- /* Interleave low SECOND_IMODE vectors. */
- switch (second_imode)
- {
- case E_V4SImode:
- for (i = j = 0; i < n / 2; i += 2, j++)
- {
- op0 = gen_reg_rtx (second_imode);
- emit_insn (gen_interleave_second_low (op0, ops[i],
- ops[i + 1]));
-
- /* Cast the SECOND_IMODE vector to the THIRD_IMODE
- vector. */
- ops[j] = gen_reg_rtx (third_imode);
- emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
- }
- second_imode = V2DImode;
- gen_interleave_second_low = gen_vec_interleave_lowv2di;
- /* FALLTHRU */
-
- case E_V2DImode:
- op0 = gen_reg_rtx (second_imode);
- emit_insn (gen_interleave_second_low (op0, ops[0],
- ops[1]));
-
- /* Cast the SECOND_IMODE vector back to a vector in the original
- mode. */
- emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
- break;
-
- default:
- gcc_unreachable ();
- }
-}
-
-/* A subroutine of ix86_expand_vector_init. Handle the most general case:
- all values variable, and none identical. */
-
-static void
-ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
- rtx target, rtx vals)
-{
- rtx ops[64], op0, op1, op2, op3, op4, op5;
- machine_mode half_mode = VOIDmode;
- machine_mode quarter_mode = VOIDmode;
- int n, i;
-
- switch (mode)
- {
- case E_V2SFmode:
- case E_V2SImode:
- if (!mmx_ok && !TARGET_SSE)
- break;
- /* FALLTHRU */
-
- case E_V16SImode:
- case E_V16SFmode:
- case E_V8DFmode:
- case E_V8DImode:
- case E_V8SFmode:
- case E_V8SImode:
- case E_V4DFmode:
- case E_V4DImode:
- case E_V4SFmode:
- case E_V4SImode:
- case E_V2DFmode:
- case E_V2DImode:
- n = GET_MODE_NUNITS (mode);
- for (i = 0; i < n; i++)
- ops[i] = XVECEXP (vals, 0, i);
- ix86_expand_vector_init_concat (mode, target, ops, n);
- return;
-
- case E_V2TImode:
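- /* View each TImode element as a V2DImode vector and concatenate
- the two into a V4DImode register. */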
- for (i = 0; i < 2; i++)
- ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
- op0 = gen_reg_rtx (V4DImode);
- ix86_expand_vector_init_concat (V4DImode, op0, ops, 2);
- emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
- return;
-
- case E_V4TImode:
- for (i = 0; i < 4; i++)
- ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
- ops[4] = gen_reg_rtx (V4DImode);
- ix86_expand_vector_init_concat (V4DImode, ops[4], ops, 2);
- ops[5] = gen_reg_rtx (V4DImode);
- ix86_expand_vector_init_concat (V4DImode, ops[5], ops + 2, 2);
- op0 = gen_reg_rtx (V8DImode);
- ix86_expand_vector_init_concat (V8DImode, op0, ops + 4, 2);
- emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
- return;
-
- case E_V32QImode:
- half_mode = V16QImode;
- goto half;
-
- case E_V16HImode:
- half_mode = V8HImode;
- goto half;
-
-half:
- n = GET_MODE_NUNITS (mode);
- for (i = 0; i < n; i++)
- ops[i] = XVECEXP (vals, 0, i);
- op0 = gen_reg_rtx (half_mode);
- op1 = gen_reg_rtx (half_mode);
- ix86_expand_vector_init_interleave (half_mode, op0, ops,
- n >> 2);
- ix86_expand_vector_init_interleave (half_mode, op1,
- &ops [n >> 1], n >> 2);
- emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
- return;
-
- case E_V64QImode:
- quarter_mode = V16QImode;
- half_mode = V32QImode;
- goto quarter;
-
- case E_V32HImode:
- quarter_mode = V8HImode;
- half_mode = V16HImode;
- goto quarter;
-
-quarter:
- n = GET_MODE_NUNITS (mode);
- for (i = 0; i < n; i++)
- ops[i] = XVECEXP (vals, 0, i);
- op0 = gen_reg_rtx (quarter_mode);
- op1 = gen_reg_rtx (quarter_mode);
- op2 = gen_reg_rtx (quarter_mode);
- op3 = gen_reg_rtx (quarter_mode);
- op4 = gen_reg_rtx (half_mode);
- op5 = gen_reg_rtx (half_mode);
- ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
- n >> 3);
- ix86_expand_vector_init_interleave (quarter_mode, op1,
- &ops [n >> 2], n >> 3);
- ix86_expand_vector_init_interleave (quarter_mode, op2,
- &ops [n >> 1], n >> 3);
- ix86_expand_vector_init_interleave (quarter_mode, op3,
- &ops [(n >> 1) | (n >> 2)], n >> 3);
- emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
- emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
- emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
- return;
-
- case E_V16QImode:
- if (!TARGET_SSE4_1)
- break;
- /* FALLTHRU */
-
- case E_V8HImode:
- if (!TARGET_SSE2)
- break;
-
- /* Don't use ix86_expand_vector_init_interleave if we can't
- move from GPR to SSE register directly. */
- if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
- break;
-
- n = GET_MODE_NUNITS (mode);
- for (i = 0; i < n; i++)
- ops[i] = XVECEXP (vals, 0, i);
- ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
- return;
-
- case E_V4HImode:
- case E_V8QImode:
- break;
-
- default:
- gcc_unreachable ();
- }
-
- {
- int i, j, n_elts, n_words, n_elt_per_word;
- machine_mode inner_mode;
- rtx words[4], shift;
-
- inner_mode = GET_MODE_INNER (mode);
- n_elts = GET_MODE_NUNITS (mode);
- n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
- n_elt_per_word = n_elts / n_words;
- shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
-
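- /* Pack the elements of each word so that the lowest-indexed
- element lands in the least significant bits. */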
- for (i = 0; i < n_words; ++i)
- {
- rtx word = NULL_RTX;
-
- for (j = 0; j < n_elt_per_word; ++j)
- {
- rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
- elt = convert_modes (word_mode, inner_mode, elt, true);
-
- if (j == 0)
- word = elt;
- else
- {
- word = expand_simple_binop (word_mode, ASHIFT, word, shift,
- word, 1, OPTAB_LIB_WIDEN);
- word = expand_simple_binop (word_mode, IOR, word, elt,
- word, 1, OPTAB_LIB_WIDEN);
- }
- }
-
- words[i] = word;
- }
-
- if (n_words == 1)
- emit_move_insn (target, gen_lowpart (mode, words[0]));
- else if (n_words == 2)
- {
- rtx tmp = gen_reg_rtx (mode);
- emit_clobber (tmp);
- emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
- emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
- emit_move_insn (target, tmp);
- }
- else if (n_words == 4)
- {
- rtx tmp = gen_reg_rtx (V4SImode);
- gcc_assert (word_mode == SImode);
- vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
- ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
- emit_move_insn (target, gen_lowpart (mode, tmp));
- }
- else
- gcc_unreachable ();
- }
-}
-
-/* Initialize vector TARGET via VALS. Suppress the use of MMX
- instructions unless MMX_OK is true. */
-
-void
-ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
-{
- machine_mode mode = GET_MODE (target);
- machine_mode inner_mode = GET_MODE_INNER (mode);
- int n_elts = GET_MODE_NUNITS (mode);
- int n_var = 0, one_var = -1;
- bool all_same = true, all_const_zero = true;
- int i;
- rtx x;
-
- /* Handle initialization from vector elements (subvectors) first. */
- if (n_elts != XVECLEN (vals, 0))
- {
- rtx subtarget = target;
- x = XVECEXP (vals, 0, 0);
- gcc_assert (GET_MODE_INNER (GET_MODE (x)) == inner_mode);
- if (GET_MODE_NUNITS (GET_MODE (x)) * 2 == n_elts)
- {
- rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) };
- if (inner_mode == QImode || inner_mode == HImode)
- {
- unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode);
- mode = mode_for_vector (SImode, n_bits / 4).require ();
- inner_mode = mode_for_vector (SImode, n_bits / 8).require ();
- ops[0] = gen_lowpart (inner_mode, ops[0]);
- ops[1] = gen_lowpart (inner_mode, ops[1]);
- subtarget = gen_reg_rtx (mode);
- }
- ix86_expand_vector_init_concat (mode, subtarget, ops, 2);
- if (subtarget != target)
- emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget));
- return;
- }
- gcc_unreachable ();
- }
-
- for (i = 0; i < n_elts; ++i)
- {
- x = XVECEXP (vals, 0, i);
- if (!(CONST_SCALAR_INT_P (x)
- || CONST_DOUBLE_P (x)
- || CONST_FIXED_P (x)))
- n_var++, one_var = i;
- else if (x != CONST0_RTX (inner_mode))
- all_const_zero = false;
- if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
- all_same = false;
- }
-
- /* Constants are best loaded from the constant pool. */
- if (n_var == 0)
- {
- emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
- return;
- }
-
- /* If all values are identical, broadcast the value. */
- if (all_same
- && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
- XVECEXP (vals, 0, 0)))
- return;
-
- /* Values where only one field is non-constant are best loaded from
- the pool and overwritten via move later. */
- if (n_var == 1)
- {
- if (all_const_zero
- && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
- XVECEXP (vals, 0, one_var),
- one_var))
- return;
-
- if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
- return;
- }
-
- ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
-}
-
-void
-ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
-{
- machine_mode mode = GET_MODE (target);
- machine_mode inner_mode = GET_MODE_INNER (mode);
- machine_mode half_mode;
- bool use_vec_merge = false;
- rtx tmp;
- static rtx (*gen_extract[6][2]) (rtx, rtx)
- = {
- { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
- { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
- { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
- { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
- { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
- { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
- };
- static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
- = {
- { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
- { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
- { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
- { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
- { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
- { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
- };
- int i, j, n;
- machine_mode mmode = VOIDmode;
- rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
-
- switch (mode)
- {
- case E_V2SImode:
- use_vec_merge = TARGET_MMX_WITH_SSE && TARGET_SSE4_1;
- if (use_vec_merge)
- break;
- /* FALLTHRU */
-
- case E_V2SFmode:
- if (mmx_ok)
- {
- tmp = gen_reg_rtx (GET_MODE_INNER (mode));
- ix86_expand_vector_extract (true, tmp, target, 1 - elt);
- if (elt == 0)
- tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
- else
- tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
- emit_insn (gen_rtx_SET (target, tmp));
- return;
- }
- break;
-
- case E_V2DImode:
- use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
- if (use_vec_merge)
- break;
-
- tmp = gen_reg_rtx (GET_MODE_INNER (mode));
- ix86_expand_vector_extract (false, tmp, target, 1 - elt);
- if (elt == 0)
- tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
- else
- tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
- emit_insn (gen_rtx_SET (target, tmp));
- return;
-
- case E_V2DFmode:
- /* NB: For ELT == 0, use standard scalar operation patterns which
- preserve the rest of the vector for combiner:
-
- (vec_merge:V2DF
- (vec_duplicate:V2DF (reg:DF))
- (reg:V2DF)
- (const_int 1))
- */
- if (elt == 0)
- goto do_vec_merge;
-
- {
- rtx op0, op1;
-
- /* For the two element vectors, we implement a VEC_CONCAT with
- the extraction of the other element. */
-
- tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
- tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
-
- if (elt == 0)
- op0 = val, op1 = tmp;
- else
- op0 = tmp, op1 = val;
-
- tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
- emit_insn (gen_rtx_SET (target, tmp));
- }
- return;
-
- case E_V4SFmode:
- use_vec_merge = TARGET_SSE4_1;
- if (use_vec_merge)
- break;
-
- switch (elt)
- {
- case 0:
- use_vec_merge = true;
- break;
-
- case 1:
- /* tmp = target = A B C D */
- tmp = copy_to_reg (target);
- /* target = A A B B */
- emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
- /* target = X A B B */
- ix86_expand_vector_set (false, target, val, 0);
- /* target = A X C D */
- emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
- const1_rtx, const0_rtx,
- GEN_INT (2+4), GEN_INT (3+4)));
- return;
-
- case 2:
- /* tmp = target = A B C D */
- tmp = copy_to_reg (target);
- /* tmp = X B C D */
- ix86_expand_vector_set (false, tmp, val, 0);
- /* target = A B X D */
- emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
- const0_rtx, const1_rtx,
- GEN_INT (0+4), GEN_INT (3+4)));
- return;
-
- case 3:
- /* tmp = target = A B C D */
- tmp = copy_to_reg (target);
- /* tmp = X B C D */
- ix86_expand_vector_set (false, tmp, val, 0);
- /* target = A B C X */
- emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
- const0_rtx, const1_rtx,
- GEN_INT (2+4), GEN_INT (0+4)));
- return;
-
- default:
- gcc_unreachable ();
- }
- break;
-
- case E_V4SImode:
- use_vec_merge = TARGET_SSE4_1;
- if (use_vec_merge)
- break;
-
- /* Element 0 handled by vec_merge below. */
- if (elt == 0)
- {
- use_vec_merge = true;
- break;
- }
-
- if (TARGET_SSE2)
- {
- /* With SSE2, use integer shuffles to swap element 0 and ELT,
- store into element 0, then shuffle them back. */
-
- rtx order[4];
-
- order[0] = GEN_INT (elt);
- order[1] = const1_rtx;
- order[2] = const2_rtx;
- order[3] = GEN_INT (3);
- order[elt] = const0_rtx;
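- /* ORDER is now the identity permutation except that lanes 0 and
- ELT are swapped; a transposition is its own inverse, so the
- second pshufd below restores the original lane order. */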
-
- emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
- order[1], order[2], order[3]));
-
- ix86_expand_vector_set (false, target, val, 0);
-
- emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
- order[1], order[2], order[3]));
- }
- else
- {
- /* For SSE1, we have to reuse the V4SF code. */
- rtx t = gen_reg_rtx (V4SFmode);
- emit_move_insn (t, gen_lowpart (V4SFmode, target));
- ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
- emit_move_insn (target, gen_lowpart (mode, t));
- }
- return;
-
- case E_V8HImode:
- use_vec_merge = TARGET_SSE2;
- break;
- case E_V4HImode:
- use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
- break;
-
- case E_V16QImode:
- use_vec_merge = TARGET_SSE4_1;
- break;
-
- case E_V8QImode:
- use_vec_merge = TARGET_MMX_WITH_SSE && TARGET_SSE4_1;
- break;
-
- case E_V32QImode:
- half_mode = V16QImode;
- j = 0;
- n = 16;
- goto half;
-
- case E_V16HImode:
- half_mode = V8HImode;
- j = 1;
- n = 8;
- goto half;
-
- case E_V8SImode:
- half_mode = V4SImode;
- j = 2;
- n = 4;
- goto half;
-
- case E_V4DImode:
- half_mode = V2DImode;
- j = 3;
- n = 2;
- goto half;
-
- case E_V8SFmode:
- half_mode = V4SFmode;
- j = 4;
- n = 4;
- goto half;
-
- case E_V4DFmode:
- half_mode = V2DFmode;
- j = 5;
- n = 2;
- goto half;
-
-half:
- /* Compute offset. */
- i = elt / n;
- elt %= n;
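- /* I selects the low (0) or high (1) half; ELT is now the
- position within that half. */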
-
- gcc_assert (i <= 1);
-
- /* Extract the half. */
- tmp = gen_reg_rtx (half_mode);
- emit_insn (gen_extract[j][i] (tmp, target));
-
- /* Put val in tmp at elt. */
- ix86_expand_vector_set (false, tmp, val, elt);
-
- /* Put it back. */
- emit_insn (gen_insert[j][i] (target, target, tmp));
- return;
-
- case E_V8DFmode:
- if (TARGET_AVX512F)
- {
- mmode = QImode;
- gen_blendm = gen_avx512f_blendmv8df;
- }
- break;
-
- case E_V8DImode:
- if (TARGET_AVX512F)
- {
- mmode = QImode;
- gen_blendm = gen_avx512f_blendmv8di;
- }
- break;
-
- case E_V16SFmode:
- if (TARGET_AVX512F)
- {
- mmode = HImode;
- gen_blendm = gen_avx512f_blendmv16sf;
- }
- break;
-
- case E_V16SImode:
- if (TARGET_AVX512F)
- {
- mmode = HImode;
- gen_blendm = gen_avx512f_blendmv16si;
- }
- break;
-
- case E_V32HImode:
- if (TARGET_AVX512BW)
- {
- mmode = SImode;
- gen_blendm = gen_avx512bw_blendmv32hi;
- }
- else if (TARGET_AVX512F)
- {
- half_mode = E_V8HImode;
- n = 8;
- goto quarter;
- }
- break;
-
- case E_V64QImode:
- if (TARGET_AVX512BW)
- {
- mmode = DImode;
- gen_blendm = gen_avx512bw_blendmv64qi;
- }
- else if (TARGET_AVX512F)
- {
- half_mode = E_V16QImode;
- n = 16;
- goto quarter;
- }
- break;
-
-quarter:
- /* Compute offset. */
- i = elt / n;
- elt %= n;
-
- gcc_assert (i <= 3);
-
- {
- /* Extract the quarter. */
- tmp = gen_reg_rtx (V4SImode);
- rtx tmp2 = gen_lowpart (V16SImode, target);
- rtx mask = gen_reg_rtx (QImode);
-
- emit_move_insn (mask, constm1_rtx);
- emit_insn (gen_avx512f_vextracti32x4_mask (tmp, tmp2, GEN_INT (i),
- tmp, mask));
-
- tmp2 = gen_reg_rtx (half_mode);
- emit_move_insn (tmp2, gen_lowpart (half_mode, tmp));
- tmp = tmp2;
-
- /* Put val in tmp at elt. */
- ix86_expand_vector_set (false, tmp, val, elt);
-
- /* Put it back. */
- tmp2 = gen_reg_rtx (V16SImode);
- rtx tmp3 = gen_lowpart (V16SImode, target);
- mask = gen_reg_rtx (HImode);
- emit_move_insn (mask, constm1_rtx);
- tmp = gen_lowpart (V4SImode, tmp);
- emit_insn (gen_avx512f_vinserti32x4_mask (tmp2, tmp3, tmp, GEN_INT (i),
- tmp3, mask));
- emit_move_insn (target, gen_lowpart (mode, tmp2));
- }
- return;
-
- default:
- break;
- }
-
- if (mmode != VOIDmode)
- {
- tmp = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
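- /* TMP holds VAL broadcast to every lane; the mask 1 << ELT makes
- the blend take lane ELT from TMP and every other lane from
- TARGET. */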
- /* The avx512*_blendm<mode> expanders have different operand order
- from VEC_MERGE. In VEC_MERGE, the first input operand is used for
- elements where the mask is set and second input operand otherwise,
- in {sse,avx}*_*blend* the first input operand is used for elements
- where the mask is clear and second input operand otherwise. */
- emit_insn (gen_blendm (target, target, tmp,
- force_reg (mmode,
- gen_int_mode (HOST_WIDE_INT_1U << elt,
- mmode))));
- }
- else if (use_vec_merge)
- {
-do_vec_merge:
- tmp[...]
[diff truncated at 524288 bytes]