From: Uros Bizjak <ubizjak@gmail.com>
To: Ilya Tocar <tocarip.intel@gmail.com>
Cc: GCC Patches <gcc-patches@gcc.gnu.org>
Subject: Re: [PATCH x86] Add march/mtune=knl
Date: Wed, 10 Dec 2014 16:35:00 -0000 [thread overview]
Message-ID: <CAFULd4YUcmmkDJ2UxmYPwUQ=AOq9CNQ833b7YLjo+KfPxVw_8w@mail.gmail.com> (raw)
In-Reply-To: <20141210162047.GA128205@msticlxl7.ims.intel.com>
On Wed, Dec 10, 2014 at 5:20 PM, Ilya Tocar <tocarip.intel@gmail.com> wrote:
> Hi,
>
> Patch below adds march/mtune/attribute=knl.
> For now this is just silvermont tuning and avx/avx2/avx512 support.
> Ok for trunk?
>
> gcc/
> * config.gcc: Support "knl".
> * config/i386/driver-i386.c (host_detect_local_cpu): Detect "knl".
> * config/i386/i386-c.c (ix86_target_macros_internal): Handle
> PROCESSOR_KNL.
> * config/i386/i386.c (m_KNL): Define.
> (processor_target_table): Add "knl".
> (PTA_KNL): Define.
> (ix86_issue_rate): Add PROCESSOR_KNL.
> (ix86_adjust_cost): Ditto.
> (ia32_multipass_dfa_lookahead): Ditto.
> (get_builtin_code_for_version): Handle "knl".
> (fold_builtin_cpu): Ditto.
> * config/i386/i386.h (TARGET_KNL): Define.
> (processor_type): Add PROCESSOR_KNL.
> * config/i386/i386.md (attr "cpu"): Add knl.
> * config/i386/x86-tune.def: Add m_KNL.
>
> gcc/testsuite/
> * gcc.target/i386/funcspec-5.c: Test avx512f and knl.
OK with a small comment nit below.
Thanks,
Uros.
>
> ---
> gcc/config.gcc | 3 +-
> gcc/config/i386/driver-i386.c | 6 +++-
> gcc/config/i386/i386-c.c | 7 +++++
> gcc/config/i386/i386.c | 17 ++++++++++-
> gcc/config/i386/i386.h | 2 ++
> gcc/config/i386/i386.md | 2 +-
> gcc/config/i386/x86-tune.def | 47 +++++++++++++++---------------
> gcc/testsuite/gcc.target/i386/funcspec-5.c | 3 ++
> 8 files changed, 60 insertions(+), 27 deletions(-)
>
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index fa3e1fc..8541274 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -591,7 +591,8 @@ pentium4 pentium4m pentiumpro prescott"
> x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \
> bdver3 bdver4 btver1 btver2 k8 k8-sse3 opteron opteron-sse3 nocona \
> core2 corei7 corei7-avx core-avx-i core-avx2 atom slm nehalem westmere \
> -sandybridge ivybridge haswell broadwell bonnell silvermont x86-64 native"
> +sandybridge ivybridge haswell broadwell bonnell silvermont knl x86-64 \
> +native"
>
> # Additional x86 processors supported by --with-cpu=. Each processor
> # MUST be separated by exactly one space.
> diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
> index a2248ce..69ebebd 100644
> --- a/gcc/config/i386/driver-i386.c
> +++ b/gcc/config/i386/driver-i386.c
> @@ -747,7 +747,11 @@ const char *host_detect_local_cpu (int argc, const char **argv)
> if (arch)
> {
> /* This is unknown family 0x6 CPU. */
> - if (has_adx)
> + /* Assume Knl. */
/* Assume Knights Landing. */
> + if (has_avx512f)
> + cpu = "knl";
> + /* Assume Broadwell. */
> + else if (has_adx)
> cpu = "broadwell";
> else if (has_avx2)
> /* Assume Haswell. */
> diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
> index 3ad7d49..1c604fc3 100644
> --- a/gcc/config/i386/i386-c.c
> +++ b/gcc/config/i386/i386-c.c
> @@ -171,6 +171,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
> def_or_undef (parse_in, "__silvermont");
> def_or_undef (parse_in, "__silvermont__");
> break;
> + case PROCESSOR_KNL:
> + def_or_undef (parse_in, "__knl");
> + def_or_undef (parse_in, "__knl__");
> + break;
> /* use PROCESSOR_max to not set/unset the arch macro. */
> case PROCESSOR_max:
> break;
> @@ -277,6 +281,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
> def_or_undef (parse_in, "__tune_slm__");
> def_or_undef (parse_in, "__tune_silvermont__");
> break;
> + case PROCESSOR_KNL:
> + def_or_undef (parse_in, "__tune_knl__");
> + break;
> case PROCESSOR_INTEL:
> case PROCESSOR_GENERIC:
> break;
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 1e1716e..f0cbe48 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -2040,6 +2040,7 @@ const struct processor_costs *ix86_cost = &pentium_cost;
> #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
> #define m_BONNELL (1<<PROCESSOR_BONNELL)
> #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
> +#define m_KNL (1<<PROCESSOR_KNL)
> #define m_INTEL (1<<PROCESSOR_INTEL)
>
> #define m_GEODE (1<<PROCESSOR_GEODE)
> @@ -2505,6 +2506,7 @@ static const struct ptt processor_target_table[PROCESSOR_max] =
> {"haswell", &core_cost, 16, 10, 16, 10, 16},
> {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
> {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
> + {"knl", &slm_cost, 16, 15, 16, 7, 16},
> {"intel", &intel_cost, 16, 15, 16, 7, 16},
> {"geode", &geode_cost, 0, 0, 0, 0, 0},
> {"k6", &k6_cost, 32, 7, 32, 7, 32},
> @@ -3178,6 +3180,8 @@ ix86_option_override_internal (bool main_args_p,
> | PTA_FMA | PTA_MOVBE | PTA_HLE)
> #define PTA_BROADWELL \
> (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
> +#define PTA_KNL \
> + (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
> #define PTA_BONNELL \
> (PTA_CORE2 | PTA_MOVBE)
> #define PTA_SILVERMONT \
> @@ -3241,6 +3245,7 @@ ix86_option_override_internal (bool main_args_p,
> {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
> {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
> {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
> + {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
> {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
> {"geode", PROCESSOR_GEODE, CPU_GEODE,
> PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
> @@ -25934,6 +25939,7 @@ ix86_issue_rate (void)
> case PROCESSOR_PENTIUM:
> case PROCESSOR_BONNELL:
> case PROCESSOR_SILVERMONT:
> + case PROCESSOR_KNL:
> case PROCESSOR_INTEL:
> case PROCESSOR_K6:
> case PROCESSOR_BTVER2:
> @@ -26276,6 +26282,7 @@ ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
> break;
>
> case PROCESSOR_SILVERMONT:
> + case PROCESSOR_KNL:
> case PROCESSOR_INTEL:
> if (!reload_completed)
> return cost;
> @@ -26345,6 +26352,7 @@ ia32_multipass_dfa_lookahead (void)
> case PROCESSOR_HASWELL:
> case PROCESSOR_BONNELL:
> case PROCESSOR_SILVERMONT:
> + case PROCESSOR_KNL:
> case PROCESSOR_INTEL:
> /* Generally, we want haifa-sched:max_issue() to look ahead as far
> as many instructions can be executed on a cycle, i.e.,
> @@ -34246,7 +34254,8 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
> P_PROC_FMA,
> P_AVX2,
> P_PROC_AVX2,
> - P_AVX512F
> + P_AVX512F,
> + P_PROC_AVX512F
> };
>
> enum feature_priority priority = P_ZERO;
> @@ -34350,6 +34359,10 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
> arg_str = "bonnell";
> priority = P_PROC_SSSE3;
> break;
> + case PROCESSOR_KNL:
> + arg_str = "knl";
> + priority = P_PROC_AVX512F;
> + break;
> case PROCESSOR_SILVERMONT:
> arg_str = "silvermont";
> priority = P_PROC_SSE4_2;
> @@ -35268,6 +35281,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
> M_AMDFAM10H,
> M_AMDFAM15H,
> M_INTEL_SILVERMONT,
> + M_INTEL_KNL,
> M_AMD_BTVER1,
> M_AMD_BTVER2,
> M_CPU_SUBTYPE_START,
> @@ -35305,6 +35319,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
> {"haswell", M_INTEL_COREI7_HASWELL},
> {"bonnell", M_INTEL_BONNELL},
> {"silvermont", M_INTEL_SILVERMONT},
> + {"knl", M_INTEL_KNL},
> {"amdfam10h", M_AMDFAM10H},
> {"barcelona", M_AMDFAM10H_BARCELONA},
> {"shanghai", M_AMDFAM10H_SHANGHAI},
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index df7789d..7c35758 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -337,6 +337,7 @@ extern const struct processor_costs ix86_size_cost;
> #define TARGET_HASWELL (ix86_tune == PROCESSOR_HASWELL)
> #define TARGET_BONNELL (ix86_tune == PROCESSOR_BONNELL)
> #define TARGET_SILVERMONT (ix86_tune == PROCESSOR_SILVERMONT)
> +#define TARGET_KNL (ix86_tune == PROCESSOR_KNL)
> #define TARGET_INTEL (ix86_tune == PROCESSOR_INTEL)
> #define TARGET_GENERIC (ix86_tune == PROCESSOR_GENERIC)
> #define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
> @@ -2272,6 +2273,7 @@ enum processor_type
> PROCESSOR_HASWELL,
> PROCESSOR_BONNELL,
> PROCESSOR_SILVERMONT,
> + PROCESSOR_KNL,
> PROCESSOR_INTEL,
> PROCESSOR_GEODE,
> PROCESSOR_K6,
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 9019ed8..7ae511c 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -399,7 +399,7 @@
> ;; Processor type.
> (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
> atom,slm,generic,amdfam10,bdver1,bdver2,bdver3,bdver4,
> - btver2"
> + btver2,knl"
> (const (symbol_ref "ix86_schedule")))
>
> ;; A basic instruction type. Refinements due to arguments to be
> diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
> index b5c6e4f..db43b3d 100644
> --- a/gcc/config/i386/x86-tune.def
> +++ b/gcc/config/i386/x86-tune.def
> @@ -41,7 +41,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
> /* X86_TUNE_SCHEDULE: Enable scheduling. */
> DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
> m_PENT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
> - | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC)
> + | m_KNL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC)
>
> /* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming
> on modern chips. Preffer stores affecting whole integer register
> @@ -49,7 +49,7 @@ DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
> value over movb. */
> DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
> m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
> - | m_AMD_MULTIPLE | m_GENERIC)
> + | m_KNL | m_AMD_MULTIPLE | m_GENERIC)
>
> /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store
> destinations to be 128bit to allow register renaming on 128bit SSE units,
> @@ -85,13 +85,13 @@ DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall",
> partial dependencies. */
> DEF_TUNE (X86_TUNE_MOVX, "movx",
> m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
> - | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC)
> + | m_KNL | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC)
>
> /* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by
> full sized loads. */
> DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall",
> m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
> - | m_AMD_MULTIPLE | m_GENERIC)
> + | m_KNL | m_AMD_MULTIPLE | m_GENERIC)
>
> /* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent
> conditional jump instruction for 32 bit TARGET.
> @@ -125,7 +125,7 @@ DEF_TUNE (X86_TUNE_REASSOC_INT_TO_PARALLEL, "reassoc_int_to_parallel",
> /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
> during reassociation of fp computation. */
> DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel",
> - m_BONNELL | m_SILVERMONT | m_HASWELL | m_INTEL | m_BDVER1
> + m_BONNELL | m_SILVERMONT | m_HASWELL | m_KNL |m_INTEL | m_BDVER1
> | m_BDVER2 | m_GENERIC)
>
> /*****************************************************************************/
> @@ -145,7 +145,7 @@ DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel",
> regression on mgrid due to IRA limitation leading to unecessary
> use of the frame pointer in 32bit mode. */
> DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args",
> - m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL
> + m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL
> | m_ATHLON_K8)
>
> /* X86_TUNE_PROLOGUE_USING_MOVE: Do not use push/pop in prologues that are
> @@ -205,7 +205,7 @@ DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns",
> /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
> than 4 branch instructions in the 16 byte window. */
> DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit",
> - m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL |
> + m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL |m_INTEL |
> m_ATHLON_K8 | m_AMDFAM10)
>
> /*****************************************************************************/
> @@ -229,21 +229,22 @@ DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_PPRO))
> /* X86_TUNE_USE_INCDEC: Enable use of inc/dec instructions. */
> DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec",
> ~(m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
> - | m_GENERIC))
> + | m_KNL | m_GENERIC))
>
> /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
> for DFmode copies */
> DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves",
> ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
> - | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC))
> + | m_KNL | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GENERIC))
>
> /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
> will impact LEA instruction selection. */
> -DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_INTEL)
> +DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_KNL
> + | m_INTEL)
>
> /* X86_TUNE_AVOID_LEA_FOR_ADDR: Avoid lea for address computation. */
> DEF_TUNE (X86_TUNE_AVOID_LEA_FOR_ADDR, "avoid_lea_for_addr",
> - m_BONNELL | m_SILVERMONT)
> + m_BONNELL | m_SILVERMONT | m_KNL)
>
> /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
> vector path on AMD machines.
> @@ -260,7 +261,7 @@ DEF_TUNE (X86_TUNE_SLOW_IMUL_IMM8, "slow_imul_imm8",
> /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
> a conditional move. */
> DEF_TUNE (X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, "avoid_mem_opnd_for_cmove",
> - m_BONNELL | m_SILVERMONT | m_INTEL)
> + m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL)
>
> /* X86_TUNE_SINGLE_STRINGOP: Enable use of single string operations, such
> as MOVS and STOS (without a REP prefix) to move/set sequences of bytes. */
> @@ -278,17 +279,17 @@ DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES,
> /* X86_TUNE_USE_SAHF: Controls use of SAHF. */
> DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf",
> m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
> - | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER
> - | m_GENERIC)
> + | m_KNL | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER
> + | m_BTVER | m_GENERIC)
>
> /* X86_TUNE_USE_CLTD: Controls use of CLTD and CTQO instructions. */
> DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",
> - ~(m_PENT | m_BONNELL | m_SILVERMONT | m_INTEL | m_K6))
> + ~(m_PENT | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL | m_K6))
>
> /* X86_TUNE_USE_BT: Enable use of BT (bit test) instructions. */
> DEF_TUNE (X86_TUNE_USE_BT, "use_bt",
> - m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL | m_AMD_MULTIPLE
> - | m_GENERIC)
> + m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL
> + | m_AMD_MULTIPLE | m_GENERIC)
>
> /*****************************************************************************/
> /* 387 instruction selection tuning */
> @@ -304,7 +305,7 @@ DEF_TUNE (X86_TUNE_USE_HIMODE_FIOP, "use_himode_fiop",
> integer operand. */
> DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop",
> ~(m_PENT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT
> - | m_INTEL | m_AMD_MULTIPLE | m_GENERIC))
> + | m_KNL | m_INTEL | m_AMD_MULTIPLE | m_GENERIC))
>
> /* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp. */
> DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)
> @@ -312,7 +313,7 @@ DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE)
> /* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI. */
> DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants",
> m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
> - | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC)
> + | m_KNL | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC)
>
> /*****************************************************************************/
> /* SSE instruction selection tuning */
> @@ -331,13 +332,13 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill",
> of a sequence loading registers by parts. */
> DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal",
> m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_AMDFAM10 | m_BDVER
> - | m_BTVER | m_SILVERMONT | m_INTEL | m_GENERIC)
> + | m_BTVER | m_SILVERMONT | m_KNL | m_INTEL | m_GENERIC)
>
> /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores instead
> of a sequence loading registers by parts. */
> DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal",
> m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER | m_SILVERMONT
> - | m_INTEL | m_GENERIC)
> + | m_KNL | m_INTEL | m_GENERIC)
>
> /* Use packed single precision instructions where posisble. I.e. movups instead
> of movupd. */
> @@ -374,7 +375,7 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions",
> /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for
> fp converts to destination register. */
> DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts",
> - m_SILVERMONT | m_INTEL)
> + m_SILVERMONT | m_KNL | m_INTEL)
>
> /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
> from FP to FP. This form of instructions avoids partial write to the
> @@ -388,7 +389,7 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10)
>
> /* X86_TUNE_SLOW_SHUFB: Indicates tunings with slow pshufb instruction. */
> DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb",
> - m_BONNELL | m_SILVERMONT | m_INTEL)
> + m_BONNELL | m_SILVERMONT | m_KNL | m_INTEL)
>
> /* X86_TUNE_VECTOR_PARALLEL_EXECUTION: Indicates tunings with ability to
> execute 2 or more vector instructions in parallel. */
> diff --git a/gcc/testsuite/gcc.target/i386/funcspec-5.c b/gcc/testsuite/gcc.target/i386/funcspec-5.c
> index 0acfe00..269e610 100644
> --- a/gcc/testsuite/gcc.target/i386/funcspec-5.c
> +++ b/gcc/testsuite/gcc.target/i386/funcspec-5.c
> @@ -24,6 +24,7 @@ extern void test_ssse3 (void) __attribute__((__target__("ssse3")));
> extern void test_tbm (void) __attribute__((__target__("tbm")));
> extern void test_avx (void) __attribute__((__target__("avx")));
> extern void test_avx2 (void) __attribute__((__target__("avx2")));
> +extern void test_avx512 (void) __attribute__((__target__("avx512")));
>
> extern void test_no_abm (void) __attribute__((__target__("no-abm")));
> extern void test_no_aes (void) __attribute__((__target__("no-aes")));
> @@ -46,6 +47,7 @@ extern void test_no_ssse3 (void) __attribute__((__target__("no-ssse3")));
> extern void test_no_tbm (void) __attribute__((__target__("no-tbm")));
> extern void test_no_avx (void) __attribute__((__target__("no-avx")));
> extern void test_no_avx2 (void) __attribute__((__target__("no-avx2")));
> +extern void test_no_avx512 (void) __attribute__((__target__("no-avx512")));
>
> extern void test_arch_i386 (void) __attribute__((__target__("arch=i386")));
> extern void test_arch_i486 (void) __attribute__((__target__("arch=i486")));
> @@ -70,6 +72,7 @@ extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
> extern void test_arch_corei7 (void) __attribute__((__target__("arch=corei7")));
> extern void test_arch_corei7_avx (void) __attribute__((__target__("arch=corei7-avx")));
> extern void test_arch_core_avx2 (void) __attribute__((__target__("arch=core-avx2")));
> +extern void test_arch_knl (void) __attribute__((__target__("arch=knl")));
> extern void test_arch_geode (void) __attribute__((__target__("arch=geode")));
> extern void test_arch_k6 (void) __attribute__((__target__("arch=k6")));
> extern void test_arch_k6_2 (void) __attribute__((__target__("arch=k6-2")));
> --
> 1.8.3.1
>
next prev parent reply other threads:[~2014-12-10 16:35 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-12-10 16:21 Ilya Tocar
2014-12-10 16:35 ` Uros Bizjak [this message]
2015-01-24 11:53 ` Tom de Vries
2015-01-24 12:13 ` Uros Bizjak
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAFULd4YUcmmkDJ2UxmYPwUQ=AOq9CNQ833b7YLjo+KfPxVw_8w@mail.gmail.com' \
--to=ubizjak@gmail.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=tocarip.intel@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).