public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [patch][x86] skylake costs
@ 2017-11-17  9:19 Koval, Julia
  2017-11-17  9:46 ` Uros Bizjak
  0 siblings, 1 reply; 2+ messages in thread
From: Koval, Julia @ 2017-11-17  9:19 UTC (permalink / raw)
  To: GCC Patches; +Cc: Kirill Yukhin, Uros Bizjak

[-- Attachment #1: Type: text/plain, Size: 281 bytes --]

Hi, this patch introduces a separate cost model for skylake-avx512. OK for trunk?

gcc/
	* config/i386/i386.c (processor_target_table): Add skylake_cost for
	skylake-avx512.
	* config/i386/x86-tune-costs.h (skylake_memcpy, skylake_memset,
	skylake_cost): New.
Thanks,
Julia

[-- Attachment #2: 0001-cost-model.patch --]
[-- Type: application/octet-stream, Size: 5593 bytes --]

From 3d7caa64117ce44f2bfa995bc76b238d58ed818b Mon Sep 17 00:00:00 2001
From: "julia.koval" <jkoval@gkliclel211.igk.intel.com>
Date: Thu, 16 Nov 2017 17:15:57 +0100
Subject: [PATCH] cost-model

---
 gcc/config/i386/i386.c           |  2 +-
 gcc/config/i386/x86-tune-costs.h | 89 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c9580ba..18fb5ca 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -853,7 +853,7 @@ static const struct ptt processor_target_table[PROCESSOR_max] =
   {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
   {"knl", &slm_cost, 16, 15, 16, 7, 16},
   {"knm", &slm_cost, 16, 15, 16, 7, 16},
-  {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
+  {"skylake-avx512", &skylake_cost, 16, 10, 16, 10, 16},
   {"intel", &intel_cost, 16, 15, 16, 7, 16},
   {"geode", &geode_cost, 0, 0, 0, 0, 0},
   {"k6", &k6_cost, 32, 7, 32, 7, 32},
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index c7ac70e..75a5906 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1515,6 +1515,95 @@ struct processor_costs znver1_cost = {
   COSTS_N_INSNS (2),			/* cond_not_taken_branch_cost.  */
 };
 
+/* skylake_cost should produce code tuned for the Skylake family of CPUs.  */
+static stringop_algs skylake_memcpy[2] =   {
+  {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
+  {libcall, {{16, loop, false}, {512, rep_prefix_8_byte, false},
+             {-1, libcall, false}}}};
+
+static stringop_algs skylake_memset[2] = {
+  {libcall, {{6, loop_1_byte, true},
+             {24, loop, true},
+             {8192, rep_prefix_4_byte, true},
+             {-1, libcall, false}}},
+  {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, false},
+             {-1, libcall, false}}}};
+
+static const
+struct processor_costs skylake_cost = {
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
+  COSTS_N_INSNS (1)+1,		/* cost of a lea instruction */
+  COSTS_N_INSNS (1),			/* variable shift costs */
+  COSTS_N_INSNS (1),			/* constant shift costs */
+  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
+   COSTS_N_INSNS (4),			/*				 HI */
+   COSTS_N_INSNS (3),			/*				 SI */
+   COSTS_N_INSNS (4),			/*				 DI */
+   COSTS_N_INSNS (4)},			/*			      other */
+  0,					/* cost of multiply per each bit set */
+  {COSTS_N_INSNS (8),			/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (8),			/*			    HI */
+   COSTS_N_INSNS (11),			/*			    SI */
+   COSTS_N_INSNS (76),			/*			    DI */
+   COSTS_N_INSNS (76)},			/*			    other */
+  COSTS_N_INSNS (1),			/* cost of movsx */
+  COSTS_N_INSNS (0),			/* cost of movzx */
+  8,					/* "large" insn */
+  17,					/* MOVE_RATIO */
+
+  6,				     /* cost for loading QImode using movzbl */
+  {4, 4, 4},				/* cost of loading integer registers
+					   in QImode, HImode and SImode.
+					   Relative to reg-reg move (2).  */
+  {6, 6, 6},				/* cost of storing integer registers */
+  2,					/* cost of reg,reg fld/fst */
+  {6, 6, 8},				/* cost of loading fp registers
+					   in SFmode, DFmode and XFmode */
+  {6, 6, 10},				/* cost of storing fp registers
+					   in SFmode, DFmode and XFmode */
+  2,					/* cost of moving MMX register */
+  {6, 6},				/* cost of loading MMX registers
+					   in SImode and DImode */
+  {6, 6},				/* cost of storing MMX registers
+					   in SImode and DImode */
+  2, 2, 4,				/* cost of moving XMM,YMM,ZMM register */
+  {6, 6, 6, 10, 20},			/* cost of loading SSE registers
+					   in 32,64,128,256 and 512-bit */
+  {6, 6, 6, 10, 20},			/* cost of unaligned loads.  */
+  {8, 8, 8, 8, 16},			/* cost of storing SSE registers
+					   in 32,64,128,256 and 512-bit */
+  {8, 8, 8, 8, 16},			/* cost of unaligned stores.  */
+  2, 2,					/* SSE->integer and integer->SSE moves */
+  20, 8,				/* Gather load static, per_elt.  */
+  22, 10,				/* Gather store static, per_elt.  */
+  64,					/* size of l1 cache.  */
+  512,					/* size of l2 cache.  */
+  64,					/* size of prefetch block */
+  6,					/* number of parallel prefetches */
+  3,					/* Branch cost */
+  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (20),			/* cost of FSQRT instruction.  */
+
+  COSTS_N_INSNS (1),			/* cost of cheap SSE instruction.  */
+  COSTS_N_INSNS (4),			/* cost of ADDSS/SD SUBSS/SD insns.  */
+  COSTS_N_INSNS (4),			/* cost of MULSS instruction.  */
+  COSTS_N_INSNS (4),			/* cost of MULSD instruction.  */
+  COSTS_N_INSNS (4),			/* cost of FMA SS instruction.  */
+  COSTS_N_INSNS (4),			/* cost of FMA SD instruction.  */
+  COSTS_N_INSNS (11),			/* cost of DIVSS instruction.  */
+  COSTS_N_INSNS (14),			/* cost of DIVSD instruction.  */
+  COSTS_N_INSNS (12),			/* cost of SQRTSS instruction.  */
+  COSTS_N_INSNS (18),			/* cost of SQRTSD instruction.  */
+  1, 4, 2, 2,				/* reassoc int, fp, vec_int, vec_fp.  */
+  skylake_memcpy,
+  skylake_memset,
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
+};
   /* BTVER1 has optimized REP instruction for medium sized blocks, but for
      very small blocks it is better to use loop. For large blocks, libcall can
      do nontemporary accesses and beat inline considerably.  */
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [patch][x86] skylake costs
  2017-11-17  9:19 [patch][x86] skylake costs Koval, Julia
@ 2017-11-17  9:46 ` Uros Bizjak
  0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2017-11-17  9:46 UTC (permalink / raw)
  To: Koval, Julia; +Cc: GCC Patches, Kirill Yukhin

On Fri, Nov 17, 2017 at 10:18 AM, Koval, Julia <julia.koval@intel.com> wrote:
> Hi, this patch introduces separate cost model for skylake-avx512. Ok for trunk?
>
> gcc/
>         * config/i386/i386.c (processor_target_table): Add skylake_cost for
>         skylake-avx512.
>         * config/i386/x86-tune-costs.h (skylake_memcpy, skylake_memset,
>         skylake_cost): New.

LGTM.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2017-11-17  9:29 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-17  9:19 [patch][x86] skylake costs Koval, Julia
2017-11-17  9:46 ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).