From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <gcc-patches-return-385156-listarch-gcc-patches=gcc.gnu.org@gcc.gnu.org>
Received: (qmail 22280 invoked by alias); 19 Nov 2014 17:52:06 -0000
Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id: <gcc-patches.gcc.gnu.org>
List-Archive: <http://gcc.gnu.org/ml/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-help@gcc.gnu.org>
Sender: gcc-patches-owner@gcc.gnu.org
Received: (qmail 22268 invoked by uid 89); 19 Nov 2014 17:52:05 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=-1.0 required=5.0 tests=AWL,BAYES_00,T_RP_MATCHES_RCVD autolearn=ham version=3.3.2
X-HELO: mail.theobroma-systems.com
Received: from vegas.theobroma-systems.com (HELO mail.theobroma-systems.com) (144.76.126.164) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted) ESMTPS; Wed, 19 Nov 2014 17:52:04 +0000
Received: from [86.59.122.178] (port=59813 helo=android.com)	by mail.theobroma-systems.com with esmtpsa (TLS1.2:DHE_RSA_AES_256_CBC_SHA256:256)	(Exim 4.80)	(envelope-from <philipp.tomsich@theobroma-systems.com>)	id 1Xr9Q7-0006p0-Tu; Wed, 19 Nov 2014 18:52:00 +0100
From: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
To: gcc-patches@gcc.gnu.org,	kyrylo.tkachov@arm.com
Cc: marcus.shawcroft@gmail.com,	benedikt.huber@theobroma-systems.com,	ksankaran@apm.com,	Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
Subject: [PATCH 1/2, AArch64, v2] Core definition for APM XGene-1 and associated cost-table.
Date: Wed, 19 Nov 2014 18:02:00 -0000
Message-Id: <1416419511-50674-1-git-send-email-philipp.tomsich@theobroma-systems.com>
In-Reply-To: <546CD687.9060707@arm.com>
References: <546CD687.9060707@arm.com>
X-SW-Source: 2014-11/txt/msg02535.txt.bz2

To keep this change separately buildable from the pipeline model,
this patch directs the APM XGene-1 to use the generic scheduling
model.

v2: Revised to document -mcpu=xgene1 in invoke.texi

---
 gcc/ChangeLog                        |   8 +++
 gcc/config/aarch64/aarch64-cores.def |   1 +
 gcc/config/aarch64/aarch64-tune.md   |   2 +-
 gcc/config/aarch64/aarch64.c         |  62 +++++++++++++++++++++
 gcc/config/arm/aarch-cost-tables.h   | 101 +++++++++++++++++++++++++++++++++++
 gcc/doc/invoke.texi                  |   3 +-
 6 files changed, 175 insertions(+), 2 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2fa58ca..c9ac0d9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2014-11-19  Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
+	* config/aarch64/aarch64-cores.def (xgene1): Update/add the
+	xgene1 (APM XGene-1) core definition.
+	* gcc/config/aarch64/aarch64.c: Add cost tables for APM XGene-1
+	* config/arm/aarch-cost-tables.h: Add cost tables for APM XGene-1
+	* doc/invoke.texi: Document -mcpu=xgene1.
+
 2014-11-18  Maciej W. Rozycki  <macro@codesourcery.com>
 
 	* config/mips/mips.md (compression): Add `micromips32' setting.
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 312941f..e553e50 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -37,6 +37,7 @@
 AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8,  AARCH64_FL_FPSIMD | AARCH64_FL_CRC, cortexa53)
 AARCH64_CORE("cortex-a57",  cortexa15, cortexa15, 8,  AARCH64_FL_FPSIMD | AARCH64_FL_CRC, cortexa57)
 AARCH64_CORE("thunderx",    thunderx,  thunderx, 8,  AARCH64_FL_FPSIMD | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx)
+AARCH64_CORE("xgene1",      xgene1,    xgene1,    8,  AARCH64_FL_FPSIMD, xgene1)
 
 /* V8 big.LITTLE implementations.  */
 
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index c717ea8..6409082 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-	"cortexa53,cortexa15,thunderx,cortexa57cortexa53"
+	"cortexa53,cortexa15,thunderx,xgene1,cortexa57cortexa53"
 	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 4fec21e..9b92527 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -226,6 +226,27 @@ static const struct cpu_addrcost_table cortexa57_addrcost_table =
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
 #endif
+static const struct cpu_addrcost_table xgene1_addrcost_table =
+{
+#if HAVE_DESIGNATED_INITIALIZERS
+  .addr_scale_costs =
+#endif
+    {
+      NAMED_PARAM (hi, 1),
+      NAMED_PARAM (si, 0),
+      NAMED_PARAM (di, 0),
+      NAMED_PARAM (ti, 1),
+    },
+  NAMED_PARAM (pre_modify, 1),
+  NAMED_PARAM (post_modify, 0),
+  NAMED_PARAM (register_offset, 0),
+  NAMED_PARAM (register_extend, 1),
+  NAMED_PARAM (imm_offset, 0),
+};
+
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
 static const struct cpu_regmove_cost generic_regmove_cost =
 {
   NAMED_PARAM (GP2GP, 1),
@@ -262,6 +283,17 @@ static const struct cpu_regmove_cost thunderx_regmove_cost =
   NAMED_PARAM (FP2FP, 4)
 };
 
+static const struct cpu_regmove_cost xgene1_regmove_cost =
+{
+  NAMED_PARAM (GP2GP, 1),
+  NAMED_PARAM (GP2FP, 8),
+  NAMED_PARAM (FP2GP, 8),
+  /* We currently do not provide direct support for TFmode Q->Q move.
+     Therefore we need to raise the cost above 2 in order to have
+     reload handle the situation.  */
+  NAMED_PARAM (FP2FP, 4)
+};
+
 /* Generic costs for vector insn classes.  */
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
@@ -302,6 +334,26 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 };
 
+/* Generic costs for vector insn classes.  */
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
+static const struct cpu_vector_cost xgene1_vector_cost =
+{
+  NAMED_PARAM (scalar_stmt_cost, 1),
+  NAMED_PARAM (scalar_load_cost, 5),
+  NAMED_PARAM (scalar_store_cost, 1),
+  NAMED_PARAM (vec_stmt_cost, 2),
+  NAMED_PARAM (vec_to_scalar_cost, 4),
+  NAMED_PARAM (scalar_to_vec_cost, 4),
+  NAMED_PARAM (vec_align_load_cost, 10),
+  NAMED_PARAM (vec_unalign_load_cost, 10),
+  NAMED_PARAM (vec_unalign_store_cost, 2),
+  NAMED_PARAM (vec_store_cost, 2),
+  NAMED_PARAM (cond_taken_branch_cost, 2),
+  NAMED_PARAM (cond_not_taken_branch_cost, 1)
+};
+
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
 #endif
@@ -345,6 +397,16 @@ static const struct tune_params thunderx_tunings =
   NAMED_PARAM (issue_rate, 2)
 };
 
+static const struct tune_params xgene1_tunings =
+{
+  &xgene1_extra_costs,
+  &xgene1_addrcost_table,
+  &xgene1_regmove_cost,
+  &xgene1_vector_cost,
+  NAMED_PARAM (memmov_cost, 4),
+  NAMED_PARAM (issue_rate, 4)
+};
+
 /* A processor implementing AArch64.  */
 struct processor
 {
diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h
index adf8708..a6313d6 100644
--- a/gcc/config/arm/aarch-cost-tables.h
+++ b/gcc/config/arm/aarch-cost-tables.h
@@ -325,4 +325,105 @@ const struct cpu_cost_table cortexa57_extra_costs =
   }
 };
 
+const struct cpu_cost_table xgene1_extra_costs =
+{
+  /* ALU */
+  {
+    0,                 /* arith.  */
+    0,                 /* logical.  */
+    0,                 /* shift.  */
+    COSTS_N_INSNS (1), /* shift_reg.  */
+    COSTS_N_INSNS (1), /* arith_shift.  */
+    COSTS_N_INSNS (1), /* arith_shift_reg.  */
+    COSTS_N_INSNS (1), /* log_shift.  */
+    COSTS_N_INSNS (1), /* log_shift_reg.  */
+    COSTS_N_INSNS (1), /* extend.  */
+    0,                 /* extend_arithm.  */
+    COSTS_N_INSNS (1), /* bfi.  */
+    COSTS_N_INSNS (1), /* bfx.  */
+    0,                 /* clz.  */
+    COSTS_N_INSNS (1), /* rev.  */
+    0,                 /* non_exec.  */
+    true               /* non_exec_costs_exec.  */
+  },
+  {
+    /* MULT SImode */
+    {
+      COSTS_N_INSNS (4),       /* simple.  */
+      COSTS_N_INSNS (4),       /* flag_setting.  */
+      COSTS_N_INSNS (4),       /* extend.  */
+      COSTS_N_INSNS (4),       /* add.  */
+      COSTS_N_INSNS (4),       /* extend_add.  */
+      COSTS_N_INSNS (20)       /* idiv.  */
+    },
+    /* MULT DImode */
+    {
+      COSTS_N_INSNS (5),       /* simple.  */
+      0,                       /* flag_setting (N/A).  */
+      COSTS_N_INSNS (5),       /* extend.  */
+      COSTS_N_INSNS (5),       /* add.  */
+      COSTS_N_INSNS (5),       /* extend_add.  */
+      COSTS_N_INSNS (21)       /* idiv.  */
+    }
+  },
+  /* LD/ST */
+  {
+    COSTS_N_INSNS (5),         /* load.  */
+    COSTS_N_INSNS (6),         /* load_sign_extend.  */
+    COSTS_N_INSNS (5),         /* ldrd.  */
+    COSTS_N_INSNS (5),         /* ldm_1st.  */
+    1,                         /* ldm_regs_per_insn_1st.  */
+    1,                         /* ldm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (10),        /* loadf.  */
+    COSTS_N_INSNS (10),        /* loadd.  */
+    COSTS_N_INSNS (5),         /* load_unaligned.  */
+    0,                         /* store.  */
+    0,                         /* strd.  */
+    0,                         /* stm_1st.  */
+    1,                         /* stm_regs_per_insn_1st.  */
+    1,                         /* stm_regs_per_insn_subsequent.  */
+    0,                         /* storef.  */
+    0,                         /* stored.  */
+    0,                         /* store_unaligned.  */
+  },
+  {
+    /* FP SFmode */
+    {
+      COSTS_N_INSNS (23),      /* div.  */
+      COSTS_N_INSNS (5),       /* mult.  */
+      COSTS_N_INSNS (5),       /* mult_addsub. */
+      COSTS_N_INSNS (5),       /* fma.  */
+      COSTS_N_INSNS (5),       /* addsub.  */
+      COSTS_N_INSNS (2),       /* fpconst. */
+      COSTS_N_INSNS (3),       /* neg.  */
+      COSTS_N_INSNS (2),       /* compare.  */
+      COSTS_N_INSNS (6),       /* widen.  */
+      COSTS_N_INSNS (6),       /* narrow.  */
+      COSTS_N_INSNS (4),       /* toint.  */
+      COSTS_N_INSNS (4),       /* fromint.  */
+      COSTS_N_INSNS (4)        /* roundint.  */
+    },
+    /* FP DFmode */
+    {
+      COSTS_N_INSNS (29),      /* div.  */
+      COSTS_N_INSNS (5),       /* mult.  */
+      COSTS_N_INSNS (5),       /* mult_addsub.  */
+      COSTS_N_INSNS (5),       /* fma.  */
+      COSTS_N_INSNS (5),       /* addsub.  */
+      COSTS_N_INSNS (3),       /* fpconst.  */
+      COSTS_N_INSNS (3),       /* neg.  */
+      COSTS_N_INSNS (2),       /* compare.  */
+      COSTS_N_INSNS (6),       /* widen.  */
+      COSTS_N_INSNS (6),       /* narrow.  */
+      COSTS_N_INSNS (4),       /* toint.  */
+      COSTS_N_INSNS (4),       /* fromint.  */
+      COSTS_N_INSNS (4)        /* roundint.  */
+    }
+  },
+  /* Vector */
+  {
+    COSTS_N_INSNS (1)  /* alu.  */
+  }
+};
+
 #endif /* GCC_AARCH_COST_TABLES_H */
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 89edddb..10f4716bb 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -11935,7 +11935,8 @@ architecture.
 @opindex mtune
 Specify the name of the target processor for which GCC should tune the
 performance of the code.  Permissible values for this option are:
-@samp{generic}, @samp{cortex-a53}, @samp{cortex-a57}, @samp{thunderx}.
+@samp{generic}, @samp{cortex-a53}, @samp{cortex-a57}, @samp{thunderx},
+@samp{xgene1}.
 
 Additionally, this option can specify that GCC should tune the performance
 of the code for a big.LITTLE system.  The only permissible value is
-- 
1.9.1