public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][AArch64] Add branch-cost to cpu tuning information.
@ 2015-04-21 14:00 Matthew Wahab
  2015-05-01  9:18 ` Marcus Shawcroft
  0 siblings, 1 reply; 4+ messages in thread
From: Matthew Wahab @ 2015-04-21 14:00 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 983 bytes --]

The AArch64 backend sets BRANCH_COST to be the constant value 2 for all cpus,
meaning that the compiler thinks that branches cost the same across all cpus.

This patch reworks the handling of branch costs to allow per-cpu values to be
set. The actual values of the branch costs are unchanged, as the correct values
will need to be decided for each core.

Tested aarch64-none-linux-gnu with gcc-check.

Ok for trunk?
Matthew

2015-04-21  Matthew Wahab  <matthew.wahab@arm.com>

	* gcc/config/aarch64/aarch64-protos.h (struct cpu_branch_cost): New.
	(tune_params): Add field branch_costs.
	(aarch64_branch_cost): Declare.
	* gcc/config/aarch64/aarch64.c (generic_branch_cost): New.
	(generic_tunings): Set field branch_costs to generic_branch_cost.
	(cortexa53_tunings): Likewise.
	(cortexa57_tunings): Likewise.
	(thunderx_tunings): Likewise.
	(xgene1_tunings): Likewise.
	(aarch64_branch_cost): Define.
	* gcc/config/aarch64/aarch64.h (BRANCH_COST): Redefine.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: percpu_branchcost.patch --]
[-- Type: text/x-patch; name=percpu_branchcost.patch, Size: 4244 bytes --]

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 8676c5c..77b01fa 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -162,12 +162,20 @@ struct cpu_vector_cost
   const int cond_not_taken_branch_cost;  /* Cost of not taken branch.  */
 };
 
+/* Branch costs.  */
+struct cpu_branch_cost
+{
+  const int predictable;    /* Predictable branch or optimizing for size.  */
+  const int unpredictable;  /* Unpredictable branch or optimizing for speed.  */
+};
+
 struct tune_params
 {
   const struct cpu_cost_table *const insn_extra_cost;
   const struct cpu_addrcost_table *const addr_cost;
   const struct cpu_regmove_cost *const regmove_cost;
   const struct cpu_vector_cost *const vec_costs;
+  const struct cpu_branch_cost *const branch_costs;
   const int memmov_cost;
   const int issue_rate;
   const unsigned int fuseable_ops;
@@ -259,6 +267,8 @@ void aarch64_print_operand (FILE *, rtx, char);
 void aarch64_print_operand_address (FILE *, rtx);
 void aarch64_emit_call_insn (rtx);
 
+int aarch64_branch_cost (bool, bool);
+
 /* Initialize builtins for SIMD intrinsics.  */
 void init_aarch64_simd_builtins (void);
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 77a641e..a020316 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -339,12 +339,20 @@ static const struct cpu_vector_cost xgene1_vector_cost =
 #define AARCH64_FUSE_ADRP_LDR	(1 << 3)
 #define AARCH64_FUSE_CMP_BRANCH	(1 << 4)
 
+/* Generic costs for branch instructions.  */
+static const struct cpu_branch_cost generic_branch_cost =
+{
+  2,  /* Predictable.  */
+  2   /* Unpredictable.  */
+};
+
 static const struct tune_params generic_tunings =
 {
   &cortexa57_extra_costs,
   &generic_addrcost_table,
   &generic_regmove_cost,
   &generic_vector_cost,
+  &generic_branch_cost,
   4, /* memmov_cost  */
   2, /* issue_rate  */
   AARCH64_FUSE_NOTHING, /* fuseable_ops  */
@@ -362,6 +370,7 @@ static const struct tune_params cortexa53_tunings =
   &generic_addrcost_table,
   &cortexa53_regmove_cost,
   &generic_vector_cost,
+  &generic_branch_cost,
   4, /* memmov_cost  */
   2, /* issue_rate  */
   (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -380,6 +389,7 @@ static const struct tune_params cortexa57_tunings =
   &cortexa57_addrcost_table,
   &cortexa57_regmove_cost,
   &cortexa57_vector_cost,
+  &generic_branch_cost,
   4, /* memmov_cost  */
   3, /* issue_rate  */
   (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -398,6 +408,7 @@ static const struct tune_params thunderx_tunings =
   &generic_addrcost_table,
   &thunderx_regmove_cost,
   &generic_vector_cost,
+  &generic_branch_cost,
   6, /* memmov_cost  */
   2, /* issue_rate  */
   AARCH64_FUSE_CMP_BRANCH, /* fuseable_ops  */
@@ -415,6 +426,7 @@ static const struct tune_params xgene1_tunings =
   &xgene1_addrcost_table,
   &xgene1_regmove_cost,
   &xgene1_vector_cost,
+  &generic_branch_cost,
   6, /* memmov_cost  */
   4, /* issue_rate  */
   AARCH64_FUSE_NOTHING, /* fuseable_ops  */
@@ -5361,6 +5373,19 @@ aarch64_address_cost (rtx x,
   return cost;
 }
 
+int
+aarch64_branch_cost (bool speed_p, bool predictable_p)
+{
+  /* When optimizing for speed, use the cost of unpredictable branches.  */
+  const struct cpu_branch_cost *branch_costs =
+    aarch64_tune_params->branch_costs;
+
+  if (!speed_p || predictable_p)
+    return branch_costs->predictable;
+  else
+    return branch_costs->unpredictable;
+}
+
 /* Return true if the RTX X in mode MODE is a zero or sign extract
    usable in an ADD or SUB (extended register) instruction.  */
 static bool
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index bf59e40..93a32f5 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -823,7 +823,8 @@ do {									     \
 #define TRAMPOLINE_SECTION text_section
 
 /* To start with.  */
-#define BRANCH_COST(SPEED_P, PREDICTABLE_P) 2
+#define BRANCH_COST(SPEED_P, PREDICTABLE_P) \
+  (aarch64_branch_cost (SPEED_P, PREDICTABLE_P))
 \f
 
 /* Assembly output.  */

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH][AArch64] Add branch-cost to cpu tuning information.
  2015-04-21 14:00 [PATCH][AArch64] Add branch-cost to cpu tuning information Matthew Wahab
@ 2015-05-01  9:18 ` Marcus Shawcroft
  2015-05-05 10:01   ` Matthew Wahab
  0 siblings, 1 reply; 4+ messages in thread
From: Marcus Shawcroft @ 2015-05-01  9:18 UTC (permalink / raw)
  To: Matthew Wahab; +Cc: gcc-patches

On 21 April 2015 at 15:00, Matthew Wahab <matthew.wahab@arm.com> wrote:


> 2015-05-21  Matthew Wahab  <matthew.wahab@arm.com>
>
>         * gcc/config/aarch64-protos.h (struct cpu_branch_cost): New.
>         (tune_params): Add field branch_costs.
>         (aarch64_branch_cost): Declare.
>         * gcc/config/aarch64.c (generic_branch_cost): New.
>         (generic_tunings): Set field cpu_branch_cost to generic_branch_cost.
>         (cortexa53_tunings): Likewise.
>         (cortexa57_tunings): Likewise.
>         (thunderx_tunings): Likewise.
>         (xgene1_tunings): Likewise.
>         (aarch64_branch_cost): Define.
>         * gcc/config/aarch64/aarch64.h (BRANCH_COST): Redefine.
>

+int aarch64_branch_cost (bool, bool);
+

You would never guess looking at this .h today, but long ago there was
something close to alphabetical order by function name in place.
Please lift this definition between aarch64_bitmask_imm and
aarch64_classify_symbolic_expression.

+int
+aarch64_branch_cost (bool speed_p, bool predictable_p)
+{

Add an appropriate comment before the function please.

Cheers
/Marcus

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH][AArch64] Add branch-cost to cpu tuning information.
  2015-05-01  9:18 ` Marcus Shawcroft
@ 2015-05-05 10:01   ` Matthew Wahab
  2015-05-05 11:00     ` Marcus Shawcroft
  0 siblings, 1 reply; 4+ messages in thread
From: Matthew Wahab @ 2015-05-05 10:01 UTC (permalink / raw)
  To: Marcus Shawcroft; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1340 bytes --]

On 01/05/15 10:18, Marcus Shawcroft wrote:
> On 21 April 2015 at 15:00, Matthew Wahab <matthew.wahab@arm.com> wrote:
>
> +int aarch64_branch_cost (bool, bool);
> +
>
> You would never guess looking at this .h today, but long ago there was
> something close to alphabetical order by function name in place.
> Please lift this definition between aarch64_bitmask_imm and
> aarch64_classify_symbolic_expression.
>
> +int
> +aarch64_branch_cost (bool speed_p, bool predictable_p)
> +{
>
> Add an appropriate comment before the function please.

Attached reworked patch:

- Moved declaration of aarch64_branch_cost to after aarch64_bitmask_imm.
- Added comment before definition of aarch64_branch_cost.

Tested aarch64-none-linux-gnu with gcc-check.

Ok for trunk?
Matthew

2015-05-05  Matthew Wahab  <matthew.wahab@arm.com>

	* gcc/config/aarch64/aarch64-protos.h (struct cpu_branch_cost): New.
	(tune_params): Add field branch_costs.
	(aarch64_branch_cost): Declare.
	* gcc/config/aarch64/aarch64.c (generic_branch_cost): New.
	(generic_tunings): Set field branch_costs to generic_branch_cost.
	(cortexa53_tunings): Likewise.
	(cortexa57_tunings): Likewise.
	(thunderx_tunings): Likewise.
	(xgene1_tunings): Likewise.
	(aarch64_branch_cost): Define.
	* gcc/config/aarch64/aarch64.h (BRANCH_COST): Redefine.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: percpu_branchcost_v2.patch --]
[-- Type: text/x-patch; name=percpu_branchcost_v2.patch, Size: 4546 bytes --]

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 08ce5f1..931c8b8 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -162,12 +162,20 @@ struct cpu_vector_cost
   const int cond_not_taken_branch_cost;  /* Cost of not taken branch.  */
 };
 
+/* Branch costs.  */
+struct cpu_branch_cost
+{
+  const int predictable;    /* Predictable branch or optimizing for size.  */
+  const int unpredictable;  /* Unpredictable branch or optimizing for speed.  */
+};
+
 struct tune_params
 {
   const struct cpu_cost_table *const insn_extra_cost;
   const struct cpu_addrcost_table *const addr_cost;
   const struct cpu_regmove_cost *const regmove_cost;
   const struct cpu_vector_cost *const vec_costs;
+  const struct cpu_branch_cost *const branch_costs;
   const int memmov_cost;
   const int issue_rate;
   const unsigned int fuseable_ops;
@@ -184,6 +192,7 @@ struct tune_params
 HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
 int aarch64_get_condition_code (rtx);
 bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
+int aarch64_branch_cost (bool, bool);
 enum aarch64_symbol_type
 aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context);
 bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 374b0a9..7bc28ae 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -340,12 +340,20 @@ static const struct cpu_vector_cost xgene1_vector_cost =
 #define AARCH64_FUSE_ADRP_LDR	(1 << 3)
 #define AARCH64_FUSE_CMP_BRANCH	(1 << 4)
 
+/* Generic costs for branch instructions.  */
+static const struct cpu_branch_cost generic_branch_cost =
+{
+  2,  /* Predictable.  */
+  2   /* Unpredictable.  */
+};
+
 static const struct tune_params generic_tunings =
 {
   &cortexa57_extra_costs,
   &generic_addrcost_table,
   &generic_regmove_cost,
   &generic_vector_cost,
+  &generic_branch_cost,
   4, /* memmov_cost  */
   2, /* issue_rate  */
   AARCH64_FUSE_NOTHING, /* fuseable_ops  */
@@ -365,6 +373,7 @@ static const struct tune_params cortexa53_tunings =
   &generic_addrcost_table,
   &cortexa53_regmove_cost,
   &generic_vector_cost,
+  &generic_branch_cost,
   4, /* memmov_cost  */
   2, /* issue_rate  */
   (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -385,6 +394,7 @@ static const struct tune_params cortexa57_tunings =
   &cortexa57_addrcost_table,
   &cortexa57_regmove_cost,
   &cortexa57_vector_cost,
+  &generic_branch_cost,
   4, /* memmov_cost  */
   3, /* issue_rate  */
   (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -405,6 +415,7 @@ static const struct tune_params thunderx_tunings =
   &generic_addrcost_table,
   &thunderx_regmove_cost,
   &generic_vector_cost,
+  &generic_branch_cost,
   6, /* memmov_cost  */
   2, /* issue_rate  */
   AARCH64_FUSE_CMP_BRANCH, /* fuseable_ops  */
@@ -424,6 +435,7 @@ static const struct tune_params xgene1_tunings =
   &xgene1_addrcost_table,
   &xgene1_regmove_cost,
   &xgene1_vector_cost,
+  &generic_branch_cost,
   6, /* memmov_cost  */
   4, /* issue_rate  */
   AARCH64_FUSE_NOTHING, /* fuseable_ops  */
@@ -5409,6 +5421,23 @@ aarch64_address_cost (rtx x,
   return cost;
 }
 
+/* Return the cost of a branch.  If SPEED_P is true then the compiler is
+   optimizing for speed.  If PREDICTABLE_P is true then the branch is expected
+   to be well predicted.  */
+
+int
+aarch64_branch_cost (bool speed_p, bool predictable_p)
+{
+  /* When optimizing for speed, use the cost of unpredictable branches.  */
+  const struct cpu_branch_cost *branch_costs =
+    aarch64_tune_params->branch_costs;
+
+  if (!speed_p || predictable_p)
+    return branch_costs->predictable;
+  else
+    return branch_costs->unpredictable;
+}
+
 /* Return true if the RTX X in mode MODE is a zero or sign extract
    usable in an ADD or SUB (extended register) instruction.  */
 static bool
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 3fd1b3f..c85d279 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -827,7 +827,8 @@ do {									     \
 #define TRAMPOLINE_SECTION text_section
 
 /* To start with.  */
-#define BRANCH_COST(SPEED_P, PREDICTABLE_P) 2
+#define BRANCH_COST(SPEED_P, PREDICTABLE_P) \
+  (aarch64_branch_cost (SPEED_P, PREDICTABLE_P))
 \f
 
 /* Assembly output.  */

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH][AArch64] Add branch-cost to cpu tuning information.
  2015-05-05 10:01   ` Matthew Wahab
@ 2015-05-05 11:00     ` Marcus Shawcroft
  0 siblings, 0 replies; 4+ messages in thread
From: Marcus Shawcroft @ 2015-05-05 11:00 UTC (permalink / raw)
  To: Matthew Wahab; +Cc: gcc-patches

On 5 May 2015 at 11:00, Matthew Wahab <matthew.wahab@arm.com> wrote:

> 2015-05-05  Matthew Wahab  <matthew.wahab@arm.com>
>
>
>         * gcc/config/aarch64-protos.h (struct cpu_branch_cost): New.
>         (tune_params): Add field branch_costs.
>         (aarch64_branch_cost): Declare.
>         * gcc/config/aarch64.c (generic_branch_cost): New.
>         (generic_tunings): Set field cpu_branch_cost to generic_branch_cost.
>         (cortexa53_tunings): Likewise.
>         (cortexa57_tunings): Likewise.
>         (thunderx_tunings): Likewise.
>         (xgene1_tunings): Likewise.
>         (aarch64_branch_cost): Define.
>         * gcc/config/aarch64/aarch64.h (BRANCH_COST): Redefine.
>

OK /Marcus

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-05-05 11:00 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-21 14:00 [PATCH][AArch64] Add branch-cost to cpu tuning information Matthew Wahab
2015-05-01  9:18 ` Marcus Shawcroft
2015-05-05 10:01   ` Matthew Wahab
2015-05-05 11:00     ` Marcus Shawcroft

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).