* [PATCH][AArch64] Make aarch64_min_divisions_for_recip_mul configurable
@ 2015-03-03 18:06 Wilco Dijkstra
2015-03-03 18:34 ` Andrew Pinski
0 siblings, 1 reply; 9+ messages in thread
From: Wilco Dijkstra @ 2015-03-03 18:06 UTC (permalink / raw)
To: GCC Patches
This patch makes aarch64_min_divisions_for_recip_mul configurable for float and double. This allows
CPUs with really fast or multiple dividers to return 3 (or even 4) if that happens to be faster
overall. No code generation change - bootstrap & regression OK.
ChangeLog:
2015-03-03 Wilco Dijkstra <wdijkstr@arm.com>
* gcc/config/aarch64/aarch64-protos.h (tune_params):
Add min_div_recip_mul_sf and min_div_recip_mul_df fields.
* gcc/config/aarch64/aarch64.c (aarch64_min_divisions_for_recip_mul):
Return value depending on target.
(generic_tunings): Initialize new target settings.
(cortexa53_tunings): Likewise.
(cortexa57_tunings): Likewise.
(thunderx_tunings): Likewise.
(xgene1_tunings): Likewise.
---
gcc/config/aarch64/aarch64-protos.h | 2 ++
gcc/config/aarch64/aarch64.c | 26 +++++++++++++++++++-------
2 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 59c5824..4331e5c 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -177,6 +177,8 @@ struct tune_params
const int int_reassoc_width;
const int fp_reassoc_width;
const int vec_reassoc_width;
+ const int min_div_recip_mul_sf;
+ const int min_div_recip_mul_df;
};
HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e22d72e..42a96f6 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -353,7 +353,9 @@ static const struct tune_params generic_tunings =
4, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
- 1 /* vec_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2 /* min_div_recip_mul_df. */
};
static const struct tune_params cortexa53_tunings =
@@ -371,7 +373,9 @@ static const struct tune_params cortexa53_tunings =
4, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
- 1 /* vec_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2 /* min_div_recip_mul_df. */
};
static const struct tune_params cortexa57_tunings =
@@ -389,7 +393,9 @@ static const struct tune_params cortexa57_tunings =
4, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
- 1 /* vec_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2 /* min_div_recip_mul_df. */
};
static const struct tune_params thunderx_tunings =
@@ -406,7 +412,9 @@ static const struct tune_params thunderx_tunings =
8, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
- 1 /* vec_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2 /* min_div_recip_mul_df. */
};
static const struct tune_params xgene1_tunings =
@@ -423,7 +431,9 @@ static const struct tune_params xgene1_tunings =
16, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
- 1 /* vec_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2 /* min_div_recip_mul_df. */
};
/* A processor implementing AArch64. */
@@ -512,9 +522,11 @@ static const char * const aarch64_condition_codes[] =
};
static unsigned int
-aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
+aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
{
- return 2;
+ if (GET_MODE_UNIT_SIZE (mode) == 4)
+ return aarch64_tune_params->min_div_recip_mul_sf;
+ return aarch64_tune_params->min_div_recip_mul_df;
}
static int
--
1.9.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH][AArch64] Make aarch64_min_divisions_for_recip_mul configurable
2015-03-03 18:06 [PATCH][AArch64] Make aarch64_min_divisions_for_recip_mul configurable Wilco Dijkstra
@ 2015-03-03 18:34 ` Andrew Pinski
2015-03-03 19:08 ` Wilco Dijkstra
0 siblings, 1 reply; 9+ messages in thread
From: Andrew Pinski @ 2015-03-03 18:34 UTC (permalink / raw)
To: Wilco Dijkstra; +Cc: GCC Patches
On Tue, Mar 3, 2015 at 10:06 AM, Wilco Dijkstra <wdijkstr@arm.com> wrote:
> This patch makes aarch64_min_divisions_for_recip_mul configurable for float and double. This allows
> CPUs with really fast or multiple dividers to return 3 (or even 4) if that happens to be faster
> overall. No code generation change - bootstrap & regression OK.
Are you planning on doing the optimization where you turn the divide
into recip est followed by a few steps?
Because if so then this should be changed to handle that case too.
Thanks,
Andrew
>
> ChangeLog:
> 2015-03-03 Wilco Dijkstra <wdijkstr@arm.com>
>
> * gcc/config/aarch64/aarch64-protos.h (tune_params):
> Add min_div_recip_mul_sf and min_div_recip_mul_df fields.
> * gcc/config/aarch64/aarch64.c (aarch64_min_divisions_for_recip_mul):
> Return value depending on target.
> (generic_tunings): Initialize new target settings.
> (cortexa53_tunings): Likewise.
> (cortexa57_tunings): Likewise.
> (thunderx_tunings): Likewise.
> (xgene1_tunings): Likewise.
>
> ---
> gcc/config/aarch64/aarch64-protos.h | 2 ++
> gcc/config/aarch64/aarch64.c | 26 +++++++++++++++++++-------
> 2 files changed, 21 insertions(+), 7 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 59c5824..4331e5c 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -177,6 +177,8 @@ struct tune_params
> const int int_reassoc_width;
> const int fp_reassoc_width;
> const int vec_reassoc_width;
> + const int min_div_recip_mul_sf;
> + const int min_div_recip_mul_df;
> };
>
> HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index e22d72e..42a96f6 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -353,7 +353,9 @@ static const struct tune_params generic_tunings =
> 4, /* loop_align. */
> 2, /* int_reassoc_width. */
> 4, /* fp_reassoc_width. */
> - 1 /* vec_reassoc_width. */
> + 1, /* vec_reassoc_width. */
> + 2, /* min_div_recip_mul_sf. */
> + 2 /* min_div_recip_mul_df. */
> };
>
> static const struct tune_params cortexa53_tunings =
> @@ -371,7 +373,9 @@ static const struct tune_params cortexa53_tunings =
> 4, /* loop_align. */
> 2, /* int_reassoc_width. */
> 4, /* fp_reassoc_width. */
> - 1 /* vec_reassoc_width. */
> + 1, /* vec_reassoc_width. */
> + 2, /* min_div_recip_mul_sf. */
> + 2 /* min_div_recip_mul_df. */
> };
>
> static const struct tune_params cortexa57_tunings =
> @@ -389,7 +393,9 @@ static const struct tune_params cortexa57_tunings =
> 4, /* loop_align. */
> 2, /* int_reassoc_width. */
> 4, /* fp_reassoc_width. */
> - 1 /* vec_reassoc_width. */
> + 1, /* vec_reassoc_width. */
> + 2, /* min_div_recip_mul_sf. */
> + 2 /* min_div_recip_mul_df. */
> };
>
> static const struct tune_params thunderx_tunings =
> @@ -406,7 +412,9 @@ static const struct tune_params thunderx_tunings =
> 8, /* loop_align. */
> 2, /* int_reassoc_width. */
> 4, /* fp_reassoc_width. */
> - 1 /* vec_reassoc_width. */
> + 1, /* vec_reassoc_width. */
> + 2, /* min_div_recip_mul_sf. */
> + 2 /* min_div_recip_mul_df. */
> };
>
> static const struct tune_params xgene1_tunings =
> @@ -423,7 +431,9 @@ static const struct tune_params xgene1_tunings =
> 16, /* loop_align. */
> 2, /* int_reassoc_width. */
> 4, /* fp_reassoc_width. */
> - 1 /* vec_reassoc_width. */
> + 1, /* vec_reassoc_width. */
> + 2, /* min_div_recip_mul_sf. */
> + 2 /* min_div_recip_mul_df. */
> };
>
> /* A processor implementing AArch64. */
> @@ -512,9 +522,11 @@ static const char * const aarch64_condition_codes[] =
> };
>
> static unsigned int
> -aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
> +aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
> {
> - return 2;
> + if (GET_MODE_UNIT_SIZE (mode) == 4)
> + return aarch64_tune_params->min_div_recip_mul_sf;
> + return aarch64_tune_params->min_div_recip_mul_df;
> }
>
> static int
> --
> 1.9.1
>
>
>
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH][AArch64] Make aarch64_min_divisions_for_recip_mul configurable
2015-03-03 18:34 ` Andrew Pinski
@ 2015-03-03 19:08 ` Wilco Dijkstra
0 siblings, 0 replies; 9+ messages in thread
From: Wilco Dijkstra @ 2015-03-03 19:08 UTC (permalink / raw)
To: 'Andrew Pinski'; +Cc: GCC Patches
> Andrew Pinski wrote:
> On Tue, Mar 3, 2015 at 10:06 AM, Wilco Dijkstra <wdijkstr@arm.com> wrote:
> > This patch makes aarch64_min_divisions_for_recip_mul configurable for float and double. This
> allows
> > CPUs with really fast or multiple dividers to return 3 (or even 4) if that happens to be
> faster
> > overall. No code generation change - bootstrap & regression OK.
>
> Are you planning on doing the optimization where you turn the divide
> into recip est followed by a few steps?
> Because if so then this should be changed to handle that case too.
No I don't think that will be faster.
Wilco
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH][AArch64] Make aarch64_min_divisions_for_recip_mul configurable
2015-05-01 13:12 ` Wilco Dijkstra
@ 2015-05-01 13:20 ` Kyrill Tkachov
0 siblings, 0 replies; 9+ messages in thread
From: Kyrill Tkachov @ 2015-05-01 13:20 UTC (permalink / raw)
To: Wilco Dijkstra; +Cc: GCC Patches
On 01/05/15 14:11, Wilco Dijkstra wrote:
>> Marcus Shawcroft wrote:
>> On 1 May 2015 at 12:26, Wilco Dijkstra <wdijkstr@arm.com> wrote:
>>>
>>>> Marcus Shawcroft wrote:
>>>> On 27 April 2015 at 14:43, Wilco Dijkstra <wdijkstr@arm.com> wrote:
>>>>
>>>>>> static unsigned int
>>>>>> -aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
>>>>>> +aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
>>>>>> {
>>>>>> - return 2;
>>>>>> + if (GET_MODE_UNIT_SIZE (mode) == 4)
>>>>>> + return aarch64_tune_params->min_div_recip_mul_sf;
>>>>>> + return aarch64_tune_params->min_div_recip_mul_df;
>>>> This should be expressed directly as mode == SFmode (or DFmode) rather
>>>> than the indirect approach of computing the size first.
>>> Can we never see vector types at this point?
>> Fair point, curiously we don't appear to see them, but I see no reason
>> why we should not. Commit your patch as proposed.
>>
>> Cheers /Marcus
> And this one please.
Done with r222679.
Kyrill
>
> Wilco
>
> 2015-05-01 Wilco Dijkstra <wdijkstr@arm.com>
>
> * gcc/config/aarch64/aarch64-protos.h (tune_params):
> Add min_div_recip_mul_sf and min_div_recip_mul_df fields.
> * gcc/config/aarch64/aarch64.c (aarch64_min_divisions_for_recip_mul):
> Return value depending on target.
> (generic_tunings): Initialize new target settings.
> (cortexa53_tunings): Likewise.
> (cortexa57_tunings): Likewise.
> (thunderx_tunings): Likewise.
> (xgene1_tunings): Likewise.
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH][AArch64] Make aarch64_min_divisions_for_recip_mul configurable
2015-05-01 12:17 ` Marcus Shawcroft
@ 2015-05-01 13:12 ` Wilco Dijkstra
2015-05-01 13:20 ` Kyrill Tkachov
0 siblings, 1 reply; 9+ messages in thread
From: Wilco Dijkstra @ 2015-05-01 13:12 UTC (permalink / raw)
To: Kyrylo Tkachov; +Cc: GCC Patches
[-- Attachment #1: Type: text/plain, Size: 1463 bytes --]
> Marcus Shawcroft wrote:
> On 1 May 2015 at 12:26, Wilco Dijkstra <wdijkstr@arm.com> wrote:
> >
> >
> >> Marcus Shawcroft wrote:
> >> On 27 April 2015 at 14:43, Wilco Dijkstra <wdijkstr@arm.com> wrote:
> >>
> >> >> static unsigned int
> >> >> -aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
> >> >> +aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
> >> >> {
> >> >> - return 2;
> >> >> + if (GET_MODE_UNIT_SIZE (mode) == 4)
> >> >> + return aarch64_tune_params->min_div_recip_mul_sf;
> >> >> + return aarch64_tune_params->min_div_recip_mul_df;
> >>
> >> This should be expressed directly as mode == SFmode (or DFmode) rather
> >> than the indirect approach of computing the size first.
> >
> > Can we never see vector types at this point?
>
> Fair point, curiously we don't appear to see them, but I see no reason
> why we should not. Commit your patch as proposed.
>
> Cheers /Marcus
And this one please.
Wilco
2015-05-01 Wilco Dijkstra <wdijkstr@arm.com>
* gcc/config/aarch64/aarch64-protos.h (tune_params):
Add min_div_recip_mul_sf and min_div_recip_mul_df fields.
* gcc/config/aarch64/aarch64.c (aarch64_min_divisions_for_recip_mul):
Return value depending on target.
(generic_tunings): Initialize new target settings.
(cortexa53_tunings): Likewise.
(cortexa57_tunings): Likewise.
(thunderx_tunings): Likewise.
(xgene1_tunings): Likewise.
[-- Attachment #2: 0001-Make-aarch64_min_divisions_for_recip_mul-configurabl.txt --]
[-- Type: text/plain, Size: 3089 bytes --]
---
gcc/config/aarch64/aarch64-protos.h | 2 ++
gcc/config/aarch64/aarch64.c | 26 +++++++++++++++++++-------
2 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 59c5824..4331e5c 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -177,6 +177,8 @@ struct tune_params
const int int_reassoc_width;
const int fp_reassoc_width;
const int vec_reassoc_width;
+ const int min_div_recip_mul_sf;
+ const int min_div_recip_mul_df;
};
HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e22d72e..42a96f6 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -353,7 +353,9 @@ static const struct tune_params generic_tunings =
4, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
- 1 /* vec_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2 /* min_div_recip_mul_df. */
};
static const struct tune_params cortexa53_tunings =
@@ -371,7 +373,9 @@ static const struct tune_params cortexa53_tunings =
4, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
- 1 /* vec_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2 /* min_div_recip_mul_df. */
};
static const struct tune_params cortexa57_tunings =
@@ -389,7 +393,9 @@ static const struct tune_params cortexa57_tunings =
4, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
- 1 /* vec_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2 /* min_div_recip_mul_df. */
};
static const struct tune_params thunderx_tunings =
@@ -406,7 +412,9 @@ static const struct tune_params thunderx_tunings =
8, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
- 1 /* vec_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2 /* min_div_recip_mul_df. */
};
static const struct tune_params xgene1_tunings =
@@ -423,7 +431,9 @@ static const struct tune_params xgene1_tunings =
16, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
- 1 /* vec_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2 /* min_div_recip_mul_df. */
};
/* A processor implementing AArch64. */
@@ -512,9 +522,11 @@ static const char * const aarch64_condition_codes[] =
};
static unsigned int
-aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
+aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
{
- return 2;
+ if (GET_MODE_UNIT_SIZE (mode) == 4)
+ return aarch64_tune_params->min_div_recip_mul_sf;
+ return aarch64_tune_params->min_div_recip_mul_df;
}
static int
--
1.9.1
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH][AArch64] Make aarch64_min_divisions_for_recip_mul configurable
2015-05-01 11:26 ` Wilco Dijkstra
@ 2015-05-01 12:17 ` Marcus Shawcroft
2015-05-01 13:12 ` Wilco Dijkstra
0 siblings, 1 reply; 9+ messages in thread
From: Marcus Shawcroft @ 2015-05-01 12:17 UTC (permalink / raw)
To: Wilco Dijkstra; +Cc: GCC Patches
On 1 May 2015 at 12:26, Wilco Dijkstra <wdijkstr@arm.com> wrote:
>
>
>> Marcus Shawcroft wrote:
>> On 27 April 2015 at 14:43, Wilco Dijkstra <wdijkstr@arm.com> wrote:
>>
>> >> static unsigned int
>> >> -aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
>> >> +aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
>> >> {
>> >> - return 2;
>> >> + if (GET_MODE_UNIT_SIZE (mode) == 4)
>> >> + return aarch64_tune_params->min_div_recip_mul_sf;
>> >> + return aarch64_tune_params->min_div_recip_mul_df;
>>
>> This should be expressed directly as mode == SFmode (or DFmode) rather
>> than the indirect approach of computing the size first.
>
> Can we never see vector types at this point?
Fair point, curiously we don't appear to see them, but I see no reason
why we should not. Commit your patch as proposed.
Cheers /Marcus
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH][AArch64] Make aarch64_min_divisions_for_recip_mul configurable
2015-05-01 7:44 ` Marcus Shawcroft
@ 2015-05-01 11:26 ` Wilco Dijkstra
2015-05-01 12:17 ` Marcus Shawcroft
0 siblings, 1 reply; 9+ messages in thread
From: Wilco Dijkstra @ 2015-05-01 11:26 UTC (permalink / raw)
To: 'Marcus Shawcroft'; +Cc: GCC Patches
> Marcus Shawcroft wrote:
> On 27 April 2015 at 14:43, Wilco Dijkstra <wdijkstr@arm.com> wrote:
>
> >> static unsigned int
> >> -aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
> >> +aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
> >> {
> >> - return 2;
> >> + if (GET_MODE_UNIT_SIZE (mode) == 4)
> >> + return aarch64_tune_params->min_div_recip_mul_sf;
> >> + return aarch64_tune_params->min_div_recip_mul_df;
>
> This should be expressed directly as mode == SFmode (or DFmode) rather
> than the indirect approach of computing the size first.
Can we never see vector types at this point?
Wilco
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH][AArch64] Make aarch64_min_divisions_for_recip_mul configurable
2015-04-27 13:43 Wilco Dijkstra
@ 2015-05-01 7:44 ` Marcus Shawcroft
2015-05-01 11:26 ` Wilco Dijkstra
0 siblings, 1 reply; 9+ messages in thread
From: Marcus Shawcroft @ 2015-05-01 7:44 UTC (permalink / raw)
To: Wilco Dijkstra; +Cc: GCC Patches
On 27 April 2015 at 14:43, Wilco Dijkstra <wdijkstr@arm.com> wrote:
>> static unsigned int
>> -aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
>> +aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
>> {
>> - return 2;
>> + if (GET_MODE_UNIT_SIZE (mode) == 4)
>> + return aarch64_tune_params->min_div_recip_mul_sf;
>> + return aarch64_tune_params->min_div_recip_mul_df;
This should be expressed directly as mode == SFmode (or DFmode) rather
than the indirect approach of computing the size first.
/Marcus
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH][AArch64] Make aarch64_min_divisions_for_recip_mul configurable
@ 2015-04-27 13:43 Wilco Dijkstra
2015-05-01 7:44 ` Marcus Shawcroft
0 siblings, 1 reply; 9+ messages in thread
From: Wilco Dijkstra @ 2015-04-27 13:43 UTC (permalink / raw)
To: 'GCC Patches'
ping
> -----Original Message-----
> From: Wilco Dijkstra [mailto:wdijkstr@arm.com]
> Sent: 03 March 2015 18:06
> To: GCC Patches
> Subject: [PATCH][AArch64] Make aarch64_min_divisions_for_recip_mul configurable
>
> This patch makes aarch64_min_divisions_for_recip_mul configurable for float and double. This
> allows CPUs with really fast or multiple dividers to return 3 (or even 4) if that happens to
> be faster overall. No code generation change - bootstrap & regression OK.
>
> ChangeLog:
> 2015-03-03 Wilco Dijkstra <wdijkstr@arm.com>
>
> * gcc/config/aarch64/aarch64-protos.h (tune_params):
> Add min_div_recip_mul_sf and min_div_recip_mul_df fields.
> * gcc/config/aarch64/aarch64.c (aarch64_min_divisions_for_recip_mul):
> Return value depending on target.
> (generic_tunings): Initialize new target settings.
> (cortexa53_tunings): Likewise.
> (cortexa57_tunings): Likewise.
> (thunderx_tunings): Likewise.
> (xgene1_tunings): Likewise.
>
> ---
> gcc/config/aarch64/aarch64-protos.h | 2 ++
> gcc/config/aarch64/aarch64.c | 26 +++++++++++++++++++-------
> 2 files changed, 21 insertions(+), 7 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 59c5824..4331e5c 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -177,6 +177,8 @@ struct tune_params
> const int int_reassoc_width;
> const int fp_reassoc_width;
> const int vec_reassoc_width;
> + const int min_div_recip_mul_sf;
> + const int min_div_recip_mul_df;
> };
>
> HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index e22d72e..42a96f6 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -353,7 +353,9 @@ static const struct tune_params generic_tunings =
> 4, /* loop_align. */
> 2, /* int_reassoc_width. */
> 4, /* fp_reassoc_width. */
> - 1 /* vec_reassoc_width. */
> + 1, /* vec_reassoc_width. */
> + 2, /* min_div_recip_mul_sf. */
> + 2 /* min_div_recip_mul_df. */
> };
>
> static const struct tune_params cortexa53_tunings =
> @@ -371,7 +373,9 @@ static const struct tune_params cortexa53_tunings =
> 4, /* loop_align. */
> 2, /* int_reassoc_width. */
> 4, /* fp_reassoc_width. */
> - 1 /* vec_reassoc_width. */
> + 1, /* vec_reassoc_width. */
> + 2, /* min_div_recip_mul_sf. */
> + 2 /* min_div_recip_mul_df. */
> };
>
> static const struct tune_params cortexa57_tunings =
> @@ -389,7 +393,9 @@ static const struct tune_params cortexa57_tunings =
> 4, /* loop_align. */
> 2, /* int_reassoc_width. */
> 4, /* fp_reassoc_width. */
> - 1 /* vec_reassoc_width. */
> + 1, /* vec_reassoc_width. */
> + 2, /* min_div_recip_mul_sf. */
> + 2 /* min_div_recip_mul_df. */
> };
>
> static const struct tune_params thunderx_tunings =
> @@ -406,7 +412,9 @@ static const struct tune_params thunderx_tunings =
> 8, /* loop_align. */
> 2, /* int_reassoc_width. */
> 4, /* fp_reassoc_width. */
> - 1 /* vec_reassoc_width. */
> + 1, /* vec_reassoc_width. */
> + 2, /* min_div_recip_mul_sf. */
> + 2 /* min_div_recip_mul_df. */
> };
>
> static const struct tune_params xgene1_tunings =
> @@ -423,7 +431,9 @@ static const struct tune_params xgene1_tunings =
> 16, /* loop_align. */
> 2, /* int_reassoc_width. */
> 4, /* fp_reassoc_width. */
> - 1 /* vec_reassoc_width. */
> + 1, /* vec_reassoc_width. */
> + 2, /* min_div_recip_mul_sf. */
> + 2 /* min_div_recip_mul_df. */
> };
>
> /* A processor implementing AArch64. */
> @@ -512,9 +522,11 @@ static const char * const aarch64_condition_codes[] =
> };
>
> static unsigned int
> -aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
> +aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
> {
> - return 2;
> + if (GET_MODE_UNIT_SIZE (mode) == 4)
> + return aarch64_tune_params->min_div_recip_mul_sf;
> + return aarch64_tune_params->min_div_recip_mul_df;
> }
>
> static int
> --
> 1.9.1
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2015-05-01 13:20 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-03 18:06 [PATCH][AArch64] Make aarch64_min_divisions_for_recip_mul configurable Wilco Dijkstra
2015-03-03 18:34 ` Andrew Pinski
2015-03-03 19:08 ` Wilco Dijkstra
2015-04-27 13:43 Wilco Dijkstra
2015-05-01 7:44 ` Marcus Shawcroft
2015-05-01 11:26 ` Wilco Dijkstra
2015-05-01 12:17 ` Marcus Shawcroft
2015-05-01 13:12 ` Wilco Dijkstra
2015-05-01 13:20 ` Kyrill Tkachov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).