From: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
To: GCC Patches <gcc-patches@gcc.gnu.org>
Cc: Richard Sandiford <Richard.Sandiford@arm.com>
Subject: [PATCH] AArch64: Add fma_reassoc_width [PR107413]
Date: Wed, 9 Nov 2022 12:40:05 +0000 [thread overview]
Message-ID: <PAWPR08MB89824348B31E96B4F6432A3C833E9@PAWPR08MB8982.eurprd08.prod.outlook.com> (raw)
Add a reassocation width for FMAs in per-CPU tuning structures. Keep the
existing setting for cores with 2 FMA pipes, and use 4 for cores with 4
FMA pipes. This improves SPECFP2017 on Neoverse V1 by ~1.5%.
Passes regress/bootstrap, OK for commit?
gcc/
PR 107413
* config/aarch64/aarch64.cc (struct tune_params): Add
fma_reassoc_width to all CPU tuning structures.
* config/aarch64/aarch64-protos.h (struct tune_params): Add
fma_reassoc_width.
---
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index a73bfa20acb9b92ae0475794c3f11c67d22feb97..71365a446007c26b906b61ca8b2a68ee06c83037 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -540,6 +540,7 @@ struct tune_params
const char *loop_align;
int int_reassoc_width;
int fp_reassoc_width;
+ int fma_reassoc_width;
int vec_reassoc_width;
int min_div_recip_mul_sf;
int min_div_recip_mul_df;
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 798363bcc449c414de5bbb4f26b8e1c64a0cf71a..643162cdecd6a8fe5587164cb2d0d62b709a491d 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -1346,6 +1346,7 @@ static const struct tune_params generic_tunings =
"8", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -1382,6 +1383,7 @@ static const struct tune_params cortexa35_tunings =
"8", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -1415,6 +1417,7 @@ static const struct tune_params cortexa53_tunings =
"8", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -1448,6 +1451,7 @@ static const struct tune_params cortexa57_tunings =
"8", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -1481,6 +1485,7 @@ static const struct tune_params cortexa72_tunings =
"8", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -1514,6 +1519,7 @@ static const struct tune_params cortexa73_tunings =
"8", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -1548,6 +1554,7 @@ static const struct tune_params exynosm1_tunings =
"4", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -1580,6 +1587,7 @@ static const struct tune_params thunderxt88_tunings =
"8", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -1612,6 +1620,7 @@ static const struct tune_params thunderx_tunings =
"8", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -1646,6 +1655,7 @@ static const struct tune_params tsv110_tunings =
"8", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -1678,6 +1688,7 @@ static const struct tune_params xgene1_tunings =
"16", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -1710,6 +1721,7 @@ static const struct tune_params emag_tunings =
"16", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -1743,6 +1755,7 @@ static const struct tune_params qdf24xx_tunings =
"16", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -1778,6 +1791,7 @@ static const struct tune_params saphira_tunings =
"16", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -1811,6 +1825,7 @@ static const struct tune_params thunderx2t99_tunings =
"16", /* loop_align. */
3, /* int_reassoc_width. */
2, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
2, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -1844,6 +1859,7 @@ static const struct tune_params thunderx3t110_tunings =
"16", /* loop_align. */
3, /* int_reassoc_width. */
2, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
2, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -1876,6 +1892,7 @@ static const struct tune_params neoversen1_tunings =
"32:16", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
2, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -1912,6 +1929,7 @@ static const struct tune_params ampere1_tunings =
"32:16", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
2, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -2089,6 +2107,7 @@ static const struct tune_params neoversev1_tunings =
"32:16", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 4, /* fma_reassoc_width. */
2, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -2226,6 +2245,7 @@ static const struct tune_params neoverse512tvb_tunings =
"32:16", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 4, /* fma_reassoc_width. */
2, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -2414,6 +2434,7 @@ static const struct tune_params neoversen2_tunings =
"32:16", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
2, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -2603,6 +2624,7 @@ static const struct tune_params neoversev2_tunings =
"32:16", /* loop_align. */
3, /* int_reassoc_width. */
6, /* fp_reassoc_width. */
+ 4, /* fma_reassoc_width. */
3, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -2638,6 +2660,7 @@ static const struct tune_params a64fx_tunings =
"32", /* loop_align. */
4, /* int_reassoc_width. */
2, /* fp_reassoc_width. */
+ 1, /* fma_reassoc_width. */
2, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
@@ -3350,9 +3373,10 @@ aarch64_reassociation_width (unsigned opc, machine_mode mode)
return aarch64_tune_params.vec_reassoc_width;
if (INTEGRAL_MODE_P (mode))
return aarch64_tune_params.int_reassoc_width;
- /* Avoid reassociating floating point addition so we emit more FMAs. */
- if (FLOAT_MODE_P (mode) && opc != PLUS_EXPR)
- return aarch64_tune_params.fp_reassoc_width;
+ /* FMA's can have a different reassociation width. */
+ if (FLOAT_MODE_P (mode))
+ return opc == PLUS_EXPR ? aarch64_tune_params.fma_reassoc_width
+ : aarch64_tune_params.fp_reassoc_width;
return 1;
}
next reply other threads:[~2022-11-09 12:40 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-09 12:40 Wilco Dijkstra [this message]
2022-11-22 7:58 ` Richard Sandiford
2022-11-22 8:28 ` Richard Biener
2022-11-22 10:35 ` Wilco Dijkstra
2022-11-22 14:13 ` Richard Sandiford
2022-11-23 18:10 ` Wilco Dijkstra
2022-11-24 7:06 ` Richard Sandiford
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=PAWPR08MB89824348B31E96B4F6432A3C833E9@PAWPR08MB8982.eurprd08.prod.outlook.com \
--to=wilco.dijkstra@arm.com \
--cc=Richard.Sandiford@arm.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).