public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] aarch64: add 'AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA'
@ 2023-12-27 10:40 Di Zhao OS
  2023-12-29 10:23 ` Richard Sandiford
  0 siblings, 1 reply; 3+ messages in thread
From: Di Zhao OS @ 2023-12-27 10:40 UTC (permalink / raw)
  To: gcc-patches

This patch adds a new tuning option 'AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA',
to consider fully pipelined FMAs in reassociation. Also, set this option
by default for Ampere CPUs.

Tested on aarch64-unknown-linux-gnu. Is this OK for trunk?

Thanks,
Di Zhao

gcc/ChangeLog:

	* config/aarch64/aarch64-tuning-flags.def (AARCH64_EXTRA_TUNING_OPTION):
	New tuning option AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA.
	* config/aarch64/aarch64.cc (aarch64_override_options_internal): Set
	param_fully_pipelined_fma according to tuning option.
	* config/aarch64/tuning_models/ampere1.h: Add
	AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA to tune_flags.
	* config/aarch64/tuning_models/ampere1a.h: Likewise.
	* config/aarch64/tuning_models/ampere1b.h: Likewise.

---
 gcc/config/aarch64/aarch64-tuning-flags.def | 2 ++
 gcc/config/aarch64/aarch64.cc               | 6 ++++++
 gcc/config/aarch64/tuning_models/ampere1.h  | 3 ++-
 gcc/config/aarch64/tuning_models/ampere1a.h | 3 ++-
 gcc/config/aarch64/tuning_models/ampere1b.h | 3 ++-
 5 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
index f28a73839a6..256f17bad60 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -49,4 +49,6 @@ AARCH64_EXTRA_TUNING_OPTION ("matched_vector_throughput", MATCHED_VECTOR_THROUGH
 
 AARCH64_EXTRA_TUNING_OPTION ("avoid_cross_loop_fma", AVOID_CROSS_LOOP_FMA)
 
+AARCH64_EXTRA_TUNING_OPTION ("fully_pipelined_FMA", FULLY_PIPELINED_FMA)
+
 #undef AARCH64_EXTRA_TUNING_OPTION
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index f9850320f61..1b3b288cdf9 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -18289,6 +18289,12 @@ aarch64_override_options_internal (struct gcc_options *opts)
     SET_OPTION_IF_UNSET (opts, &global_options_set, param_avoid_fma_max_bits,
 			 512);
 
+  /* Consider fully pipelined FMA in reassociation.  */
+  if (aarch64_tune_params.extra_tuning_flags
+      & AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA)
+    SET_OPTION_IF_UNSET (opts, &global_options_set, param_fully_pipelined_fma,
+			 1);
+
   aarch64_override_options_after_change_1 (opts);
 }
 
diff --git a/gcc/config/aarch64/tuning_models/ampere1.h b/gcc/config/aarch64/tuning_models/ampere1.h
index a144e8f94b3..d63788528a7 100644
--- a/gcc/config/aarch64/tuning_models/ampere1.h
+++ b/gcc/config/aarch64/tuning_models/ampere1.h
@@ -104,7 +104,8 @@ static const struct tune_params ampere1_tunings =
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA |
+   AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA),	/* tune_flags.  */
   &ampere1_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */
diff --git a/gcc/config/aarch64/tuning_models/ampere1a.h b/gcc/config/aarch64/tuning_models/ampere1a.h
index f688ed08a79..63506e1d1c6 100644
--- a/gcc/config/aarch64/tuning_models/ampere1a.h
+++ b/gcc/config/aarch64/tuning_models/ampere1a.h
@@ -56,7 +56,8 @@ static const struct tune_params ampere1a_tunings =
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA |
+   AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA),	/* tune_flags.  */
   &ampere1_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */
diff --git a/gcc/config/aarch64/tuning_models/ampere1b.h b/gcc/config/aarch64/tuning_models/ampere1b.h
index a98b6a980f7..7894e730174 100644
--- a/gcc/config/aarch64/tuning_models/ampere1b.h
+++ b/gcc/config/aarch64/tuning_models/ampere1b.h
@@ -106,7 +106,8 @@ static const struct tune_params ampere1b_tunings =
   0,	/* max_case_values.  */
   tune_params::AUTOPREFETCHER_STRONG,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND |
-   AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),	/* tune_flags.  */
+   AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA |
+   AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA),	/* tune_flags.  */
   &ampere1b_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */
-- 
2.25.1



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-01-03  6:53 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-27 10:40 [PATCH] aarch64: add 'AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA' Di Zhao OS
2023-12-29 10:23 ` Richard Sandiford
2024-01-03  6:53   ` Di Zhao OS

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).