* Re: [PATCH] target/108738 - limit STV chain discovery
[not found] <6400a47b.5d0a0220.153aa.9f41SMTPIN_ADDED_MISSING@mx.google.com>
@ 2023-03-03 7:03 ` Uros Bizjak
0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2023-03-03 7:03 UTC (permalink / raw)
To: Richard Biener; +Cc: gcc-patches, Jan Hubicka
On Thu, Mar 2, 2023 at 2:28 PM Richard Biener <rguenther@suse.de> wrote:
>
> The following puts a hard limit on the inherently quadratic STV chain
> discovery. Without a limit for the compiler.i testcase in PR26854
> we see at -O2
>
> machine dep reorg : 574.45 ( 53%)
>
> with release checking while with the proposed limit it's
>
> machine dep reorg : 2.86 ( 1%)
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> OK?
>
> Thanks,
> Richard.
>
> PR target/108738
> * config/i386/i386.opt (--param x86-stv-max-visits): New param.
> * doc/invoke.texi (--param x86-stv-max-visits): Document it.
> * config/i386/i386-features.h (scalar_chain::max_visits): New.
> (scalar_chain::build): Add bitmap parameter, return boolean.
> (scalar_chain::add_insn): Likewise.
> (scalar_chain::analyze_register_chain): Likewise.
> * config/i386/i386-features.cc (scalar_chain::scalar_chain):
> Initialize max_visits.
> (scalar_chain::analyze_register_chain): When we exhaust
> max_visits, abort. Also abort when running into any
> disallowed insn.
> (scalar_chain::add_insn): Propagate abort.
> (scalar_chain::build): Likewise. When aborting amend
> the set of disallowed insn with the insns set.
> (convert_scalars_to_vector): Adjust. Do not convert aborted
> chains.
LGTM.
Thanks,
Uros.
> ---
> gcc/config/i386/i386-features.cc | 77 +++++++++++++++++++++++---------
> gcc/config/i386/i386-features.h | 10 +++--
> gcc/config/i386/i386.opt | 4 ++
> gcc/doc/invoke.texi | 4 ++
> 4 files changed, 70 insertions(+), 25 deletions(-)
>
> diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
> index eff91301009..c09abf8fc20 100644
> --- a/gcc/config/i386/i386-features.cc
> +++ b/gcc/config/i386/i386-features.cc
> @@ -296,6 +296,8 @@ scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
>
> n_sse_to_integer = 0;
> n_integer_to_sse = 0;
> +
> + max_visits = x86_stv_max_visits;
> }
>
> /* Free chain's data. */
> @@ -354,10 +356,12 @@ scalar_chain::mark_dual_mode_def (df_ref def)
> }
>
> /* Check REF's chain to add new insns into a queue
> - and find registers requiring conversion. */
> + and find registers requiring conversion. Return true if OK, false
> + if the analysis was aborted. */
>
> -void
> -scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
> +bool
> +scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref,
> + bitmap disallowed)
> {
> df_link *chain;
> bool mark_def = false;
> @@ -371,6 +375,9 @@ scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
> if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
> continue;
>
> + if (--max_visits == 0)
> + return false;
> +
> if (!DF_REF_REG_MEM_P (chain->ref))
> {
> if (bitmap_bit_p (insns, uid))
> @@ -381,6 +388,10 @@ scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
> add_to_queue (uid);
> continue;
> }
> +
> + /* If we run into parts of an aborted chain discovery abort. */
> + if (bitmap_bit_p (disallowed, uid))
> + return false;
> }
>
> if (DF_REF_REG_DEF_P (chain->ref))
> @@ -401,15 +412,19 @@ scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
>
> if (mark_def)
> mark_dual_mode_def (ref);
> +
> + return true;
> }
>
> -/* Add instruction into a chain. */
> +/* Add instruction into a chain. Return true if OK, false if the search
> + was aborted. */
>
> -void
> -scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
> +bool
> +scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid,
> + bitmap disallowed)
> {
> if (!bitmap_set_bit (insns, insn_uid))
> - return;
> + return true;
>
> if (dump_file)
> fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
> @@ -426,22 +441,27 @@ scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
> df_ref ref;
> for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
> if (!HARD_REGISTER_P (DF_REF_REG (ref)))
> - analyze_register_chain (candidates, ref);
> + if (!analyze_register_chain (candidates, ref, disallowed))
> + return false;
>
> /* The operand(s) of VEC_SELECT don't need to be converted/convertible. */
> if (def_set && GET_CODE (SET_SRC (def_set)) == VEC_SELECT)
> - return;
> + return true;
>
> for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
> if (!DF_REF_REG_MEM_P (ref))
> - analyze_register_chain (candidates, ref);
> + if (!analyze_register_chain (candidates, ref, disallowed))
> + return false;
> +
> + return true;
> }
>
> /* Build new chain starting from insn INSN_UID recursively
> - adding all dependent uses and definitions. */
> + adding all dependent uses and definitions. Return true if OK, false
> + if the chain discovery was aborted. */
>
> -void
> -scalar_chain::build (bitmap candidates, unsigned insn_uid)
> +bool
> +scalar_chain::build (bitmap candidates, unsigned insn_uid, bitmap disallowed)
> {
> queue = BITMAP_ALLOC (NULL);
> bitmap_set_bit (queue, insn_uid);
> @@ -454,7 +474,17 @@ scalar_chain::build (bitmap candidates, unsigned insn_uid)
> insn_uid = bitmap_first_set_bit (queue);
> bitmap_clear_bit (queue, insn_uid);
> bitmap_clear_bit (candidates, insn_uid);
> - add_insn (candidates, insn_uid);
> + if (!add_insn (candidates, insn_uid, disallowed))
> + {
> + /* If we aborted the search put sofar found insn on the set of
> + disallowed insns so that further searches reaching them also
> + abort and thus we abort the whole but yet undiscovered chain. */
> + bitmap_ior_into (disallowed, insns);
> + if (dump_file)
> + fprintf (dump_file, "Aborted chain #%d discovery\n", chain_id);
> + BITMAP_FREE (queue);
> + return false;
> + }
> }
>
> if (dump_file)
> @@ -478,6 +508,8 @@ scalar_chain::build (bitmap candidates, unsigned insn_uid)
> }
>
> BITMAP_FREE (queue);
> +
> + return true;
> }
>
> /* Return a cost of building a vector costant
> @@ -2282,6 +2314,7 @@ convert_scalars_to_vector (bool timode_p)
>
> for (unsigned i = 0; i <= 2; ++i)
> {
> + auto_bitmap disallowed;
> bitmap_tree_view (&candidates[i]);
> while (!bitmap_empty_p (&candidates[i]))
> {
> @@ -2296,14 +2329,14 @@ convert_scalars_to_vector (bool timode_p)
> /* Find instructions chain we want to convert to vector mode.
> Check all uses and definitions to estimate all required
> conversions. */
> - chain->build (&candidates[i], uid);
> -
> - if (chain->compute_convert_gain () > 0)
> - converted_insns += chain->convert ();
> - else
> - if (dump_file)
> - fprintf (dump_file, "Chain #%d conversion is not profitable\n",
> - chain->chain_id);
> + if (chain->build (&candidates[i], uid, disallowed))
> + {
> + if (chain->compute_convert_gain () > 0)
> + converted_insns += chain->convert ();
> + else if (dump_file)
> + fprintf (dump_file, "Chain #%d conversion is not profitable\n",
> + chain->chain_id);
> + }
>
> delete chain;
> }
> diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h
> index 00c2c5e8c2d..72a9f54b4e2 100644
> --- a/gcc/config/i386/i386-features.h
> +++ b/gcc/config/i386/i386-features.h
> @@ -148,12 +148,15 @@ class scalar_chain
> /* Registers used in both vector and sclar modes. */
> bitmap defs_conv;
>
> + /* Limit on chain discovery. */
> + unsigned max_visits;
> +
> bitmap insns_conv;
> hash_map<rtx, rtx> defs_map;
> unsigned n_sse_to_integer;
> unsigned n_integer_to_sse;
>
> - void build (bitmap candidates, unsigned insn_uid);
> + bool build (bitmap candidates, unsigned insn_uid, bitmap disallowed);
> virtual int compute_convert_gain () = 0;
> int convert ();
>
> @@ -168,8 +171,9 @@ class scalar_chain
> void convert_registers ();
>
> private:
> - void add_insn (bitmap candidates, unsigned insn_uid);
> - void analyze_register_chain (bitmap candidates, df_ref ref);
> + bool add_insn (bitmap candidates, unsigned insn_uid, bitmap disallowed);
> + bool analyze_register_chain (bitmap candidates, df_ref ref,
> + bitmap disallowed);
> virtual void convert_insn (rtx_insn *insn) = 0;
> virtual void convert_op (rtx *op, rtx_insn *insn) = 0;
> };
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index 7d57f617d65..94fdd639ff1 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -599,6 +599,10 @@ Target Mask(STV) Save
> Disable Scalar to Vector optimization pass transforming 64-bit integer
> computations into a vector ones.
>
> +-param=x86-stv-max-visits=
> +Target Joined UInteger Var(x86_stv_max_visits) Init(10000) IntegerRange(1, 1000000) Param
> +The maximum number of use and def visits when discovering a STV chain before the discovery is aborted.
> +
> mdispatch-scheduler
> Target RejectNegative Var(flag_dispatch_scheduler)
> Do dispatch scheduling if processor is bdver1, bdver2, bdver3, bdver4
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 0045661cc5d..2da68802356 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -16229,6 +16229,10 @@ The following choices of @var{name} are available on i386 and x86_64 targets:
> @item x86-stlf-window-ninsns
> Instructions number above which STFL stall penalty can be compensated.
>
> +@item x86-stv-max-visits
> +The maximum number of use and def visits when discovering a STV chain before
> +the discovery is aborted.
> +
> @end table
>
> @end table
> --
> 2.35.3
^ permalink raw reply [flat|nested] 2+ messages in thread
* [PATCH] target/108738 - limit STV chain discovery
@ 2023-03-02 13:28 Richard Biener
0 siblings, 0 replies; 2+ messages in thread
From: Richard Biener @ 2023-03-02 13:28 UTC (permalink / raw)
To: gcc-patches; +Cc: Jan Hubicka, ubizjak
The following puts a hard limit on the inherently quadratic STV chain
discovery. Without a limit for the compiler.i testcase in PR26854
we see at -O2
machine dep reorg : 574.45 ( 53%)
with release checking while with the proposed limit it's
machine dep reorg : 2.86 ( 1%)
Bootstrapped and tested on x86_64-unknown-linux-gnu.
OK?
Thanks,
Richard.
PR target/108738
* config/i386/i386.opt (--param x86-stv-max-visits): New param.
* doc/invoke.texi (--param x86-stv-max-visits): Document it.
* config/i386/i386-features.h (scalar_chain::max_visits): New.
(scalar_chain::build): Add bitmap parameter, return boolean.
(scalar_chain::add_insn): Likewise.
(scalar_chain::analyze_register_chain): Likewise.
* config/i386/i386-features.cc (scalar_chain::scalar_chain):
Initialize max_visits.
(scalar_chain::analyze_register_chain): When we exhaust
max_visits, abort. Also abort when running into any
disallowed insn.
(scalar_chain::add_insn): Propagate abort.
(scalar_chain::build): Likewise. When aborting amend
the set of disallowed insn with the insns set.
(convert_scalars_to_vector): Adjust. Do not convert aborted
chains.
---
gcc/config/i386/i386-features.cc | 77 +++++++++++++++++++++++---------
gcc/config/i386/i386-features.h | 10 +++--
gcc/config/i386/i386.opt | 4 ++
gcc/doc/invoke.texi | 4 ++
4 files changed, 70 insertions(+), 25 deletions(-)
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index eff91301009..c09abf8fc20 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -296,6 +296,8 @@ scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
n_sse_to_integer = 0;
n_integer_to_sse = 0;
+
+ max_visits = x86_stv_max_visits;
}
/* Free chain's data. */
@@ -354,10 +356,12 @@ scalar_chain::mark_dual_mode_def (df_ref def)
}
/* Check REF's chain to add new insns into a queue
- and find registers requiring conversion. */
+ and find registers requiring conversion. Return true if OK, false
+ if the analysis was aborted. */
-void
-scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
+bool
+scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref,
+ bitmap disallowed)
{
df_link *chain;
bool mark_def = false;
@@ -371,6 +375,9 @@ scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
continue;
+ if (--max_visits == 0)
+ return false;
+
if (!DF_REF_REG_MEM_P (chain->ref))
{
if (bitmap_bit_p (insns, uid))
@@ -381,6 +388,10 @@ scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
add_to_queue (uid);
continue;
}
+
+ /* If we run into parts of an aborted chain discovery abort. */
+ if (bitmap_bit_p (disallowed, uid))
+ return false;
}
if (DF_REF_REG_DEF_P (chain->ref))
@@ -401,15 +412,19 @@ scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
if (mark_def)
mark_dual_mode_def (ref);
+
+ return true;
}
-/* Add instruction into a chain. */
+/* Add instruction into a chain. Return true if OK, false if the search
+ was aborted. */
-void
-scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
+bool
+scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid,
+ bitmap disallowed)
{
if (!bitmap_set_bit (insns, insn_uid))
- return;
+ return true;
if (dump_file)
fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
@@ -426,22 +441,27 @@ scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
df_ref ref;
for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
if (!HARD_REGISTER_P (DF_REF_REG (ref)))
- analyze_register_chain (candidates, ref);
+ if (!analyze_register_chain (candidates, ref, disallowed))
+ return false;
/* The operand(s) of VEC_SELECT don't need to be converted/convertible. */
if (def_set && GET_CODE (SET_SRC (def_set)) == VEC_SELECT)
- return;
+ return true;
for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
if (!DF_REF_REG_MEM_P (ref))
- analyze_register_chain (candidates, ref);
+ if (!analyze_register_chain (candidates, ref, disallowed))
+ return false;
+
+ return true;
}
/* Build new chain starting from insn INSN_UID recursively
- adding all dependent uses and definitions. */
+ adding all dependent uses and definitions. Return true if OK, false
+ if the chain discovery was aborted. */
-void
-scalar_chain::build (bitmap candidates, unsigned insn_uid)
+bool
+scalar_chain::build (bitmap candidates, unsigned insn_uid, bitmap disallowed)
{
queue = BITMAP_ALLOC (NULL);
bitmap_set_bit (queue, insn_uid);
@@ -454,7 +474,17 @@ scalar_chain::build (bitmap candidates, unsigned insn_uid)
insn_uid = bitmap_first_set_bit (queue);
bitmap_clear_bit (queue, insn_uid);
bitmap_clear_bit (candidates, insn_uid);
- add_insn (candidates, insn_uid);
+ if (!add_insn (candidates, insn_uid, disallowed))
+ {
+ /* If we aborted the search put sofar found insn on the set of
+ disallowed insns so that further searches reaching them also
+ abort and thus we abort the whole but yet undiscovered chain. */
+ bitmap_ior_into (disallowed, insns);
+ if (dump_file)
+ fprintf (dump_file, "Aborted chain #%d discovery\n", chain_id);
+ BITMAP_FREE (queue);
+ return false;
+ }
}
if (dump_file)
@@ -478,6 +508,8 @@ scalar_chain::build (bitmap candidates, unsigned insn_uid)
}
BITMAP_FREE (queue);
+
+ return true;
}
/* Return a cost of building a vector costant
@@ -2282,6 +2314,7 @@ convert_scalars_to_vector (bool timode_p)
for (unsigned i = 0; i <= 2; ++i)
{
+ auto_bitmap disallowed;
bitmap_tree_view (&candidates[i]);
while (!bitmap_empty_p (&candidates[i]))
{
@@ -2296,14 +2329,14 @@ convert_scalars_to_vector (bool timode_p)
/* Find instructions chain we want to convert to vector mode.
Check all uses and definitions to estimate all required
conversions. */
- chain->build (&candidates[i], uid);
-
- if (chain->compute_convert_gain () > 0)
- converted_insns += chain->convert ();
- else
- if (dump_file)
- fprintf (dump_file, "Chain #%d conversion is not profitable\n",
- chain->chain_id);
+ if (chain->build (&candidates[i], uid, disallowed))
+ {
+ if (chain->compute_convert_gain () > 0)
+ converted_insns += chain->convert ();
+ else if (dump_file)
+ fprintf (dump_file, "Chain #%d conversion is not profitable\n",
+ chain->chain_id);
+ }
delete chain;
}
diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h
index 00c2c5e8c2d..72a9f54b4e2 100644
--- a/gcc/config/i386/i386-features.h
+++ b/gcc/config/i386/i386-features.h
@@ -148,12 +148,15 @@ class scalar_chain
/* Registers used in both vector and sclar modes. */
bitmap defs_conv;
+ /* Limit on chain discovery. */
+ unsigned max_visits;
+
bitmap insns_conv;
hash_map<rtx, rtx> defs_map;
unsigned n_sse_to_integer;
unsigned n_integer_to_sse;
- void build (bitmap candidates, unsigned insn_uid);
+ bool build (bitmap candidates, unsigned insn_uid, bitmap disallowed);
virtual int compute_convert_gain () = 0;
int convert ();
@@ -168,8 +171,9 @@ class scalar_chain
void convert_registers ();
private:
- void add_insn (bitmap candidates, unsigned insn_uid);
- void analyze_register_chain (bitmap candidates, df_ref ref);
+ bool add_insn (bitmap candidates, unsigned insn_uid, bitmap disallowed);
+ bool analyze_register_chain (bitmap candidates, df_ref ref,
+ bitmap disallowed);
virtual void convert_insn (rtx_insn *insn) = 0;
virtual void convert_op (rtx *op, rtx_insn *insn) = 0;
};
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 7d57f617d65..94fdd639ff1 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -599,6 +599,10 @@ Target Mask(STV) Save
Disable Scalar to Vector optimization pass transforming 64-bit integer
computations into a vector ones.
+-param=x86-stv-max-visits=
+Target Joined UInteger Var(x86_stv_max_visits) Init(10000) IntegerRange(1, 1000000) Param
+The maximum number of use and def visits when discovering a STV chain before the discovery is aborted.
+
mdispatch-scheduler
Target RejectNegative Var(flag_dispatch_scheduler)
Do dispatch scheduling if processor is bdver1, bdver2, bdver3, bdver4
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 0045661cc5d..2da68802356 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -16229,6 +16229,10 @@ The following choices of @var{name} are available on i386 and x86_64 targets:
@item x86-stlf-window-ninsns
Instructions number above which STFL stall penalty can be compensated.
+@item x86-stv-max-visits
+The maximum number of use and def visits when discovering a STV chain before
+the discovery is aborted.
+
@end table
@end table
--
2.35.3
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-03-03 7:03 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <6400a47b.5d0a0220.153aa.9f41SMTPIN_ADDED_MISSING@mx.google.com>
2023-03-03 7:03 ` [PATCH] target/108738 - limit STV chain discovery Uros Bizjak
2023-03-02 13:28 Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).