public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-3222] tree-optimization/102128 - rework if-converted BB vect heuristic
@ 2021-08-30 12:04 Richard Biener
0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2021-08-30 12:04 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:89f33f44addbf9853bc3e6677db1fa941713cb6c
commit r12-3222-g89f33f44addbf9853bc3e6677db1fa941713cb6c
Author: Richard Biener <rguenther@suse.de>
Date: Mon Aug 30 12:56:26 2021 +0200
tree-optimization/102128 - rework if-converted BB vect heuristic
This reworks the previous attempt to avoid leaving if-converted
scalar code around in BB-vectorized loop bodies so that independent
subgraphs are still costed individually, which should address the
observed regression with 519.lbm_r. For this to work we now first cost
all subgraphs and only then proceed to emit vectorized code.
2021-08-30 Richard Biener <rguenther@suse.de>
PR tree-optimization/102128
* tree-vect-slp.c (vect_bb_vectorization_profitable_p):
Move scanning for if-converted scalar code to the caller
and instead delay clearing the visited flag for profitable
subgraphs.
(vect_slp_region): Cost all subgraphs before scheduling.
For if-converted BB vectorization scan for scalar COND_EXPRs
and do not vectorize if any found and the cost model is
very-cheap.
Diff:
---
gcc/tree-vect-slp.c | 112 +++++++++++++++++++++++++++-------------------------
1 file changed, 58 insertions(+), 54 deletions(-)
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 4d688c7a267..4ca24408249 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -5275,34 +5275,6 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
vector_costs.safe_splice (instance->cost_vec);
instance->cost_vec.release ();
}
- /* When we're vectorizing an if-converted loop body with the
- very-cheap cost model make sure we vectorized all if-converted
- code. */
- bool force_not_profitable = false;
- if (orig_loop && flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP)
- {
- gcc_assert (bb_vinfo->bbs.length () == 1);
- for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[0]);
- !gsi_end_p (gsi); gsi_next (&gsi))
- {
- /* The costing above left us with DCEable vectorized scalar
- stmts having the visited flag set. */
- if (gimple_visited_p (gsi_stmt (gsi)))
- continue;
-
- if (gassign *ass = dyn_cast <gassign *> (gsi_stmt (gsi)))
- if (gimple_assign_rhs_code (ass) == COND_EXPR)
- {
- force_not_profitable = true;
- break;
- }
- }
- }
-
- /* Unset visited flag. */
- stmt_info_for_cost *cost;
- FOR_EACH_VEC_ELT (scalar_costs, i, cost)
- gimple_set_visited (cost->stmt_info->stmt, false);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
@@ -5319,6 +5291,7 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
li_scalar_costs (scalar_costs.length ());
auto_vec<std::pair<unsigned, stmt_info_for_cost *> >
li_vector_costs (vector_costs.length ());
+ stmt_info_for_cost *cost;
FOR_EACH_VEC_ELT (scalar_costs, i, cost)
{
unsigned l = gimple_bb (cost->stmt_info->stmt)->loop_father->num;
@@ -5341,6 +5314,7 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
/* Now cost the portions individually. */
unsigned vi = 0;
unsigned si = 0;
+ bool profitable = true;
while (si < li_scalar_costs.length ()
&& vi < li_vector_costs.length ())
{
@@ -5407,30 +5381,29 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
example). */
if (vec_outside_cost + vec_inside_cost > scalar_cost)
{
- scalar_costs.release ();
- vector_costs.release ();
- return false;
+ profitable = false;
+ break;
}
}
- if (vi < li_vector_costs.length ())
+ if (profitable && vi < li_vector_costs.length ())
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"Excess vector cost for part in loop %d:\n",
li_vector_costs[vi].first);
- scalar_costs.release ();
- vector_costs.release ();
- return false;
+ profitable = false;
}
- if (dump_enabled_p () && force_not_profitable)
- dump_printf_loc (MSG_NOTE, vect_location,
- "not profitable because of unprofitable if-converted "
- "scalar code\n");
+ /* Unset visited flag. This is delayed only for a profitable subgraph of an
+ if-converted loop body; the caller then scans it and clears the flag. */
+ if (!orig_loop || !profitable)
+ FOR_EACH_VEC_ELT (scalar_costs, i, cost)
+ gimple_set_visited (cost->stmt_info->stmt, false);
scalar_costs.release ();
vector_costs.release ();
- return !force_not_profitable;
+
+ return profitable;
}
/* qsort comparator for lane defs. */
@@ -5884,9 +5857,8 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
bb_vinfo->shared->check_datarefs ();
- unsigned i;
- slp_instance instance;
- FOR_EACH_VEC_ELT (BB_VINFO_SLP_INSTANCES (bb_vinfo), i, instance)
+ auto_vec<slp_instance> profitable_subgraphs;
+ for (slp_instance instance : BB_VINFO_SLP_INSTANCES (bb_vinfo))
{
if (instance->subgraph_entries.is_empty ())
continue;
@@ -5894,9 +5866,7 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
vect_location = instance->location ();
if (!unlimited_cost_model (NULL)
&& !vect_bb_vectorization_profitable_p
- (bb_vinfo,
- orig_loop ? BB_VINFO_SLP_INSTANCES (bb_vinfo)
- : instance->subgraph_entries, orig_loop))
+ (bb_vinfo, instance->subgraph_entries, orig_loop))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5908,15 +5878,54 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
if (!dbg_cnt (vect_slp))
continue;
+ profitable_subgraphs.safe_push (instance);
+ }
+
+ /* When we're vectorizing an if-converted loop body with the
+ very-cheap cost model make sure we vectorized all if-converted
+ code. */
+ if (!profitable_subgraphs.is_empty ()
+ && orig_loop)
+ {
+ gcc_assert (bb_vinfo->bbs.length () == 1);
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[0]);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ /* The costing above left us with DCEable vectorized scalar
+ stmts having the visited flag set on profitable
+ subgraphs. Do the delayed clearing of the flag here. */
+ if (gimple_visited_p (gsi_stmt (gsi)))
+ {
+ gimple_set_visited (gsi_stmt (gsi), false);
+ continue;
+ }
+ if (flag_vect_cost_model != VECT_COST_MODEL_VERY_CHEAP)
+ continue;
+
+ if (gassign *ass = dyn_cast <gassign *> (gsi_stmt (gsi)))
+ if (gimple_assign_rhs_code (ass) == COND_EXPR)
+ {
+ if (!profitable_subgraphs.is_empty ()
+ && dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "not profitable because of "
+ "unprofitable if-converted scalar "
+ "code\n");
+ profitable_subgraphs.truncate (0);
+ }
+ }
+ }
+
+ /* Finally schedule the profitable subgraphs. */
+ for (slp_instance instance : profitable_subgraphs)
+ {
if (!vectorized && dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"Basic block will be vectorized "
"using SLP\n");
vectorized = true;
- vect_schedule_slp (bb_vinfo,
- orig_loop ? BB_VINFO_SLP_INSTANCES (bb_vinfo)
- : instance->subgraph_entries);
+ vect_schedule_slp (bb_vinfo, instance->subgraph_entries);
unsigned HOST_WIDE_INT bytes;
if (dump_enabled_p ())
@@ -5931,11 +5940,6 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
"basic block part vectorized using "
"variable length vectors\n");
}
-
- /* When we're called from loop vectorization we're considering
- all subgraphs at once. */
- if (orig_loop)
- break;
}
}
else
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-08-30 12:04 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-30 12:04 [gcc r12-3222] tree-optimization/102128 - rework if-converted BB vect heuristic Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).