* [PATCH] tree-optimization/97678 - fix SLP induction epilogue vectorization
@ 2020-11-03 10:13 Richard Biener
0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2020-11-03 10:13 UTC (permalink / raw)
To: gcc-patches
This restores not tracking SLP nodes for induction initial values
in a non-nested context, because tracking them interferes with
peeling and epilogue vectorization.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
2020-11-03 Richard Biener <rguenther@suse.de>
PR tree-optimization/97678
* tree-vect-slp.c (vect_build_slp_tree_2): Do not track
the initial values of inductions when not nested.
* tree-vect-loop.c (vectorizable_induction): Look at
PHI node initial values again for SLP and not nested
inductions. Handle LOOP_VINFO_MASK_SKIP_NITERS and cost
invariants.
* gcc.dg/vect/pr97678.c: New testcase.
---
gcc/testsuite/gcc.dg/vect/pr97678.c | 29 +++++++++++++++++
gcc/tree-vect-loop.c | 49 ++++++++++++++++++++++++++---
gcc/tree-vect-slp.c | 8 +++--
3 files changed, 79 insertions(+), 7 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/pr97678.c
diff --git a/gcc/testsuite/gcc.dg/vect/pr97678.c b/gcc/testsuite/gcc.dg/vect/pr97678.c
new file mode 100644
index 00000000000..ebe4a35bb3f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr97678.c
@@ -0,0 +1,29 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-mavx2" { target avx2_runtime } } */
+
+#include "tree-vect.h"
+
+int
+main () /* Fills b with two interleaved inductions, then checks them.  */
+{
+ unsigned int i = 0;
+ unsigned short b[158 * 2]; /* 158 pairs of interleaved values.  */
+
+ check_vect (); /* From tree-vect.h; presumably checks target vector support -- confirm.  */
+
+ for (i = 0; i < 158; i++) /* Even slots get i*7, odd slots get i*8.  */
+ {
+ b[i * 2] = i * 7;
+ b[i * 2 + 1] = i * 8;
+ }
+
+ for (i = 0; i < 158; ++i) /* Verify each pair; abort on any mismatch.  */
+ if (b[i*2] != (unsigned short)(i*7)
+ || b[i*2+1] != (unsigned short)(i*8))
+ abort ();
+
+ return 0;
+}
+
+/* The init loop should be vectorized with SLP. */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index fcea28935bc..6fa185daa28 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -7800,6 +7800,10 @@ vectorizable_induction (loop_vec_info loop_vinfo,
= record_stmt_cost (cost_vec,
SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
vector_stmt, stmt_info, 0, vect_body);
+ /* prologue cost for vec_init (if not nested) and step. */
+ prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop,
+ scalar_to_vec,
+ stmt_info, 0, vect_prologue);
}
else /* if (!slp_node) */
{
@@ -7858,9 +7862,15 @@ vectorizable_induction (loop_vec_info loop_vinfo,
cycles we have to reconstruct the step from SCEV data. */
unsigned group_size = SLP_TREE_LANES (slp_node);
tree *steps = XALLOCAVEC (tree, group_size);
+ tree *inits = XALLOCAVEC (tree, group_size);
stmt_vec_info phi_info;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info)
- steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
+ {
+ steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
+ if (!init_node)
+ inits[i] = gimple_phi_arg_def (as_a<gphi *> (phi_info->stmt),
+ pe->dest_idx);
+ }
/* Now generate the IVs. */
unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
@@ -7875,16 +7885,39 @@ vectorizable_induction (loop_vec_info loop_vinfo,
? build_real_from_wide (stept, lup_mul,
UNSIGNED)
: build_int_cstu (stept, lup_mul));
+ tree peel_mul = NULL_TREE;
+ if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo))
+ {
+ stmts = NULL;
+ if (SCALAR_FLOAT_TYPE_P (stept))
+ peel_mul = gimple_build (&stmts, FLOAT_EXPR, stept,
+ LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
+ else
+ peel_mul = gimple_convert (&stmts, stept,
+ LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
+ peel_mul = gimple_build_vector_from_val (&stmts, step_vectype, peel_mul);
+ if (stmts)
+ {
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+ gcc_assert (!new_bb);
+ }
+ }
unsigned ivn;
auto_vec<tree> vec_steps;
for (ivn = 0; ivn < nivs; ++ivn)
{
- tree_vector_builder elts (step_vectype, const_nunits, 1);
+ tree_vector_builder step_elts (step_vectype, const_nunits, 1);
+ tree_vector_builder init_elts (vectype, const_nunits, 1);
tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
{
tree elt = steps[(ivn*const_nunits + eltn) % group_size];
- elts.quick_push (elt);
+ step_elts.quick_push (elt);
+ if (!init_node)
+ {
+ elt = inits[(ivn*const_nunits + eltn) % group_size];
+ init_elts.quick_push (elt);
+ }
unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
? build_real_from_wide (stept,
@@ -7892,10 +7925,15 @@ vectorizable_induction (loop_vec_info loop_vinfo,
: build_int_cstu (stept, mul_elt));
}
stmts = NULL;
- vec_step = gimple_build_vector (&stmts, &elts);
+ vec_step = gimple_build_vector (&stmts, &step_elts);
vec_step = gimple_convert (&stmts, step_vectype, vec_step);
vec_steps.safe_push (vec_step);
tree step_mul = gimple_build_vector (&stmts, &mul_elts);
+ if (peel_mul) /* Add the LOOP_VINFO_MASK_SKIP_NITERS vector to the multiplier.  */
+ step_mul = gimple_build (&stmts, PLUS_EXPR, step_vectype,
+ step_mul, peel_mul);
+ if (!init_node)
+ vec_init = gimple_build_vector (&stmts, &init_elts);
if (stmts)
{
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
@@ -7926,7 +7964,8 @@ vectorizable_induction (loop_vec_info loop_vinfo,
add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
UNKNOWN_LOCATION);
- vec_init = vect_get_slp_vect_def (init_node, ivn);
+ if (init_node)
+ vec_init = vect_get_slp_vect_def (init_node, ivn);
if (!integer_zerop (step_mul))
{
stmts = NULL;
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 63a59c0c8ed..e97fbe897a7 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1444,9 +1444,13 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
if (def_type == vect_induction_def)
{
/* Induction PHIs are not cycles but walk the initial
- value. */
+ value. Only for inner loops though, for outer loops
+ we need to pick up the value from the actual PHIs
+ to more easily support peeling and epilogue vectorization. */
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- if (nested_in_vect_loop_p (loop, stmt_info))
+ if (!nested_in_vect_loop_p (loop, stmt_info))
+ skip_args[loop_preheader_edge (loop)->dest_idx] = true;
+ else
loop = loop->inner;
skip_args[loop_latch_edge (loop)->dest_idx] = true;
}
--
2.26.2
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2020-11-03 10:13 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-03 10:13 [PATCH] tree-optimization/97678 - fix SLP induction epilogue vectorization Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).