From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1666) id DDB8E385383B; Tue, 29 Jun 2021 12:35:45 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org DDB8E385383B MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Richard Biener To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-1868] Refactor SLP permute opt propagation X-Act-Checkin: gcc X-Git-Author: Richard Biener X-Git-Refname: refs/heads/master X-Git-Oldrev: 53fd7544aff6d0a18869017cb9bb921a7f5dcd04 X-Git-Newrev: 2dfc0f2203e875621f4aeb2e2496aaeb9a2dc05b Message-Id: <20210629123545.DDB8E385383B@sourceware.org> Date: Tue, 29 Jun 2021 12:35:45 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 29 Jun 2021 12:35:46 -0000 https://gcc.gnu.org/g:2dfc0f2203e875621f4aeb2e2496aaeb9a2dc05b commit r12-1868-g2dfc0f2203e875621f4aeb2e2496aaeb9a2dc05b Author: Richard Biener Date: Tue Jun 29 11:30:23 2021 +0200 Refactor SLP permute opt propagation This rewrites the SLP permute opt propagation to elide the visited bit for an incoming permute of -1 as well as allowing the initial propagation to take more than one iteration before starting on materialization. As we still lack propagation in the reverse direction I've added gcc.dg/vect/bb-slp-71.c and a stopgap to restrict "any" permute handling to the supported cases. 2021-06-29 Richard Biener * tree-vect-slp.c (slpg_vertex::visited): Remove. (vect_slp_perms_eq): Handle -1 permutes. (vect_optimize_slp): Rewrite permute propagation. * gcc.dg/vect/pr67790.c: Un-XFAIL. * gcc.dg/vect/bb-slp-71.c: New testcase. Diff: --- gcc/testsuite/gcc.dg/vect/bb-slp-71.c | 32 ++++++++ gcc/testsuite/gcc.dg/vect/pr67790.c | 2 +- gcc/tree-vect-slp.c | 140 +++++++++++++++++++++------------- 3 files changed, 120 insertions(+), 54 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-71.c b/gcc/testsuite/gcc.dg/vect/bb-slp-71.c new file mode 100644 index 00000000000..6816511cd0f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-71.c @@ -0,0 +1,32 @@ +/* { dg-do run } */ + +#include "tree-vect.h" + +int a[4], b[4]; + +void __attribute__((noipa)) +foo(int x, int y) +{ + int tem0 = x + 1; + int tem1 = y + 2; + int tem2 = x + 3; + int tem3 = y + 4; + a[0] = tem0 + b[1]; + a[1] = tem1 + b[0]; + a[2] = tem2 + b[2]; + a[3] = tem3 + b[3]; +} + +int main() +{ + check_vect (); + + b[0] = 10; + b[1] = 14; + b[2] = 18; + b[3] = 22; + foo (-1, -3); + if (a[0] != 14 || a[1] != 9 || a[2] != 20 || a[3] != 23) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr67790.c b/gcc/testsuite/gcc.dg/vect/pr67790.c index 0555d41abf7..32eacd91fda 100644 --- a/gcc/testsuite/gcc.dg/vect/pr67790.c +++ b/gcc/testsuite/gcc.dg/vect/pr67790.c @@ -38,4 +38,4 @@ int main() } /* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ -/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 63b6e6a24b9..524bfaa1c7f 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -3470,12 +3470,11 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) struct slpg_vertex { slpg_vertex (slp_tree node_) - : node (node_), visited (0), perm_out (0), materialize (0) {} + : node (node_), perm_out (-1), materialize (0) {} int get_perm_in () const { return materialize ? materialize : perm_out; } slp_tree node; - unsigned visited : 1; /* The permutation on the outgoing lanes (towards SLP parents). */ int perm_out; /* The permutation that is applied by this node. perm_out is @@ -3567,7 +3566,8 @@ vect_slp_perms_eq (const vec > &perms, int perm_a, int perm_b) { return (perm_a == perm_b - || (perms[perm_a].length () == perms[perm_b].length () + || (perm_a != -1 && perm_b != -1 + && perms[perm_a].length () == perms[perm_b].length () && memcmp (&perms[perm_a][0], &perms[perm_b][0], sizeof (unsigned) * perms[perm_a].length ()) == 0)); } @@ -3614,7 +3614,7 @@ vect_optimize_slp (vec_info *vinfo) /* Leafs do not change across iterations. Note leafs also double as entries to the reverse graph. */ if (!slpg->vertices[idx].succ) - vertices[idx].visited = 1; + vertices[idx].perm_out = 0; /* Loads are the only thing generating permutes. */ if (!SLP_TREE_LOAD_PERMUTATION (node).exists ()) continue; @@ -3668,12 +3668,17 @@ vect_optimize_slp (vec_info *vinfo) /* Propagate permutes along the graph and compute materialization points. */ bool changed; + bool do_materialization = false; unsigned iteration = 0; do { changed = false; ++iteration; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "SLP optimize iteration %d\n", iteration); + for (i = vertices.length (); i > 0 ; --i) { int idx = ipo[i-1]; @@ -3685,19 +3690,21 @@ vect_optimize_slp (vec_info *vinfo) || SLP_TREE_DEF_TYPE (node) == vect_constant_def) continue; - vertices[idx].visited = 1; - /* We still eventually have failed backedge SLP nodes in the graph, those are only cancelled when analyzing operations. Simply treat them as transparent ops, propagating permutes through them. */ if (SLP_TREE_DEF_TYPE (node) == vect_internal_def) { - /* We do not handle stores with a permutation. */ + /* We do not handle stores with a permutation, so all + incoming permutes must have been materialized. */ stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (node); if (STMT_VINFO_DATA_REF (rep) && DR_IS_WRITE (STMT_VINFO_DATA_REF (rep))) - continue; + { + vertices[idx].perm_out = 0; + continue; + } /* We cannot move a permute across an operation that is not independent on lanes. Note this is an explicit negative list since that's much shorter than the respective @@ -3710,63 +3717,82 @@ vect_optimize_slp (vec_info *vinfo) case CFN_COMPLEX_MUL: case CFN_COMPLEX_MUL_CONJ: case CFN_VEC_ADDSUB: + vertices[idx].perm_out = 0; continue; default:; } } - int perm = -1; - for (graph_edge *succ = slpg->vertices[idx].succ; - succ; succ = succ->succ_next) - { - int succ_idx = succ->dest; - /* Handle unvisited nodes optimistically. */ - /* ??? But for constants once we want to handle non-bijective - permutes we have to verify the permute, when unifying lanes, - will not unify different constants. For example see - gcc.dg/vect/bb-slp-14.c for a case that would break. */ - if (!vertices[succ_idx].visited) - continue; - int succ_perm = vertices[succ_idx].perm_out; - if (perm == -1) - perm = succ_perm; - else if (succ_perm == 0) - { - perm = 0; - break; - } - else if (!vect_slp_perms_eq (perms, perm, succ_perm)) - { - perm = 0; - break; - } - } - - if (perm == -1) + int perm; + if (!slpg->vertices[idx].succ) /* Pick up pre-computed leaf values. */ perm = vertices[idx].perm_out; - else if (!vect_slp_perms_eq (perms, perm, - vertices[idx].get_perm_in ())) + else { - if (iteration > 1) - /* Make sure we eventually converge. */ - gcc_checking_assert (perm == 0); - if (perm == 0) + perm = -1; + bool all_constant = true; + for (graph_edge *succ = slpg->vertices[idx].succ; + succ; succ = succ->succ_next) { - vertices[idx].perm_out = 0; - vertices[idx].materialize = 0; + int succ_idx = succ->dest; + slp_tree succ_node = vertices[succ_idx].node; + if (SLP_TREE_DEF_TYPE (succ_node) != vect_external_def + && SLP_TREE_DEF_TYPE (succ_node) != vect_constant_def) + all_constant = false; + int succ_perm = vertices[succ_idx].perm_out; + /* Handle unvisited (and constant) nodes optimistically. */ + /* ??? But for constants once we want to handle + non-bijective permutes we have to verify the permute, + when unifying lanes, will not unify different constants. + For example see gcc.dg/vect/bb-slp-14.c for a case + that would break. */ + if (succ_perm == -1) + continue; + if (perm == -1) + perm = succ_perm; + else if (succ_perm == 0) + { + perm = 0; + break; + } + else if (!vect_slp_perms_eq (perms, perm, succ_perm)) + { + perm = 0; + break; + } + } + /* We still lack a forward propagation of materializations + and thus only allow "any" permutes on constant or external + nodes which we handle during materialization by looking + at SLP children. So avoid having internal "any" permutes + for now, see gcc.dg/vect/bb-slp-71.c for a testcase that + breaks when removing this restriction. */ + if (perm == -1 && all_constant) + perm = 0; + + if (!vect_slp_perms_eq (perms, perm, + vertices[idx].get_perm_in ())) + { + /* Make sure we eventually converge. */ + gcc_checking_assert (vertices[idx].get_perm_in () == -1 + || perm == 0); + if (perm == 0) + { + vertices[idx].perm_out = 0; + vertices[idx].materialize = 0; + } + if (!vertices[idx].materialize) + vertices[idx].perm_out = perm; + changed = true; } - if (!vertices[idx].materialize) - vertices[idx].perm_out = perm; - changed = true; } - if (perm == 0) + /* Elide pruning at materialization points in the first + iteration phase. */ + if (!do_materialization) continue; - /* Elide pruning at materialization points in the first - iteration so every node was visited once at least. */ - if (iteration == 1) + if (perm == 0 || perm == -1) continue; /* Decide on permute materialization. Look whether there's @@ -3784,8 +3810,8 @@ vect_optimize_slp (vec_info *vinfo) for (graph_edge *pred = slpg->vertices[idx].pred; pred; pred = pred->pred_next) { - gcc_checking_assert (vertices[pred->src].visited); int pred_perm = vertices[pred->src].get_perm_in (); + gcc_checking_assert (pred_perm != -1); if (!vect_slp_perms_eq (perms, perm, pred_perm)) { all_preds_permuted = false; @@ -3800,8 +3826,16 @@ vect_optimize_slp (vec_info *vinfo) vertices[idx].perm_out = 0; } } + + /* If the initial propagation converged, switch on materialization + and re-propagate. */ + if (!changed && !do_materialization) + { + do_materialization = true; + changed = true; + } } - while (changed || iteration == 1); + while (changed); /* Materialize. */ for (i = 0; i < vertices.length (); ++i)