public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-1868] Refactor SLP permute opt propagation
@ 2021-06-29 12:35 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2021-06-29 12:35 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:2dfc0f2203e875621f4aeb2e2496aaeb9a2dc05b

commit r12-1868-g2dfc0f2203e875621f4aeb2e2496aaeb9a2dc05b
Author: Richard Biener <rguenther@suse.de>
Date:   Tue Jun 29 11:30:23 2021 +0200

    Refactor SLP permute opt propagation
    
    This rewrites the SLP permute opt propagation to elide the visited
    bit for an incoming permute of -1 as well as allowing the initial
    propagation to take more than one iteration before starting on
    materialization.  As we still lack propagation in the reverse
    direction I've added gcc.dg/vect/bb-slp-71.c and a stopgap to
    restrict "any" permute handling to the supported cases.
    
    2021-06-29  Richard Biener  <rguenther@suse.de>
    
            * tree-vect-slp.c (slpg_vertex::visited): Remove.
            (vect_slp_perms_eq): Handle -1 permutes.
            (vect_optimize_slp): Rewrite permute propagation.
    
            * gcc.dg/vect/pr67790.c: Un-XFAIL.
            * gcc.dg/vect/bb-slp-71.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/vect/bb-slp-71.c |  32 ++++++++
 gcc/testsuite/gcc.dg/vect/pr67790.c   |   2 +-
 gcc/tree-vect-slp.c                   | 140 +++++++++++++++++++++-------------
 3 files changed, 120 insertions(+), 54 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-71.c b/gcc/testsuite/gcc.dg/vect/bb-slp-71.c
new file mode 100644
index 00000000000..6816511cd0f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-71.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+
+#include "tree-vect.h"
+
+int a[4], b[4];
+
+void __attribute__((noipa))
+foo(int x, int y)
+{
+  int tem0 = x + 1;
+  int tem1 = y + 2;
+  int tem2 = x + 3;
+  int tem3 = y + 4;
+  a[0] = tem0 + b[1];
+  a[1] = tem1 + b[0];
+  a[2] = tem2 + b[2];
+  a[3] = tem3 + b[3];
+}
+
+int main()
+{
+  check_vect ();
+
+  b[0] = 10;
+  b[1] = 14;
+  b[2] = 18;
+  b[3] = 22;
+  foo (-1, -3);
+  if (a[0] != 14 || a[1] != 9 || a[2] != 20 || a[3] != 23)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr67790.c b/gcc/testsuite/gcc.dg/vect/pr67790.c
index 0555d41abf7..32eacd91fda 100644
--- a/gcc/testsuite/gcc.dg/vect/pr67790.c
+++ b/gcc/testsuite/gcc.dg/vect/pr67790.c
@@ -38,4 +38,4 @@ int main()
 }
 
 /* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
-/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 63b6e6a24b9..524bfaa1c7f 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -3470,12 +3470,11 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
 struct slpg_vertex
 {
   slpg_vertex (slp_tree node_)
-    : node (node_), visited (0), perm_out (0), materialize (0) {}
+    : node (node_), perm_out (-1), materialize (0) {}
 
   int get_perm_in () const { return materialize ? materialize : perm_out; }
 
   slp_tree node;
-  unsigned visited : 1;
   /* The permutation on the outgoing lanes (towards SLP parents).  */
   int perm_out;
   /* The permutation that is applied by this node.  perm_out is
@@ -3567,7 +3566,8 @@ vect_slp_perms_eq (const vec<vec<unsigned> > &perms,
 		   int perm_a, int perm_b)
 {
   return (perm_a == perm_b
-	  || (perms[perm_a].length () == perms[perm_b].length ()
+	  || (perm_a != -1 && perm_b != -1
+	      && perms[perm_a].length () == perms[perm_b].length ()
 	      && memcmp (&perms[perm_a][0], &perms[perm_b][0],
 			 sizeof (unsigned) * perms[perm_a].length ()) == 0));
 }
@@ -3614,7 +3614,7 @@ vect_optimize_slp (vec_info *vinfo)
       /* Leafs do not change across iterations.  Note leafs also double
 	 as entries to the reverse graph.  */
       if (!slpg->vertices[idx].succ)
-	vertices[idx].visited = 1;
+	vertices[idx].perm_out = 0;
       /* Loads are the only thing generating permutes.  */
       if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
 	continue;
@@ -3668,12 +3668,17 @@ vect_optimize_slp (vec_info *vinfo)
 
   /* Propagate permutes along the graph and compute materialization points.  */
   bool changed;
+  bool do_materialization = false;
   unsigned iteration = 0;
   do
     {
       changed = false;
       ++iteration;
 
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "SLP optimize iteration %d\n", iteration);
+
       for (i = vertices.length (); i > 0 ; --i)
 	{
 	  int idx = ipo[i-1];
@@ -3685,19 +3690,21 @@ vect_optimize_slp (vec_info *vinfo)
 	      || SLP_TREE_DEF_TYPE (node) == vect_constant_def)
 	    continue;
 
-	  vertices[idx].visited = 1;
-
 	  /* We still eventually have failed backedge SLP nodes in the
 	     graph, those are only cancelled when analyzing operations.
 	     Simply treat them as transparent ops, propagating permutes
 	     through them.  */
 	  if (SLP_TREE_DEF_TYPE (node) == vect_internal_def)
 	    {
-	      /* We do not handle stores with a permutation.  */
+	      /* We do not handle stores with a permutation, so all
+		 incoming permutes must have been materialized.  */
 	      stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (node);
 	      if (STMT_VINFO_DATA_REF (rep)
 		  && DR_IS_WRITE (STMT_VINFO_DATA_REF (rep)))
-		continue;
+		{
+		  vertices[idx].perm_out = 0;
+		  continue;
+		}
 	      /* We cannot move a permute across an operation that is
 		 not independent on lanes.  Note this is an explicit
 		 negative list since that's much shorter than the respective
@@ -3710,63 +3717,82 @@ vect_optimize_slp (vec_info *vinfo)
 		  case CFN_COMPLEX_MUL:
 		  case CFN_COMPLEX_MUL_CONJ:
 		  case CFN_VEC_ADDSUB:
+		    vertices[idx].perm_out = 0;
 		    continue;
 		  default:;
 		  }
 	    }
 
-	  int perm = -1;
-	  for (graph_edge *succ = slpg->vertices[idx].succ;
-	       succ; succ = succ->succ_next)
-	    {
-	      int succ_idx = succ->dest;
-	      /* Handle unvisited nodes optimistically.  */
-	      /* ???  But for constants once we want to handle non-bijective
-		 permutes we have to verify the permute, when unifying lanes,
-		 will not unify different constants.  For example see
-		 gcc.dg/vect/bb-slp-14.c for a case that would break.  */
-	      if (!vertices[succ_idx].visited)
-		continue;
-	      int succ_perm = vertices[succ_idx].perm_out;
-	      if (perm == -1)
-		perm = succ_perm;
-	      else if (succ_perm == 0)
-		{
-		  perm = 0;
-		  break;
-		}
-	      else if (!vect_slp_perms_eq (perms, perm, succ_perm))
-		{
-		  perm = 0;
-		  break;
-		}
-	    }
-
-	  if (perm == -1)
+	  int perm;
+	  if (!slpg->vertices[idx].succ)
 	    /* Pick up pre-computed leaf values.  */
 	    perm = vertices[idx].perm_out;
-	  else if (!vect_slp_perms_eq (perms, perm,
-				       vertices[idx].get_perm_in ()))
+	  else
 	    {
-	      if (iteration > 1)
-		/* Make sure we eventually converge.  */
-		gcc_checking_assert (perm == 0);
-	      if (perm == 0)
+	      perm = -1;
+	      bool all_constant = true;
+	      for (graph_edge *succ = slpg->vertices[idx].succ;
+		   succ; succ = succ->succ_next)
 		{
-		  vertices[idx].perm_out = 0;
-		  vertices[idx].materialize = 0;
+		  int succ_idx = succ->dest;
+		  slp_tree succ_node = vertices[succ_idx].node;
+		  if (SLP_TREE_DEF_TYPE (succ_node) != vect_external_def
+		      && SLP_TREE_DEF_TYPE (succ_node) != vect_constant_def)
+		    all_constant = false;
+		  int succ_perm = vertices[succ_idx].perm_out;
+		  /* Handle unvisited (and constant) nodes optimistically.  */
+		  /* ???  But for constants once we want to handle
+		     non-bijective permutes we have to verify the permute,
+		     when unifying lanes, will not unify different constants.
+		     For example see gcc.dg/vect/bb-slp-14.c for a case
+		     that would break.  */
+		  if (succ_perm == -1)
+		    continue;
+		  if (perm == -1)
+		    perm = succ_perm;
+		  else if (succ_perm == 0)
+		    {
+		      perm = 0;
+		      break;
+		    }
+		  else if (!vect_slp_perms_eq (perms, perm, succ_perm))
+		    {
+		      perm = 0;
+		      break;
+		    }
+		}
+	      /* We still lack a forward propagation of materializations
+		 and thus only allow "any" permutes on constant or external
+		 nodes which we handle during materialization by looking
+		 at SLP children.  So avoid having internal "any" permutes
+		 for now, see gcc.dg/vect/bb-slp-71.c for a testcase that
+		 breaks when removing this restriction.  */
+	      if (perm == -1 && all_constant)
+		perm = 0;
+
+	      if (!vect_slp_perms_eq (perms, perm,
+				      vertices[idx].get_perm_in ()))
+		{
+		  /* Make sure we eventually converge.  */
+		  gcc_checking_assert (vertices[idx].get_perm_in () == -1
+				       || perm == 0);
+		  if (perm == 0)
+		    {
+		      vertices[idx].perm_out = 0;
+		      vertices[idx].materialize = 0;
+		    }
+		  if (!vertices[idx].materialize)
+		    vertices[idx].perm_out = perm;
+		  changed = true;
 		}
-	      if (!vertices[idx].materialize)
-		vertices[idx].perm_out = perm;
-	      changed = true;
 	    }
 
-	  if (perm == 0)
+	  /* Elide pruning at materialization points in the first
+	     iteration phase.  */
+	  if (!do_materialization)
 	    continue;
 
-	  /* Elide pruning at materialization points in the first
-	     iteration so every node was visited once at least.  */
-	  if (iteration == 1)
+	  if (perm == 0 || perm == -1)
 	    continue;
 
 	  /* Decide on permute materialization.  Look whether there's
@@ -3784,8 +3810,8 @@ vect_optimize_slp (vec_info *vinfo)
 	    for (graph_edge *pred = slpg->vertices[idx].pred;
 		 pred; pred = pred->pred_next)
 	      {
-		gcc_checking_assert (vertices[pred->src].visited);
 		int pred_perm = vertices[pred->src].get_perm_in ();
+		gcc_checking_assert (pred_perm != -1);
 		if (!vect_slp_perms_eq (perms, perm, pred_perm))
 		  {
 		    all_preds_permuted = false;
@@ -3800,8 +3826,16 @@ vect_optimize_slp (vec_info *vinfo)
 	      vertices[idx].perm_out = 0;
 	    }
 	}
+
+      /* If the initial propagation converged, switch on materialization
+	 and re-propagate.  */
+      if (!changed && !do_materialization)
+	{
+	  do_materialization = true;
+	  changed = true;
+	}
     }
-  while (changed || iteration == 1);
+  while (changed);
 
   /* Materialize.  */
   for (i = 0; i < vertices.length (); ++i)


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-06-29 12:35 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-29 12:35 [gcc r12-1868] Refactor SLP permute opt propagation Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).