[gcc(refs/users/aldyh/heads/ranger-staging)] emit SLP vectorized loads earlier
From: Aldy Hernandez @ 2020-06-27 21:27 UTC
  To: gcc-cvs

https://gcc.gnu.org/g:6924b5e6bd3c89e229c52eafb1353bcbe17ab405

commit 6924b5e6bd3c89e229c52eafb1353bcbe17ab405
Author: Richard Biener <rguenther@suse.de>
Date:   Tue Jun 23 14:47:47 2020 +0200

    emit SLP vectorized loads earlier
    
    This makes sure to emit SLP vectorized loads at the position of the
    first scalar load.  This makes SLP dependence checking more powerful
    because hoisting loads can use TBAA, and it increases the freedom of
    vector placement when there are constraints from live lanes.
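
    As an illustration, consider a hypothetical example (not taken from
    the patch or its testsuite): an SLP store group d[0..3] fed by the
    loads a[0..3].  Emitting the vector load at the position of the
    first scalar load hoists the loads of a[1..3] above the intervening
    float store, which TBAA can disambiguate against the int loads:

        int a[4];
        float f;

        void foo (int *restrict d)
        {
          d[0] = a[0];  /* vector load of a[0..3] is emitted here  */
          f = 1.0f;     /* hoisting a[1..3] past this store uses TBAA  */
          d[1] = a[1];
          d[2] = a[2];
          d[3] = a[3];
        }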
    
    Vectorized shifts prevent always inserting vectorized stmts after
    the vectorized defs, because the generated vector shift ends up
    using the original scalar operand even when the SLP graph indicates
    the shift operand is vectorized (and we actually emit and cost
    those stmts).
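
    A minimal sketch of that situation (hypothetical example): the SLP
    graph contains a vectorized (splat) operand for n, and we emit and
    cost those stmts, yet the generated vector shift can consume the
    scalar n directly, e.g. via a shift-by-scalar pattern:

        int x[4], y[4];

        void bar (int n)
        {
          x[0] = y[0] << n;
          x[1] = y[1] << n;
          x[2] = y[2] << n;
          x[3] = y[3] << n;
        }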
    
    vect_slp_analyze_and_verify_node_alignment shows that we need
    alignment in too many places; this is a temporary solution, and my
    plan is to have a single meta-info for a dataref group instead
    (also getting rid of DR_GROUP_FIRST/NEXT_ELEMENT).
    
    2020-06-24  Richard Biener  <rguenther@suse.de>
    
            * tree-vectorizer.h (vect_find_first_scalar_stmt_in_slp):
            Declare.
            * tree-vect-data-refs.c (vect_preserves_scalar_order_p):
            Simplify for new position of vectorized SLP loads.
            (vect_slp_analyze_node_dependences): Adjust for it.
            (vect_slp_analyze_and_verify_node_alignment): Compute alignment
            for the first stmt's dataref.
            * tree-vect-slp.c (vect_find_first_scalar_stmt_in_slp): New.
            (vect_schedule_slp_instance): Emit loads before the
            first scalar stmt.
            * tree-vect-stmts.c (vectorizable_load): Do what the comment
            says and use vect_find_first_scalar_stmt_in_slp.

Diff:
---
 gcc/tree-vect-data-refs.c | 267 ++++++++++++++++++++++++++++------------------
 gcc/tree-vect-slp.c       |  51 +++++++--
 gcc/tree-vect-stmts.c     |   3 +-
 gcc/tree-vectorizer.h     |   1 +
 4 files changed, 213 insertions(+), 109 deletions(-)

diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 2365a3925bb..eb8288e7a85 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -232,61 +232,43 @@ vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b)
       && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
     return true;
 
-  /* STMT_A and STMT_B belong to overlapping groups.  All loads in a
-     SLP group are emitted at the position of the last scalar load and
-     all loads in an interleaving group are emitted at the position
-     of the first scalar load.
+  /* STMT_A and STMT_B belong to overlapping groups.  All loads are
+     emitted at the position of the first scalar load.
      Stores in a group are emitted at the position of the last scalar store.
      Compute that position and check whether the resulting order matches
-     the current one.
-     We have not yet decided between SLP and interleaving so we have
-     to conservatively assume both.  */
-  stmt_vec_info il_a;
-  stmt_vec_info last_a = il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
-  if (last_a)
+     the current one.  */
+  stmt_vec_info il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
+  if (il_a)
     {
-      for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_a); s;
-	   s = DR_GROUP_NEXT_ELEMENT (s))
-	last_a = get_later_stmt (last_a, s);
-      if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
-	{
-	  for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
-	       s = DR_GROUP_NEXT_ELEMENT (s))
-	    if (get_later_stmt (il_a, s) == il_a)
-	      il_a = s;
-	}
-      else
-	il_a = last_a;
+      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
+	for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
+	     s = DR_GROUP_NEXT_ELEMENT (s))
+	  il_a = get_later_stmt (il_a, s);
+      else /* DR_IS_READ */
+	for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
+	     s = DR_GROUP_NEXT_ELEMENT (s))
+	  if (get_later_stmt (il_a, s) == il_a)
+	    il_a = s;
     }
   else
-    last_a = il_a = stmtinfo_a;
-  stmt_vec_info il_b;
-  stmt_vec_info last_b = il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
-  if (last_b)
+    il_a = stmtinfo_a;
+  stmt_vec_info il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
+  if (il_b)
     {
-      for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_b); s;
-	   s = DR_GROUP_NEXT_ELEMENT (s))
-	last_b = get_later_stmt (last_b, s);
-      if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
-	{
-	  for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
-	       s = DR_GROUP_NEXT_ELEMENT (s))
-	    if (get_later_stmt (il_b, s) == il_b)
-	      il_b = s;
-	}
-      else
-	il_b = last_b;
+      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
+	for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
+	     s = DR_GROUP_NEXT_ELEMENT (s))
+	  il_b = get_later_stmt (il_b, s);
+      else /* DR_IS_READ */
+	for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
+	     s = DR_GROUP_NEXT_ELEMENT (s))
+	  if (get_later_stmt (il_b, s) == il_b)
+	    il_b = s;
     }
   else
-    last_b = il_b = stmtinfo_b;
+    il_b = stmtinfo_b;
   bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a);
-  return (/* SLP */
-	  (get_later_stmt (last_a, last_b) == last_a) == a_after_b
-	  /* Interleaving */
-	  && (get_later_stmt (il_a, il_b) == il_a) == a_after_b
-	  /* Mixed */
-	  && (get_later_stmt (il_a, last_b) == il_a) == a_after_b
-	  && (get_later_stmt (last_a, il_b) == last_a) == a_after_b);
+  return (get_later_stmt (il_a, il_b) == il_a) == a_after_b;
 }
 
 /* A subroutine of vect_analyze_data_ref_dependence.  Handle
@@ -702,71 +684,144 @@ vect_slp_analyze_node_dependences (vec_info *vinfo, slp_tree node,
   /* This walks over all stmts involved in the SLP load/store done
      in NODE verifying we can sink them up to the last stmt in the
      group.  */
-  stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node);
-  for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
+  if (DR_IS_WRITE (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (node))))
     {
-      stmt_vec_info access_info = SLP_TREE_SCALAR_STMTS (node)[k];
-      if (access_info == last_access_info)
-	continue;
-      data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
-      ao_ref ref;
-      bool ref_initialized_p = false;
-      for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
-	   gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi))
+      stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node);
+      for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
 	{
-	  gimple *stmt = gsi_stmt (gsi);
-	  if (! gimple_vuse (stmt)
-	      || (DR_IS_READ (dr_a) && ! gimple_vdef (stmt)))
+	  stmt_vec_info access_info = SLP_TREE_SCALAR_STMTS (node)[k];
+	  if (access_info == last_access_info)
 	    continue;
-
-	  /* If we couldn't record a (single) data reference for this
-	     stmt we have to resort to the alias oracle.  */
-	  stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
-	  data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
-	  if (!dr_b)
+	  data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
+	  ao_ref ref;
+	  bool ref_initialized_p = false;
+	  for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
+	       gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi))
 	    {
-	      /* We are moving a store or sinking a load - this means
-	         we cannot use TBAA for disambiguation.  */
-	      if (!ref_initialized_p)
-		ao_ref_init (&ref, DR_REF (dr_a));
-	      if (stmt_may_clobber_ref_p_1 (stmt, &ref, false)
-		  || ref_maybe_used_by_stmt_p (stmt, &ref, false))
+	      gimple *stmt = gsi_stmt (gsi);
+	      if (! gimple_vuse (stmt))
+		continue;
+
+	      /* If we couldn't record a (single) data reference for this
+		 stmt we have to resort to the alias oracle.  */
+	      stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
+	      data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
+	      if (!dr_b)
+		{
+		  /* We are moving a store - this means
+		     we cannot use TBAA for disambiguation.  */
+		  if (!ref_initialized_p)
+		    ao_ref_init (&ref, DR_REF (dr_a));
+		  if (stmt_may_clobber_ref_p_1 (stmt, &ref, false)
+		      || ref_maybe_used_by_stmt_p (stmt, &ref, false))
+		    return false;
+		  continue;
+		}
+
+	      bool dependent = false;
+	      /* If we run into a store of this same instance (we've just
+		 marked those) then delay dependence checking until we run
+		 into the last store because this is where it will have
+		 been sunk to (and we verify if we can do that as well).  */
+	      if (gimple_visited_p (stmt))
+		{
+		  if (stmt_info != last_store_info)
+		    continue;
+		  unsigned i;
+		  stmt_vec_info store_info;
+		  FOR_EACH_VEC_ELT (stores, i, store_info)
+		    {
+		      data_reference *store_dr
+			= STMT_VINFO_DATA_REF (store_info);
+		      ddr_p ddr = initialize_data_dependence_relation
+				    (dr_a, store_dr, vNULL);
+		      dependent
+			= vect_slp_analyze_data_ref_dependence (vinfo, ddr);
+		      free_dependence_relation (ddr);
+		      if (dependent)
+			break;
+		    }
+		}
+	      else
+		{
+		  ddr_p ddr = initialize_data_dependence_relation (dr_a,
+								   dr_b, vNULL);
+		  dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
+		  free_dependence_relation (ddr);
+		}
+	      if (dependent)
 		return false;
-	      continue;
 	    }
-
-	  bool dependent = false;
-	  /* If we run into a store of this same instance (we've just
-	     marked those) then delay dependence checking until we run
-	     into the last store because this is where it will have
-	     been sunk to (and we verify if we can do that as well).  */
-	  if (gimple_visited_p (stmt))
+	}
+    }
+  else /* DR_IS_READ */
+    {
+      stmt_vec_info first_access_info
+	= vect_find_first_scalar_stmt_in_slp (node);
+      for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k)
+	{
+	  stmt_vec_info access_info = SLP_TREE_SCALAR_STMTS (node)[k];
+	  if (access_info == first_access_info)
+	    continue;
+	  data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
+	  ao_ref ref;
+	  bool ref_initialized_p = false;
+	  for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
+	       gsi_stmt (gsi) != first_access_info->stmt; gsi_prev (&gsi))
 	    {
-	      if (stmt_info != last_store_info)
+	      gimple *stmt = gsi_stmt (gsi);
+	      if (! gimple_vdef (stmt))
 		continue;
-	      unsigned i;
-	      stmt_vec_info store_info;
-	      FOR_EACH_VEC_ELT (stores, i, store_info)
+
+	      /* If we couldn't record a (single) data reference for this
+		 stmt we have to resort to the alias oracle.  */
+	      stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
+	      data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
+	      if (!dr_b)
+		{
+		  /* We are hoisting a load - this means we can use
+		     TBAA for disambiguation.  */
+		  if (!ref_initialized_p)
+		    ao_ref_init (&ref, DR_REF (dr_a));
+		  if (stmt_may_clobber_ref_p_1 (stmt, &ref, true))
+		    return false;
+		  continue;
+		}
+
+	      bool dependent = false;
+	      /* If we run into a store of this same instance (we've just
+		 marked those) then delay dependence checking until we run
+		 into the last store because this is where it will have
+		 been sunk to (and we verify if we can do that as well).  */
+	      if (gimple_visited_p (stmt))
+		{
+		  if (stmt_info != last_store_info)
+		    continue;
+		  unsigned i;
+		  stmt_vec_info store_info;
+		  FOR_EACH_VEC_ELT (stores, i, store_info)
+		    {
+		      data_reference *store_dr
+			= STMT_VINFO_DATA_REF (store_info);
+		      ddr_p ddr = initialize_data_dependence_relation
+				    (dr_a, store_dr, vNULL);
+		      dependent
+			= vect_slp_analyze_data_ref_dependence (vinfo, ddr);
+		      free_dependence_relation (ddr);
+		      if (dependent)
+			break;
+		    }
+		}
+	      else
 		{
-		  data_reference *store_dr = STMT_VINFO_DATA_REF (store_info);
-		  ddr_p ddr = initialize_data_dependence_relation
-				(dr_a, store_dr, vNULL);
-		  dependent
-		    = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
+		  ddr_p ddr = initialize_data_dependence_relation (dr_a,
+								   dr_b, vNULL);
+		  dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
 		  free_dependence_relation (ddr);
-		  if (dependent)
-		    break;
 		}
+	      if (dependent)
+		return false;
 	    }
-	  else
-	    {
-	      ddr_p ddr = initialize_data_dependence_relation (dr_a,
-							       dr_b, vNULL);
-	      dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
-	      free_dependence_relation (ddr);
-	    }
-	  if (dependent)
-	    return false;
 	}
     }
   return true;
@@ -2399,10 +2454,20 @@ vect_slp_analyze_and_verify_node_alignment (vec_info *vinfo, slp_tree node)
 
   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
   vect_compute_data_ref_alignment (vinfo, dr_info);
-  /* For creating the data-ref pointer we need alignment of the
-     first element anyway.  */
+  /* In several places we need alignment of the first element anyway.  */
   if (dr_info != first_dr_info)
     vect_compute_data_ref_alignment (vinfo, first_dr_info);
+
+  /* For creating the data-ref pointer we need alignment of the
+     first element as well.  */
+  first_stmt_info = vect_find_first_scalar_stmt_in_slp (node);
+  if (first_stmt_info != SLP_TREE_SCALAR_STMTS (node)[0])
+    {
+      first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
+      if (dr_info != first_dr_info)
+	vect_compute_data_ref_alignment (vinfo, first_dr_info);
+    }
+
   if (! verify_data_ref_alignment (vinfo, dr_info))
     {
       if (dump_enabled_p ())
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 4031db4a9c6..e7a260877a9 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1915,6 +1915,25 @@ vect_find_last_scalar_stmt_in_slp (slp_tree node)
   return last;
 }
 
+/* Find the first stmt in NODE.  */
+
+stmt_vec_info
+vect_find_first_scalar_stmt_in_slp (slp_tree node)
+{
+  stmt_vec_info first = NULL;
+  stmt_vec_info stmt_vinfo;
+
+  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
+    {
+      stmt_vinfo = vect_orig_stmt (stmt_vinfo);
+      if (!first
+	  || get_later_stmt (stmt_vinfo, first) == first)
+	first = stmt_vinfo;
+    }
+
+  return first;
+}
+
 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
    two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
    (also containing the first GROUP1_SIZE stmts, since stores are
@@ -4205,15 +4224,33 @@ vect_schedule_slp_instance (vec_info *vinfo,
 		     "------>vectorizing SLP node starting from: %G",
 		     stmt_info->stmt);
 
-  /* Vectorized stmts go before the last scalar stmt which is where
-     all uses are ready.  */
-  stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
-  if (last_stmt_info)
-    si = gsi_for_stmt (last_stmt_info->stmt);
+  if (STMT_VINFO_DATA_REF (stmt_info)
+      && SLP_TREE_CODE (node) != VEC_PERM_EXPR)
+    {
+      /* Vectorized loads go before the first scalar load to make it
+	 ready early, vectorized stores go before the last scalar
+	 stmt which is where all uses are ready.  */
+      stmt_vec_info last_stmt_info = NULL;
+      if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
+	last_stmt_info = vect_find_first_scalar_stmt_in_slp (node);
+      else /* DR_IS_WRITE */
+	last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
+      si = gsi_for_stmt (last_stmt_info->stmt);
+    }
+  else if (SLP_TREE_CHILDREN (node).is_empty ())
+    /* This happens for reduction PHIs.  */
+    si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node)->stmt);
+  else if (stmt_vec_info first_stmt_info
+	     = vect_find_last_scalar_stmt_in_slp (node))
+    /* ???  Shifts by scalars hit us here again, we end up vectorizing
+       the shift operand but end up using the scalar operand anyway.
+       This needs to be better reflected in the SLP tree.  For now
+       use the last position if available.  */
+    si = gsi_for_stmt (first_stmt_info->stmt);
   else
     {
-      /* Or if we do not have 1:1 matching scalar stmts emit after the
-	 children vectorized defs.  */
+      /* Emit other stmts after the children vectorized defs which is
+	 earliest possible.  */
       gimple *last_stmt = NULL;
       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
 	if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 23799f0ac18..de7d77f3872 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -8728,7 +8728,8 @@ vectorizable_load (vec_info *vinfo,
       /* For BB vectorization always use the first stmt to base
 	 the data ref pointer on.  */
       if (bb_vinfo)
-	first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
+	first_stmt_info_for_drptr
+	  = vect_find_first_scalar_stmt_in_slp (slp_node);
 
       /* Check if the chain of loads is already vectorized.  */
       if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 32feec3e24b..e4d132493ca 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2001,6 +2001,7 @@ extern void vect_get_slp_defs (vec_info *, slp_tree, vec<vec<tree> > *,
 			       unsigned n = -1U);
 extern bool vect_slp_bb (basic_block);
 extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
+extern stmt_vec_info vect_find_first_scalar_stmt_in_slp (slp_tree);
 extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
 extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree,
 					    unsigned int * = NULL,

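A rough before/after sketch of the scheduling effect (hypothetical
example, not part of the patch), following the old and new placement
comments in vect_schedule_slp_instance:

    void baz (int *restrict d, int *s)
    {
      /* new: the vector load of s[0..3] is emitted at the first
         scalar load, i.e. here  */
      d[0] = s[0];
      d[1] = s[1];
      d[2] = s[2];
      /* old: the vector load was emitted at the last scalar load  */
      d[3] = s[3];
      /* the vector store is still emitted at the last scalar store  */
    }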
