public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-1699] tree-optimization/101120 - fix compile-time issue with SLP groups
@ 2021-06-21 13:02 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2021-06-21 13:02 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:0ad9c7087ef3904da89f2db6007b6d28b116087f

commit r12-1699-g0ad9c7087ef3904da89f2db6007b6d28b116087f
Author: Richard Biener <rguenther@suse.de>
Date:   Fri Jun 18 14:07:00 2021 +0200

    tree-optimization/101120 - fix compile-time issue with SLP groups
    
    This places two hacks to avoid an old compile-time issue when
    vectorizing large permuted SLP groups with gaps, where we end up
    emitting loads and IV adjustments for the gap as well; those
    have quite a high cost until they are eventually cleaned up.
    
    The first hack is to fold the auto-inc style IV updates early
    in the vectorizer rather than in the next forwprop pass; doing so
    shortens the SSA use-def chains of the used IV.
    
    The second hack is to remove the unused loads after we've picked
    all that we possibly use.
    
    2021-06-18  Richard Biener  <rguenther@suse.de>
    
            PR tree-optimization/101120
            * tree-vect-data-refs.c (bump_vector_ptr): Fold the
            built increment.
            * tree-vect-slp.c (vect_transform_slp_perm_load): Add
            DR chain DCE capability.
            * tree-vectorizer.h (vect_transform_slp_perm_load): Adjust.
            * tree-vect-stmts.c (vectorizable_load): Remove unused
            loads in the DR chain for SLP.

Diff:
---
 gcc/tree-vect-data-refs.c | 12 +++++++++++-
 gcc/tree-vect-slp.c       | 31 ++++++++++++++++++++++++++-----
 gcc/tree-vect-stmts.c     |  7 ++++++-
 gcc/tree-vectorizer.h     |  2 +-
 4 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index bb086c6ac1c..be067c8923b 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -53,6 +53,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-hash-traits.h"
 #include "vec-perm-indices.h"
 #include "internal-fn.h"
+#include "gimple-fold.h"
 
 /* Return true if load- or store-lanes optab OPTAB is implemented for
    COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
@@ -5026,7 +5027,7 @@ bump_vector_ptr (vec_info *vinfo,
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   tree update = TYPE_SIZE_UNIT (vectype);
-  gassign *incr_stmt;
+  gimple *incr_stmt;
   ssa_op_iter iter;
   use_operand_p use_p;
   tree new_dataref_ptr;
@@ -5041,6 +5042,15 @@ bump_vector_ptr (vec_info *vinfo,
   incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
 				   dataref_ptr, update);
   vect_finish_stmt_generation (vinfo, stmt_info, incr_stmt, gsi);
+  /* Fold the increment, avoiding excessive use-def chains of
+     those, leading to compile-time issues for passes until the next
+     forwprop pass which would do this as well.  */
+  gimple_stmt_iterator fold_gsi = gsi_for_stmt (incr_stmt);
+  if (fold_stmt (&fold_gsi, follow_all_ssa_edges))
+    {
+      incr_stmt = gsi_stmt (fold_gsi);
+      update_stmt (incr_stmt);
+    }
 
   /* Copy the points-to information if it exists. */
   if (DR_PTR_INFO (dr))
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 99e7ce21e4e..a32f86b8bc7 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -6284,14 +6284,15 @@ vect_get_slp_defs (vec_info *,
    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
    permute statements for the SLP node NODE.  Store the number of vector
    permute instructions in *N_PERMS and the number of vector load
-   instructions in *N_LOADS.  */
+   instructions in *N_LOADS.  If DCE_CHAIN is true, remove all definitions
+   that were not needed.  */
 
 bool
 vect_transform_slp_perm_load (vec_info *vinfo,
 			      slp_tree node, vec<tree> dr_chain,
 			      gimple_stmt_iterator *gsi, poly_uint64 vf,
 			      bool analyze_only, unsigned *n_perms,
-			      unsigned int *n_loads)
+			      unsigned int *n_loads, bool dce_chain)
 {
   stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
   int vec_index = 0;
@@ -6370,6 +6371,7 @@ vect_transform_slp_perm_load (vec_info *vinfo,
     }
   auto_sbitmap used_in_lanes (in_nlanes);
   bitmap_clear (used_in_lanes);
+  auto_bitmap used_defs;
 
   unsigned int count = mask.encoded_nelts ();
   mask.quick_grow (count);
@@ -6477,11 +6479,20 @@ vect_transform_slp_perm_load (vec_info *vinfo,
 					       mask_vec);
 		      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
 						   gsi);
+		      if (dce_chain)
+			{
+			  bitmap_set_bit (used_defs, first_vec_index + ri);
+			  bitmap_set_bit (used_defs, second_vec_index + ri);
+			}
 		    }
 		  else
-		    /* If mask was NULL_TREE generate the requested
-		       identity transform.  */
-		    perm_stmt = SSA_NAME_DEF_STMT (first_vec);
+		    {
+		      /* If mask was NULL_TREE generate the requested
+			 identity transform.  */
+		      perm_stmt = SSA_NAME_DEF_STMT (first_vec);
+		      if (dce_chain)
+			bitmap_set_bit (used_defs, first_vec_index + ri);
+		    }
 
 		  /* Store the vector statement in NODE.  */
 		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
@@ -6521,6 +6532,16 @@ vect_transform_slp_perm_load (vec_info *vinfo,
 	}
     }
 
+  if (dce_chain)
+    for (unsigned i = 0; i < dr_chain.length (); ++i)
+      if (!bitmap_bit_p (used_defs, i))
+	{
+	  gimple *stmt = SSA_NAME_DEF_STMT (dr_chain[i]);
+	  gimple_stmt_iterator rgsi = gsi_for_stmt (stmt);
+	  gsi_remove (&rgsi, true);
+	  release_defs (stmt);
+	}
+
   return true;
 }
 
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index eeef96a2eb6..4ee11b2041a 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -9762,8 +9762,13 @@ vectorizable_load (vec_info *vinfo,
       if (slp_perm)
         {
 	  unsigned n_perms;
+	  /* For SLP we know we've seen all possible uses of dr_chain so
+	     direct vect_transform_slp_perm_load to DCE the unused parts.
+	     ???  This is a hack to prevent compile-time issues as seen
+	     in PR101120 and friends.  */
 	  bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
-						  gsi, vf, false, &n_perms);
+						  gsi, vf, false, &n_perms,
+						  nullptr, true);
 	  gcc_assert (ok);
         }
       else
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 04c20f8bd0f..5c71fbc487f 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2012,7 +2012,7 @@ extern void vect_free_slp_instance (slp_instance);
 extern bool vect_transform_slp_perm_load (vec_info *, slp_tree, vec<tree>,
 					  gimple_stmt_iterator *, poly_uint64,
 					  bool, unsigned *,
-					  unsigned * = nullptr);
+					  unsigned * = nullptr, bool = false);
 extern bool vect_slp_analyze_operations (vec_info *);
 extern void vect_schedule_slp (vec_info *, vec<slp_instance>);
 extern opt_result vect_analyze_slp (vec_info *, unsigned);


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-06-21 13:02 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-21 13:02 [gcc r12-1699] tree-optimization/101120 - fix compile-time issue with SLP groups Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).