public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/tnfchris/heads/gcc-14-early-break)] middle-end: Implement code motion and dependency analysis for early breaks
@ 2023-11-15 14:54 Tamar Christina
  0 siblings, 0 replies; only message in thread
From: Tamar Christina @ 2023-11-15 14:54 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:4dd62ce6c19171e0d7c8e690998afe0c57ce5564

commit 4dd62ce6c19171e0d7c8e690998afe0c57ce5564
Author: Tamar Christina <tamar.christina@arm.com>
Date:   Thu Nov 2 15:18:45 2023 +0000

    middle-end: Implement code motion and dependency analysis for early breaks

Diff:
---
 gcc/tree-vect-data-refs.cc | 330 +++++++++++++++++++++++++++++++++++++++++++++
 gcc/tree-vect-loop.cc      |  55 ++++++++
 gcc/tree-vect-stmts.cc     |   3 +
 gcc/tree-vectorizer.h      |  23 ++++
 4 files changed, 411 insertions(+)

diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index d5c9c4a11c2..9549570bac8 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -613,6 +613,331 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
   return opt_result::success ();
 }
 
+/* This function tries to validate whether an early break vectorization
+   is possible for the current instruction sequence. Returns True i
+   possible, otherwise False.
+
+   Requirements:
+     - Any memory access must be to a fixed size buffer.
+     - There must not be any loads and stores to the same object.
+     - Multiple loads are allowed as long as they don't alias.
+
+   NOTE:
+     This implemementation is very conservative. Any overlappig loads/stores
+     that take place before the early break statement gets rejected aside from
+     WAR dependencies.
+
+     i.e.:
+
+	a[i] = 8
+	c = a[i]
+	if (b[i])
+	  ...
+
+	is not allowed, but
+
+	c = a[i]
+	a[i] = 8
+	if (b[i])
+	  ...
+
+	is which is the common case.
+
+   Arguments:
+     - LOOP_VINFO: loop information for the current loop.
+     - CHAIN: Currently detected sequence of instructions that need to be moved
+	      if we are to vectorize this early break.
+     - FIXED: Sequences of SSA_NAMEs that must not be moved, they are reachable from
+	      one or more cond conditions.  If this set overlaps with CHAIN then FIXED
+	      takes precedence.  This deals with non-single use cases.
+     - LOADS: List of all loads found during traversal.
+     - BASES: List of all load data references found during traversal.
+     - GSTMT: Current position to inspect for validity.  The sequence
+	      will be moved upwards from this point.
+     - REACHING_VUSE: The dominating VUSE found so far.  */
+
+static bool
+validate_early_exit_stmts (loop_vec_info loop_vinfo, hash_set<tree> *chain,
+			   hash_set<tree> *fixed, vec<tree> *loads,
+			   vec<data_reference *> *bases, tree *reaching_vuse,
+			   gimple_stmt_iterator *gstmt)
+{
+  if (gsi_end_p (*gstmt))
+    return true;
+
+  gimple *stmt = gsi_stmt (*gstmt);
+  /* ?? Do I need to move debug statements? not quite sure..  */
+  if (gimple_has_ops (stmt)
+      && !is_gimple_debug (stmt))
+    {
+      tree dest = NULL_TREE;
+      /* Try to find the SSA_NAME being defined.  For Statements with an LHS
+	 use the LHS, if not, assume that the first argument of a call is the
+	 value being defined.  e.g. MASKED_LOAD etc.  */
+      if (gimple_has_lhs (stmt))
+	dest = gimple_get_lhs (stmt);
+      else if (const gcall *call = dyn_cast <const gcall *> (stmt))
+	dest = gimple_arg (call, 0);
+      else if (const gcond *cond = dyn_cast <const gcond *> (stmt))
+	{
+	  /* Operands of conds are ones we can't move.  */
+	  fixed->add (gimple_cond_lhs (cond));
+	  fixed->add (gimple_cond_rhs (cond));
+	}
+
+      bool move = false;
+
+      stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (stmt);
+      if (!stmt_vinfo)
+	{
+	   if (dump_enabled_p ())
+	     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			      "early breaks not supported. Unknown"
+			      " statement: %G", stmt);
+	   return false;
+	}
+
+      auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo);
+      if (dr_ref)
+	{
+	   /* We currently only support statically allocated objects due to
+	      not having first-faulting loads support or peeling for alignment
+	      support.  Compute the size of the referenced object (it could be
+	      dynamically allocated).  */
+	   tree obj = DR_BASE_ADDRESS (dr_ref);
+	   if (!obj || TREE_CODE (obj) != ADDR_EXPR)
+	     {
+	       if (dump_enabled_p ())
+		 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				  "early breaks only supported on statically"
+				  " allocated objects.\n");
+	       return false;
+	     }
+
+	   tree refop = TREE_OPERAND (obj, 0);
+	   tree refbase = get_base_address (refop);
+	   if (!refbase || !DECL_P (refbase) || !DECL_SIZE (refbase)
+	       || TREE_CODE (DECL_SIZE (refbase)) != INTEGER_CST)
+	     {
+	       if (dump_enabled_p ())
+		 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				  "early breaks only supported on statically"
+				  " allocated objects.\n");
+	       return false;
+	     }
+
+	   if (DR_IS_READ (dr_ref))
+	     {
+		loads->safe_push (dest);
+		bases->safe_push (dr_ref);
+	     }
+	   else if (DR_IS_WRITE (dr_ref))
+	     {
+		for (auto dr : bases)
+		  if (same_data_refs_base_objects (dr, dr_ref))
+		    {
+		      if (dump_enabled_p ())
+			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+					   vect_location,
+					   "early breaks only supported,"
+					   " overlapping loads and stores found"
+					   " before the break statement.\n");
+		      return false;
+		    }
+		/* Any writes starts a new chain. */
+		move = true;
+	     }
+	}
+
+      /* If a statement is live and escapes the loop through usage in the loop
+	 epilogue then we can't move it since we need to maintain its
+	 reachability through all exits.  */
+      bool skip = false;
+      if (STMT_VINFO_LIVE_P (stmt_vinfo)
+	  && !(dr_ref && DR_IS_WRITE (dr_ref)))
+	{
+	  imm_use_iterator imm_iter;
+	  use_operand_p use_p;
+	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, dest)
+	    {
+	      basic_block bb = gimple_bb (USE_STMT (use_p));
+	      skip = bb == LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
+	      if (skip)
+		break;
+	    }
+	}
+
+      /* If we found the defining statement of a something that's part of the
+	 chain then expand the chain with the new SSA_VARs being used.  */
+      if (!skip && (chain->contains (dest) || move))
+	{
+	  move = true;
+	  for (unsigned x = 0; x < gimple_num_args (stmt); x++)
+	    {
+	      tree var = gimple_arg (stmt, x);
+	      if (TREE_CODE (var) == SSA_NAME)
+		{
+		  if (fixed->contains (dest))
+		    {
+		      move = false;
+		      fixed->add (var);
+		    }
+		  else
+		    chain->add (var);
+		}
+	      else
+		{
+		  use_operand_p use_p;
+		  ssa_op_iter iter;
+		  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
+		    {
+		      tree op = USE_FROM_PTR (use_p);
+		      gcc_assert (TREE_CODE (op) == SSA_NAME);
+		      if (fixed->contains (dest))
+			{
+			  move = false;
+			  fixed->add (op);
+			}
+		      else
+			chain->add (op);
+		    }
+		}
+	    }
+
+	  if (dump_enabled_p ())
+	    {
+	      if (move)
+		dump_printf_loc (MSG_NOTE, vect_location,
+				"found chain %G", stmt);
+	      else
+		dump_printf_loc (MSG_NOTE, vect_location,
+				"ignored chain %G, not single use", stmt);
+	    }
+	}
+
+      if (move)
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "==> recording stmt %G", stmt);
+
+	  for (tree ref : loads)
+	    if (stmt_may_clobber_ref_p (stmt, ref, true))
+	      {
+	        if (dump_enabled_p ())
+		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				   "early breaks not supported as memory used"
+				   " may alias.\n");
+	        return false;
+	      }
+
+	  /* If we've moved a VDEF, extract the defining MEM and update
+	     usages of it.   */
+	  tree vdef;
+	  if ((vdef = gimple_vdef (stmt)))
+	    {
+	      /* This statement is to be moved.  */
+	      LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo).safe_push (stmt);
+	      *reaching_vuse = gimple_vuse (stmt);
+	    }
+	}
+    }
+
+  gsi_prev (gstmt);
+
+  if (!validate_early_exit_stmts (loop_vinfo, chain, fixed, loads, bases,
+				  reaching_vuse, gstmt))
+    return false;
+
+  if (gimple_vuse (stmt) && !gimple_vdef (stmt))
+    {
+      LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).safe_push (stmt);
+      if (dump_enabled_p ())
+	  dump_printf_loc (MSG_NOTE, vect_location,
+			   "marked statement for vUSE update: %G", stmt);
+    }
+
+  return true;
+}
+
+/* Funcion vect_analyze_early_break_dependences.
+
+   Examime all the data references in the loop and make sure that if we have
+   mulitple exits that we are able to safely move stores such that they become
+   safe for vectorization.  The function also calculates the place where to move
+   the instructions to and computes what the new vUSE chain should be.
+
+   This works in tandem with the CFG that will be produced by
+   slpeel_tree_duplicate_loop_to_edge_cfg later on.  */
+
+static opt_result
+vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
+{
+  DUMP_VECT_SCOPE ("vect_analyze_early_break_dependences");
+
+  hash_set<tree> chain, fixed;
+  auto_vec<tree> loads;
+  auto_vec<data_reference *> bases;
+  basic_block dest_bb = NULL;
+  tree vuse = NULL;
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+		     "loop contains multiple exits, analyzing"
+		     " statement dependencies.\n");
+
+  for (gcond *c : LOOP_VINFO_LOOP_CONDS (loop_vinfo))
+    {
+      stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (c);
+      if (STMT_VINFO_TYPE (loop_cond_info) != loop_exit_ctrl_vec_info_type)
+	continue;
+
+      gimple *stmt = STMT_VINFO_STMT (loop_cond_info);
+      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+
+      /* Initiaze the vuse chain with the one at the early break.  */
+      if (!vuse)
+	vuse = gimple_vuse (c);
+
+      if (!validate_early_exit_stmts (loop_vinfo, &chain, &fixed, &loads,
+				     &bases, &vuse, &gsi))
+	return opt_result::failure_at (stmt,
+				       "can't safely apply code motion to "
+				       "dependencies of %G to vectorize "
+				       "the early exit.\n", stmt);
+
+      /* Save destination as we go, BB are visited in order and the last one
+	is where statements should be moved to.  */
+      if (!dest_bb)
+	dest_bb = gimple_bb (c);
+      else
+	{
+	  basic_block curr_bb = gimple_bb (c);
+	  if (dominated_by_p (CDI_DOMINATORS, curr_bb, dest_bb))
+	    dest_bb = curr_bb;
+	}
+    }
+
+  dest_bb = FALLTHRU_EDGE (dest_bb)->dest;
+  gcc_assert (dest_bb);
+  LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb;
+
+  for (auto g : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo))
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "updated use: %T, mem_ref: %G",
+			 vuse, g);
+    }
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+		     "recorded statements to be moved to BB %d\n",
+		     LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo)->index);
+
+  return opt_result::success ();
+}
+
 /* Function vect_analyze_data_ref_dependences.
 
    Examine all the data references in the loop, and make sure there do not
@@ -657,6 +982,11 @@ vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
 	  return res;
       }
 
+  /* If we have early break statements in the loop, check to see if they
+     are of a form we can vectorizer.  */
+  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+    return vect_analyze_early_break_dependences (loop_vinfo);
+
   return opt_result::success ();
 }
 
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 5213aa0169c..4cf7f65dc16 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1040,6 +1040,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
     partial_load_store_bias (0),
     peeling_for_gaps (false),
     peeling_for_niter (false),
+    early_breaks (false),
     no_data_dependencies (false),
     has_mask_store (false),
     scalar_loop_scaling (profile_probability::uninitialized ()),
@@ -11492,6 +11493,55 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
   epilogue_vinfo->shared->save_datarefs ();
 }
 
+/*  When vectorizing early break statements instructions that happen before
+    the early break in the current BB need to be moved to after the early
+    break.  This function deals with that and assumes that any validity
+    checks has already been performed.
+
+    While moving the instructions if it encounters a VUSE or VDEF it then
+    corrects the VUSES as it moves the statements along.  GDEST is the location
+    in which to insert the new statements.  */
+
+static void
+move_early_exit_stmts (loop_vec_info loop_vinfo)
+{
+  if (LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo).is_empty ())
+    return;
+
+  /* Move all stmts that need moving.  */
+  basic_block dest_bb = LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo);
+  gimple_stmt_iterator dest_gsi = gsi_start_bb (dest_bb);
+
+  for (gimple *stmt : LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo))
+    {
+      /* Check to see if statement is still required for vect or has been
+	 elided.  */
+      auto stmt_info = loop_vinfo->lookup_stmt (stmt);
+      if (!stmt_info)
+	continue;
+
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location, "moving stmt %G", stmt);
+
+      gimple_stmt_iterator stmt_gsi = gsi_for_stmt (stmt);
+      gsi_move_before (&stmt_gsi, &dest_gsi);
+      gsi_prev (&dest_gsi);
+      update_stmt (stmt);
+    }
+
+  /* Update all the stmts with their new reaching VUSES.  */
+  tree vuse = gimple_vuse (LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo).last ());
+  for (auto p : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo))
+    {
+      if (dump_enabled_p ())
+	  dump_printf_loc (MSG_NOTE, vect_location,
+			   "updating vuse to %T for stmt %G", vuse, p);
+      unlink_stmt_vdef (p);
+      gimple_set_vuse (p, vuse);
+      update_stmt (p);
+    }
+}
+
 /* Function vect_transform_loop.
 
    The analysis phase has determined that the loop is vectorizable.
@@ -11641,6 +11691,11 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
       vect_schedule_slp (loop_vinfo, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
     }
 
+  /* Handle any code motion that we need to for early-break vectorization after
+     we've done peeling but just before we start vectorizing.  */
+  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+    move_early_exit_stmts (loop_vinfo);
+
   /* FORNOW: the vectorizer supports only loops which body consist
      of one basic block (header + empty latch). When the vectorizer will
      support more involved loop forms, the order by which the BBs are
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 65883e04ad7..3a22bf02f5a 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -13545,6 +13545,9 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
 	case vect_first_order_recurrence:
 	  dump_printf (MSG_NOTE, "first order recurrence\n");
 	  break;
+       case vect_early_exit_def:
+	  dump_printf (MSG_NOTE, "early exit\n");
+	  break;
 	case vect_unknown_def_type:
 	  dump_printf (MSG_NOTE, "unknown\n");
 	  break;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index d2ddc2e4ad5..76451a7fefe 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -66,6 +66,7 @@ enum vect_def_type {
   vect_double_reduction_def,
   vect_nested_cycle,
   vect_first_order_recurrence,
+  vect_early_exit_def,
   vect_unknown_def_type
 };
 
@@ -888,6 +889,10 @@ public:
      we need to peel off iterations at the end to form an epilogue loop.  */
   bool peeling_for_niter;
 
+  /* When the loop has early breaks that we can vectorize we need to peel
+     the loop for the break finding loop.  */
+  bool early_breaks;
+
   /* List of loop additional IV conditionals found in the loop.  */
   auto_vec<gcond *> conds;
 
@@ -942,6 +947,20 @@ public:
   /* The controlling loop IV for the scalar loop being vectorized.  This IV
      controls the natural exits of the loop.  */
   edge scalar_loop_iv_exit;
+
+  /* Used to store the list of statements needing to be moved if doing early
+     break vectorization as they would violate the scalar loop semantics if
+     vectorized in their current location.  These are stored in order that they need
+     to be moved.  */
+  auto_vec<gimple *> early_break_conflict;
+
+  /* The final basic block where to move statements to.  In the case of
+     multiple exits this could be pretty far away.  */
+  basic_block early_break_dest_bb;
+
+  /* Statements whose VUSES need updating if early break vectorization is to
+     happen.  */
+  auto_vec<gimple*> early_break_vuses;
 } *loop_vec_info;
 
 /* Access Functions.  */
@@ -996,6 +1015,10 @@ public:
 #define LOOP_VINFO_REDUCTION_CHAINS(L)     (L)->reduction_chains
 #define LOOP_VINFO_PEELING_FOR_GAPS(L)     (L)->peeling_for_gaps
 #define LOOP_VINFO_PEELING_FOR_NITER(L)    (L)->peeling_for_niter
+#define LOOP_VINFO_EARLY_BREAKS(L)         (L)->early_breaks
+#define LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS(L) (L)->early_break_conflict
+#define LOOP_VINFO_EARLY_BRK_DEST_BB(L)    (L)->early_break_dest_bb
+#define LOOP_VINFO_EARLY_BRK_VUSES(L)      (L)->early_break_vuses
 #define LOOP_VINFO_LOOP_CONDS(L)           (L)->conds
 #define LOOP_VINFO_LOOP_IV_COND(L)         (L)->loop_iv_cond
 #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-11-15 14:54 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-15 14:54 [gcc(refs/users/tnfchris/heads/gcc-14-early-break)] middle-end: Implement code motion and dependency analysis for early breaks Tamar Christina

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).