public inbox for gcc@gcc.gnu.org
 help / color / mirror / Atom feed
* Simplify using context in loop-iv
@ 2014-02-07 10:27 Paulo Matos
  0 siblings, 0 replies; only message in thread
From: Paulo Matos @ 2014-02-07 10:27 UTC (permalink / raw)
  To: gcc

[-- Attachment #1: Type: text/plain, Size: 825 bytes --]

Hello,

This is a followup of 
http://gcc.gnu.org/ml/gcc/2014-01/msg00120.html
and
http://gcc.gnu.org/ml/gcc/2014-01/msg00055.html

This is a slightly long patch that attempts to simplify the niter and infinite expressions by recursively looking through the dominant basic blocks for register definitions, replacing them in the original expressions and simplifying the expression with simplify-rtx.

It allows GCC to generate a lot more zero overhead loops by finding much better bounds to loops or discovering that infinite loops are actually not so.

I would like to have some comments on the patch and its possible integration into upstream (I guess it can only go upstream once 4.9 is released, right?).

I bootstrapped and tested x86_64-unknown-linux-gnu with no regressions.

Thanks,

Paulo Matos



[-- Attachment #2: simplify-using-context.patch --]
[-- Type: application/octet-stream, Size: 13786 bytes --]

Index: loop-iv.c
===================================================================
--- loop-iv.c	(revision 207338)
+++ loop-iv.c	(working copy)
@@ -272,6 +272,41 @@ clear_iv_info (void)
   bivs.empty ();
 }
 
+struct regdef_entry
+{
+  int regno;
+  int bb_i;
+  int insn_from;
+  rtx def;
+};
+
+struct regdef_entry_hasher : typed_free_remove <regdef_entry>
+{
+  typedef regdef_entry value_type;
+  typedef regdef_entry compare_type;
+  static inline hashval_t hash (const value_type *);
+  static inline bool equal (const value_type *, const compare_type *);
+};
+
+/* Returns hash value for regdef RD.  */
+
+inline hashval_t
+regdef_entry_hasher::hash (const value_type *rd)
+{
+  return rd->regno;
+}
+
+/* Compares regdef RD1 and regdef RD2.  */
+
+inline bool
+regdef_entry_hasher::equal (const value_type *rd1, const compare_type *rd2)
+{
+  return rd1->regno == rd2->regno
+    && rd1->bb_i == rd2->bb_i
+    && rd1->insn_from == rd2->insn_from;
+}
+
+static hash_table <regdef_entry_hasher> regdefs;
 
 /* Prepare the data for an induction variable analysis of a LOOP.  */
 
@@ -285,6 +320,7 @@ iv_analysis_loop_init (struct loop *loop
     {
       df_set_flags (DF_EQ_NOTES + DF_DEFER_INSN_RESCAN);
       bivs.create (10);
+      regdefs.create (31);
       clean_slate = false;
     }
   else
@@ -1300,6 +1336,7 @@ iv_analysis_done (void)
       clean_slate = true;
       df_finish_pass (true);
       bivs.dispose ();
+      regdefs.dispose ();
       free (iv_ref_table);
       iv_ref_table = NULL;
       iv_ref_table_size = 0;
@@ -1849,11 +1886,334 @@ eliminate_implied_conditions (enum rtx_c
     eliminate_implied_condition (op, XEXP (elt, 0), head);
 }
 
+static rtx
+simplify_using_context_helper (basic_block bb, rtx expr, rtx from);
+
+static bool
+reg_defined_in_bb (basic_block bb, rtx reg, rtx from, rtx *insnp = NULL)
+{
+  rtx insn = from == NULL_RTX ? BB_END (bb) : from;
+  gcc_assert (from == NULL_RTX || BLOCK_FOR_INSN (from) == bb);
+
+  for (; insn && insn != PREV_INSN (BB_HEAD (bb));
+       insn = PREV_INSN (insn))
+    {
+      if (!INSN_P (insn))
+	continue;
+      if (df_reg_defined (insn, reg))
+	{
+	  if (insnp)
+	    *insnp = insn;
+	  return true;
+	}
+    }
+  return false;
+}
+
+static bool
+reg_defined_in_pred_helper (basic_block bb, rtx reg, bitmap visited)
+{
+  edge e;
+  edge_iterator ei;
+
+  FOR_EACH_EDGE (e, ei, bb->preds)
+    {
+      basic_block pred = e->src;
+
+      if (bitmap_bit_p (visited, pred->index))
+	continue;
+
+      if (dump_file)
+	fprintf (dump_file, ";; looking for definition of register %d in bb %d\n",
+		 REGNO (reg), pred->index);
+
+      bitmap_set_bit (visited, pred->index);
+      if (reg_defined_in_bb (pred, reg, NULL_RTX)
+	  || reg_defined_in_pred_helper (pred, reg, visited))
+	return true;
+    }
+  return false;
+}
+
+/* Returns true if register reg is defined in any block that is a successor
+   of dombb and reaches bb.  */
+static bool
+reg_defined_in_pred (basic_block bb, rtx reg, basic_block dombb)
+{
+  bitmap visited = BITMAP_ALLOC (NULL);
+  gcc_assert (REG_P (reg));
+  gcc_assert (dominated_by_p (CDI_DOMINATORS, bb, dombb));
+
+  bitmap_clear (visited);
+  bitmap_set_bit (visited, bb->index);
+  bitmap_set_bit (visited, dombb->index);
+  return reg_defined_in_pred_helper (bb, reg, visited);
+}
+
+static rtx
+simplify_reg_using_context (basic_block bb, rtx reg, rtx from)
+{
+  rtx insn_def = NULL_RTX, set;
+  bool def_found_in_loop = false;
+  basic_block dom_bb;
+
+  struct regdef_entry *rd;
+  struct regdef_entry *req = XNEW (struct regdef_entry);
+  req->regno = REGNO (reg);
+  req->bb_i = bb->index;
+  req->insn_from = from == NULL_RTX ? -1 : INSN_UID (from);
+  req->def = NULL_RTX;
+
+  gcc_assert (REG_P (reg));
+  gcc_assert (from == NULL_RTX || BLOCK_FOR_INSN (from) == bb);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; simplifying from bb %d register %d",
+	       bb->index, REGNO (reg));
+      if (from == NULL_RTX)
+	fprintf (dump_file, "\n");
+      else
+	fprintf (dump_file, " from insn %d\n", INSN_UID (from));
+    }
+
+  /* Check if we already have the definition requested in our hash.  */
+  if ((rd = regdefs.find (req)))
+    {
+      if (dump_file)
+	{
+	  fprintf (dump_file, ";; found definition in hash:");
+	  print_inline_rtx (dump_file, rd->def, 0);
+	  fprintf (dump_file, "\n");
+	}
+
+      free (req);
+      gcc_assert (rd->def != NULL_RTX);
+      return rd->def;
+    }
+
+  /* Find definition of reg in a dominator of bb.  */
+  for (dom_bb = bb;
+       dom_bb != ENTRY_BLOCK_PTR_FOR_FN (cfun);
+       dom_bb = get_immediate_dominator (CDI_DOMINATORS, dom_bb))
+    {
+      if (dump_file)
+	{
+	  fprintf (dump_file, ";; trying to find definition of reg %d in bb %d",
+		   REGNO (reg), dom_bb->index);
+	  if (from == NULL_RTX || bb != dom_bb)
+	    fprintf (dump_file, "\n");
+	  else
+	    fprintf (dump_file, " from insn %d\n", INSN_UID (from));
+	}
+
+      if (reg_defined_in_bb (dom_bb,
+			     reg,
+			     bb == dom_bb ? from : NULL_RTX,
+			     &insn_def))
+	{
+	  /* If definition is inside a basic block which is a loop header,
+	     we can use the definition but we cannot simplify it further
+	     because whatever values were defined before are changed by the loop.
+	     For example:
+	          i <- 10
+	          r1 <- 4
+	     .L2:
+	          r2 <- r1 + 5
+		  i <- i - 1
+		  goto .L2 until i == 0
+
+	      We can simplify r2 + 10 to r1 + 15, but we can't simplify it further.
+	  */
+	  if (bb_loop_header_p (dom_bb))
+	    {
+	      if (dump_file)
+		fprintf (dump_file, ";; found definition for register %d in loop header, basic block %d\n",
+			 REGNO (reg), dom_bb->index);
+	      def_found_in_loop = true;
+	    }
+
+	  break;
+	}
+    }
+
+  /* Can't find definition.  */
+  if (!insn_def
+      || !(set = single_set (insn_def)))
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; failed to find definition\n");
+
+      return reg;
+    }
+
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; found definition of register %d in bb %d\n",
+	       REGNO (reg), dom_bb->index);
+      print_inline_rtx (dump_file, insn_def, 0);
+      fprintf (dump_file, "\n");
+    }
+
+  /* Now we have to ensure that from all the blocks that reach bb, coming from dom_bb,
+     none redefine reg.  */
+  if (bb != dom_bb
+      && reg_defined_in_pred (bb, reg, dom_bb))
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; cannot simplify register %d because a def. has been found in a pred. of bb %d\n",
+		 REGNO (reg), bb->index);
+      return reg;
+    }
+
+  if (def_found_in_loop)
+    req->def = SET_SRC (set);
+  else
+    req->def = simplify_using_context_helper (dom_bb, SET_SRC (set), PREV_INSN (insn_def));
+
+  /* Save definition found in hash.  */
+  {
+    struct regdef_entry **slot = regdefs.find_slot (req, INSERT);
+    *slot = req;
+  }
+  return req->def;
+}
+
+/* Helper for simplify_using_context.
+   Simplify expr in the context of bb starting lookup from insn from (in bb),
+   return the simplified expression.
+   Returns expr if no simplifications are possible.  */
+static rtx
+simplify_using_context_helper (basic_block bb, rtx expr, rtx from = NULL_RTX)
+{
+  const char *fmt;
+  int i;
+  rtx copy = NULL_RTX;
+  bool optimized_p = false;
+  const enum rtx_code code = GET_CODE (expr);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; simplifying from bb %d the expression:\n",
+	       bb->index);
+      print_inline_rtx (dump_file, expr, 0);
+      fprintf (dump_file, "\n");
+    }
+
+  switch (GET_RTX_CLASS (code))
+    {
+    case RTX_EXTRA:
+    case RTX_CONST_OBJ:
+      /* Can't optimize these any further.  */
+      return expr;
+
+    case RTX_OBJ:
+
+      /* Currently we only simplify registers.  */
+      if (! REG_P (expr))
+	return expr;
+
+      return simplify_reg_using_context	(bb, expr, from);
+      break;
+
+    case RTX_MATCH:
+    case RTX_INSN:
+      /* We should never get these here.  */
+      gcc_unreachable ();
+
+    default:
+
+      copy = copy_rtx (expr);
+      fmt = GET_RTX_FORMAT (code);
+      for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+	if (fmt[i] == 'e')
+	  {
+	    rtx new_expr = simplify_using_context_helper (bb, XEXP (expr, i), from);
+	    rtx simp_expr = simplify_rtx (new_expr);
+	    if (simp_expr)
+	      new_expr = simp_expr;
+
+	    if (new_expr != XEXP (copy, i))
+	      {
+		/* Optimization success.  */
+		XEXP (copy, i) = new_expr;
+		optimized_p = true;
+	      }
+	  }
+
+      if (optimized_p)
+	{
+	  rtx simp_expr;
+
+	  /* Simplify abnormal expressions (not handled by simplify_rtx), like
+	     zero_extend of integers, etc.
+
+	     (zero_extend:M (const_int n)) == (const_int n)
+	     (sign_extend:M (const_int n)) == (const_int n)
+	  */
+	  if ((GET_CODE (copy) == ZERO_EXTEND
+	       || GET_CODE (copy) == SIGN_EXTEND)
+	      && CONST_INT_P (XEXP (copy, 0)))
+	    copy = XEXP (copy, 0);
+
+	  simp_expr = simplify_rtx (copy);
+	  if (simp_expr)
+	    copy = simp_expr;
+
+	  if (dump_file)
+	    {
+	      fprintf (dump_file, ";; simplification success from bb %d:\n",
+		       bb->index);
+	      print_inline_rtx (dump_file, expr, 0);
+	      fprintf (dump_file, "\n;; simplified to\n");
+	      print_inline_rtx (dump_file, copy, 0);
+	      fprintf (dump_file, "\n");
+	    }
+
+	  return copy;
+	}
+      else
+	{
+	  if (dump_file)
+	    {
+	      fprintf (dump_file, ";; simplification failed from bb %d:\n",
+		       bb->index);
+	      print_inline_rtx (dump_file, expr, 0);
+	      fprintf (dump_file, "\n");
+	    }
+	  return expr;
+	}
+    }
+
+  /* Never reach end of function.  */
+  gcc_unreachable ();
+}
+
+/* Simplifies *EXPR using context of LOOP.  *EXPR cannot be a list.  */
+
+static void
+simplify_using_context (struct loop *loop, rtx *expr)
+{
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; simplifying from loop header (loop: %d, bb: %d) the expression:\n",
+	       loop->num,
+	       loop->header->index);
+      print_inline_rtx (dump_file, *expr, 0);
+      fprintf (dump_file, "\n");
+    }
+  *expr = simplify_using_context_helper
+    (get_immediate_dominator (CDI_DOMINATORS, loop->header), *expr);
+}
+
 /* Simplifies *EXPR using initial values at the start of the LOOP.  If *EXPR
    is a list, its elements are assumed to be combined using OP.  */
 
 static void
-simplify_using_initial_values (struct loop *loop, enum rtx_code op, rtx *expr)
+simplify_using_initial_values (struct loop *loop,
+			       enum rtx_code op,
+			       rtx *expr,
+			       bool use_context = false)
 {
   bool expression_valid;
   rtx head, tail, insn, cond_list, last_valid_expr;
@@ -1889,8 +2249,7 @@ simplify_using_initial_values (struct lo
 	default:
 	  gcc_unreachable ();
 	}
-
-      simplify_using_initial_values (loop, UNKNOWN, &head);
+      simplify_using_initial_values (loop, UNKNOWN, &head, use_context);
       if (head == aggr)
 	{
 	  XEXP (*expr, 0) = aggr;
@@ -1900,10 +2259,10 @@ simplify_using_initial_values (struct lo
       else if (head == neutral)
 	{
 	  *expr = tail;
-	  simplify_using_initial_values (loop, op, expr);
+	  simplify_using_initial_values (loop, op, expr, use_context);
 	  return;
 	}
-      simplify_using_initial_values (loop, op, &tail);
+      simplify_using_initial_values (loop, op, &tail, use_context);
 
       if (tail && XEXP (tail, 0) == aggr)
 	{
@@ -2061,7 +2420,11 @@ simplify_using_initial_values (struct lo
       e = single_pred_edge (e->src);
     }
 
+
  out:
+  if (use_context)
+    simplify_using_context (loop, expr);
+
   free_EXPR_LIST_list (&cond_list);
   if (!CONSTANT_P (*expr))
     *expr = last_valid_expr;
@@ -2787,8 +3150,8 @@ iv_number_of_iterations (struct loop *lo
       && XEXP (desc->assumptions, 0) == const0_rtx)
     goto fail;
   simplify_using_initial_values (loop, IOR, &desc->noloop_assumptions);
-  simplify_using_initial_values (loop, IOR, &desc->infinite);
-  simplify_using_initial_values (loop, UNKNOWN, &desc->niter_expr);
+  simplify_using_initial_values (loop, IOR, &desc->infinite, true);
+  simplify_using_initial_values (loop, UNKNOWN, &desc->niter_expr, true);
 
   /* Rerun the simplification.  Consider code (created by copying loop headers)
 
@@ -2810,8 +3173,8 @@ iv_number_of_iterations (struct loop *lo
       && XEXP (desc->assumptions, 0) == const0_rtx)
     goto fail;
   simplify_using_initial_values (loop, IOR, &desc->noloop_assumptions);
-  simplify_using_initial_values (loop, IOR, &desc->infinite);
-  simplify_using_initial_values (loop, UNKNOWN, &desc->niter_expr);
+  simplify_using_initial_values (loop, IOR, &desc->infinite, true);
+  simplify_using_initial_values (loop, UNKNOWN, &desc->niter_expr, true);
 
   if (desc->noloop_assumptions
       && XEXP (desc->noloop_assumptions, 0) == const_true_rtx)
@@ -2844,7 +3207,12 @@ iv_number_of_iterations (struct loop *lo
 	 brings no gain (and if it happens to do, the cse pass will take care
 	 of it anyway).  So prevent this behavior, unless it enabled us to
 	 derive that the number of iterations is a constant.  */
-      desc->niter_expr = old_niter;
+      /* We do the context simplification as well which can be quite
+	 beneficial so we will revert only the simplification if rtx cost is
+	 less than it used to be.  */
+      if (set_src_cost (desc->niter_expr, optimize_loop_for_speed_p (loop)) >=
+	  set_src_cost (old_niter, optimize_loop_for_speed_p (loop)))
+	desc->niter_expr = old_niter;
     }
 
   return;
@@ -2855,7 +3223,7 @@ zero_iter_simplify:
   if (desc->assumptions
       && XEXP (desc->assumptions, 0) == const0_rtx)
     goto fail;
-  simplify_using_initial_values (loop, IOR, &desc->infinite);
+  simplify_using_initial_values (loop, IOR, &desc->infinite, true);
 
   /* Fallthru.  */
 zero_iter:

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2014-02-07 10:27 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-02-07 10:27 Simplify using context in loop-iv Paulo Matos

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).