public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Rename across basic block boundaries
@ 2011-06-21 14:12 Bernd Schmidt
  2011-08-23 10:56 ` Ping: " Bernd Schmidt
  2011-08-24 12:50 ` Richard Sandiford
  0 siblings, 2 replies; 11+ messages in thread
From: Bernd Schmidt @ 2011-06-21 14:12 UTC (permalink / raw)
  To: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 1627 bytes --]

This patch requires
  http://gcc.gnu.org/ml/gcc-patches/2011-05/msg02193.html
as a prerequisite, and supersedes
  http://gcc.gnu.org/ml/gcc-patches/2011-05/msg02194.html

The idea here is to allow regrename to operate across basic block
boundaries. This helps for targets that use sched_ebb (such as C6X), and
by exposing more chains, we also help on targets that can benefit from
PREFERRED_RENAME_CLASS (again, C6X).

The previous patch I posted only works on extended basic blocks, but
what I really wanted was to have this analysis also work for loops.
Another drawback of the previous patch was that it scanned ebbs as one
large block, which could potentially lead to worse optimization if we
encountered a situation that makes us fail the entire block.

This version, when reaching the end of a basic block, records state
which is used to initialize open chains when starting to scan its
successors. We still only ever scan each block once; for loops, it's
typically good enough to have data from one incoming edge. If for some
reason we can't match up all outgoing and incoming edges, we just fail
to merge them, behaviour then reverts to what we currently have
(renaming inside a single block).

One change in behaviour is that tick isn't reset for each basic block.
However, this shouldn't be a problem as it just randomizes the order in
which we choose registers.

Bootstrapped and tested on i686-linux (with -frename-registers forced on
for -O2); and a variant of it tested in a 4.5 C6X tree.  Benchmarking on
C6X suggested that it helps with certain performance issues.  Ok (this
and the preliminary patch)?


Bernd

[-- Attachment #2: rnreg-cfg.diff --]
[-- Type: text/plain, Size: 26563 bytes --]

	* regrename.c (struct du_head): Make nregs signed.
	(scan_rtx_reg, scan_rtx_address, dump_def_use_chain): Remove
	declarations.
	(closed_chains): Remove.
	(chain_from_id): New static function.
	(dump_def_use_chain): Change argument to be an int, indicating
	the first ID to print.  All callers changed.
	(mark_conflict, create_new_chain): Move upwards in the file.
	(merge_overlapping_regs): Use chain_from_id.  Assert that
	chains don't conflict with themselves.
	(rename_chains): Take no argument.  Iterate over id_to_chain
	rather to find chains to rename.  Clear tick before the main
	loop.
	(struct incoming_reg_info): New struct.
	(struct bb_rename_info): New struct.
	(init_rename_info, set_incoming_from_chain, merge_chains): New
	static functions.
	(regrename_analyze): New static function, broken out of
	regrename_optimize.  Record and make use of open chain information
	at basic block boundaries, and merge chains where possible.
	(scan_rtx_reg): Make this_nregs signed.  Don't update
	closed_chains.
	(build_def_use): Return a bool to indicate success.  All callers
	changed.  Don't initialize global data here.
	(regrename_optimize): Move most code out of here into
	regrename_analyze.
	* regs.h (add_range_to_hard_reg_set, remove_range_from_hard_reg_set,
	range_overlaps_hard_reg_set_p, range_in_hard_reg_set_p): New
	static inline functions.

Index: trunk/gcc/regrename.c
===================================================================
--- trunk.orig/gcc/regrename.c
+++ trunk/gcc/regrename.c
@@ -75,8 +75,9 @@ struct du_head
   struct du_head *next_chain;
   /* The first and last elements of this chain.  */
   struct du_chain *first, *last;
-  /* Describes the register being tracked.  */
-  unsigned regno, nregs;
+  /* Describe the register being tracked, register number and count.  */
+  unsigned regno;
+  int nregs;
 
   /* A unique id to be used as an index into the conflicts bitmaps.  */
   unsigned id;
@@ -138,14 +139,9 @@ static int this_tick = 0;
 static struct obstack rename_obstack;
 
 static void do_replace (struct du_head *, int);
-static void scan_rtx_reg (rtx, rtx *, enum reg_class,
-			  enum scan_actions, enum op_type);
-static void scan_rtx_address (rtx, rtx *, enum reg_class,
-			      enum scan_actions, enum machine_mode);
 static void scan_rtx (rtx, rtx *, enum reg_class, enum scan_actions,
 		      enum op_type);
-static struct du_head *build_def_use (basic_block);
-static void dump_def_use_chain (struct du_head *);
+static bool build_def_use (basic_block);
 
 typedef struct du_head *du_head_p;
 DEF_VEC_P (du_head_p);
@@ -157,9 +153,8 @@ static unsigned current_id;
 /* A mapping of unique id numbers to chains.  */
 static VEC(du_head_p, heap) *id_to_chain;
 
-/* List of currently open chains, and closed chains that can be renamed.  */
+/* List of currently open chains.  */
 static struct du_head *open_chains;
-static struct du_head *closed_chains;
 
 /* Bitmap of open chains.  The bits set always match the list found in
    open_chains.  */
@@ -172,14 +167,32 @@ static HARD_REG_SET live_in_chains;
    between this and live_in_chains is empty.  */
 static HARD_REG_SET live_hard_regs;
 
-/* Dump all def/use chains in CHAINS to DUMP_FILE.  */
+/* Return the chain corresponding to id number ID.  Take into account that
+   chains may have been merged.  */
+static du_head_p
+chain_from_id (unsigned int id)
+{
+  for (;;)
+    {
+      du_head_p chain = VEC_index (du_head_p, id_to_chain, id);
+      if (chain->id == id)
+	return chain;
+      id = chain->id;
+    }
+}
+
+/* Dump all def/use chains, starting at id FROM.  */
 
 static void
-dump_def_use_chain (struct du_head *head)
+dump_def_use_chain (int from)
 {
-  while (head)
+  du_head_p head;
+  int i;
+  FOR_EACH_VEC_ELT (du_head_p, id_to_chain, i, head)
     {
       struct du_chain *this_du = head->first;
+      if (i < from)
+	continue;
       fprintf (dump_file, "Register %s (%d):",
 	       reg_names[head->regno], head->nregs);
       while (this_du)
@@ -204,6 +217,84 @@ free_chain_data (void)
   VEC_free (du_head_p, heap, id_to_chain);
 }
 
+/* Walk all chains starting with CHAINS and record that they conflict with
+   another chain whose id is ID.  */
+
+static void
+mark_conflict (struct du_head *chains, unsigned id)
+{
+  while (chains)
+    {
+      bitmap_set_bit (&chains->conflicts, id);
+      chains = chains->next_chain;
+    }
+}
+
+/* Create a new chain for THIS_NREGS registers starting at THIS_REGNO,
+   and record its occurrence in *LOC, which is being written to in INSN.
+   This access requires a register of class CL.  */
+
+static du_head_p
+create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc,
+		  rtx insn, enum reg_class cl)
+{
+  struct du_head *head = XOBNEW (&rename_obstack, struct du_head);
+  struct du_chain *this_du;
+  int nregs;
+
+  head->next_chain = open_chains;
+  head->regno = this_regno;
+  head->nregs = this_nregs;
+  head->need_caller_save_reg = 0;
+  head->cannot_rename = 0;
+
+  VEC_safe_push (du_head_p, heap, id_to_chain, head);
+  head->id = current_id++;
+
+  bitmap_initialize (&head->conflicts, &bitmap_default_obstack);
+  bitmap_copy (&head->conflicts, &open_chains_set);
+  mark_conflict (open_chains, head->id);
+
+  /* Since we're tracking this as a chain now, remove it from the
+     list of conflicting live hard registers and track it in
+     live_in_chains instead.  */
+  nregs = head->nregs;
+  while (nregs-- > 0)
+    {
+      SET_HARD_REG_BIT (live_in_chains, head->regno + nregs);
+      CLEAR_HARD_REG_BIT (live_hard_regs, head->regno + nregs);
+    }
+
+  COPY_HARD_REG_SET (head->hard_conflicts, live_hard_regs);
+  bitmap_set_bit (&open_chains_set, head->id);
+
+  open_chains = head;
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "Creating chain %s (%d)",
+	       reg_names[head->regno], head->id);
+      if (insn != NULL_RTX)
+	fprintf (dump_file, " at insn %d", INSN_UID (insn));
+      fprintf (dump_file, "\n");
+    }
+
+  if (insn == NULL_RTX)
+    {
+      head->first = head->last = NULL;
+      return head;
+    }
+
+  this_du = XOBNEW (&rename_obstack, struct du_chain);
+  head->first = head->last = this_du;
+
+  this_du->next_use = 0;
+  this_du->loc = loc;
+  this_du->insn = insn;
+  this_du->cl = cl;
+  return head;
+}
+
 /* For a def-use chain HEAD, find which registers overlap its lifetime and
    set the corresponding bits in *PSET.  */
 
@@ -215,8 +306,9 @@ merge_overlapping_regs (HARD_REG_SET *ps
   IOR_HARD_REG_SET (*pset, head->hard_conflicts);
   EXECUTE_IF_SET_IN_BITMAP (&head->conflicts, 0, i, bi)
     {
-      du_head_p other = VEC_index (du_head_p, id_to_chain, i);
+      du_head_p other = chain_from_id (i);
       unsigned j = other->nregs;
+      gcc_assert (other != head);
       while (j-- > 0)
 	SET_HARD_REG_BIT (*pset, other->regno + j);
     }
@@ -269,13 +361,15 @@ check_new_reg_p (int reg ATTRIBUTE_UNUSE
   return true;
 }
 
-/* Process the closed chains starting with ALL_CHAINS and rename
-   registers if possible.  */
+/* Perform register renaming on the current function.  */
 static void
-rename_chains (du_head_p all_chains)
+rename_chains (void)
 {
   HARD_REG_SET unavailable;
+  du_head_p this_head;
+  int i;
 
+  memset (tick, 0, sizeof tick);
 
   CLEAR_HARD_REG_SET (unavailable);
   /* Don't clobber traceback for noreturn functions.  */
@@ -287,11 +381,10 @@ rename_chains (du_head_p all_chains)
 #endif
     }
 
-  while (all_chains)
+  FOR_EACH_VEC_ELT (du_head_p, id_to_chain, i, this_head)
     {
       int new_reg, best_new_reg, best_nregs;
       int n_uses;
-      struct du_head *this_head = all_chains;
       struct du_chain *tmp;
       HARD_REG_SET this_unavailable;
       int reg = this_head->regno;
@@ -300,8 +393,6 @@ rename_chains (du_head_p all_chains)
       enum reg_class preferred_class;
       bool has_preferred_class;
 
-      all_chains = this_head->next_chain;
-
       if (this_head->cannot_rename)
 	continue;
 
@@ -418,50 +509,401 @@ rename_chains (du_head_p all_chains)
     }
 }
 
-/* Perform register renaming on the current function.  */
+struct incoming_reg_info {
+  int nregs;
+  bool unusable;
+};
 
-static unsigned int
-regrename_optimize (void)
+/* A structure recording information about each basic block.  It is saved
+   and restored around basic block boundaries.  */
+struct bb_rename_info
 {
+  /* The basic block corresponding to this structure.  */
   basic_block bb;
-  char *first_obj;
+  /* Copies of the global information.  */
+  bitmap_head open_chains_set;
+  bitmap_head incoming_open_chains_set;
+  unsigned n_preds;
+  struct incoming_reg_info incoming[FIRST_PSEUDO_REGISTER];
+};
 
-  df_set_flags (DF_LR_RUN_DCE);
-  df_note_add_problem ();
-  df_analyze ();
-  df_set_flags (DF_DEFER_INSN_RESCAN);
+/* Initialize a rename_info structure P for basic block BB, which starts a new
+   scan.  */
+static void
+init_rename_info (struct bb_rename_info *p, basic_block bb)
+{
+  int i;
+  df_ref *def_rec;
+  HARD_REG_SET start_chains_set;
 
-  memset (tick, 0, sizeof tick);
+  p->bb = bb;
+  bitmap_initialize (&p->open_chains_set, &bitmap_default_obstack);
+  bitmap_initialize (&p->incoming_open_chains_set, &bitmap_default_obstack);
 
-  gcc_obstack_init (&rename_obstack);
-  first_obj = XOBNEWVAR (&rename_obstack, char, 0);
+  open_chains = NULL;
+  bitmap_clear (&open_chains_set);
+
+  CLEAR_HARD_REG_SET (live_in_chains);
+  REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_in (bb));
+  for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
+    {
+      df_ref def = *def_rec;
+      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
+	SET_HARD_REG_BIT (live_hard_regs, DF_REF_REGNO (def));
+    }
+
+  CLEAR_HARD_REG_SET (start_chains_set);
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    {
+      struct incoming_reg_info *iri = p->incoming + i;
+      if (iri->nregs > 0 && !iri->unusable
+	  && range_in_hard_reg_set_p (live_hard_regs, i, iri->nregs))
+	{
+	  SET_HARD_REG_BIT (start_chains_set, i);
+	  remove_range_from_hard_reg_set (&live_hard_regs, i, iri->nregs);
+	}
+    }
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    {
+      struct incoming_reg_info *iri = p->incoming + i;
+      if (TEST_HARD_REG_BIT (start_chains_set, i))
+	{
+	  du_head_p chain;
+	  if (dump_file)
+	    fprintf (dump_file, "opening incoming chain\n");
+	  chain = create_new_chain (i, iri->nregs, NULL, NULL_RTX, NO_REGS);
+	  bitmap_set_bit (&p->incoming_open_chains_set, chain->id);
+	}
+    }
+}
+
+/* Record in RI that the block corresponding to it has an incoming
+   live value, described by CHAIN.  */
+static void
+set_incoming_from_chain (struct bb_rename_info *ri, du_head_p chain)
+{
+  int min_reg, max_reg, i;
+  int incoming_nregs = ri->incoming[chain->regno].nregs;
+  int nregs;
+
+  /* If we've recorded the same information before, everything is fine.  */
+  if (incoming_nregs == chain->nregs)
+    {
+      if (dump_file)
+	fprintf (dump_file, "reg %d/%d already recorded\n",
+		 chain->regno, chain->nregs);
+      return;
+    }
+
+  /* If we have no information for any of the involved registers, update
+     the incoming array.  */
+  nregs = chain->nregs;
+  while (nregs-- > 0)
+    if (ri->incoming[chain->regno + nregs].nregs != 0
+	|| ri->incoming[chain->regno + nregs].unusable)
+      break;
+  if (nregs < 0)
+    {
+      nregs = chain->nregs;
+      ri->incoming[chain->regno].nregs = nregs;
+      while (nregs-- > 1)
+	ri->incoming[chain->regno + nregs].nregs = -nregs;
+      if (dump_file)
+	fprintf (dump_file, "recorded reg %d/%d\n",
+		 chain->regno, chain->nregs);
+      return;
+    }
+
+  /* There must be some kind of conflict.  Set the unusable for all
+     overlapping registers.  */
+  min_reg = chain->regno;
+  if (incoming_nregs < 0)
+    min_reg += incoming_nregs;
+  max_reg = chain->regno + chain->nregs;
+  for (i = min_reg; i < max_reg; i++)
+    {
+      ri->incoming[i].unusable = true;
+    }
+}
+
+/* Merge the two chains C1 and C2 so that all conflict information is
+   recorded and C1, and the id of C2 is changed to that of C1.  */
+static void
+merge_chains (du_head_p c1, du_head_p c2)
+{
+  if (c1 == c2)
+    return;
+
+  if (c2->first != NULL)
+    {
+      if (c1->first == NULL)
+	c1->first = c2->first;
+      else
+	c1->last->next_use = c2->first;
+      c1->last = c2->last;
+    }
+
+  c2->first = c2->last = NULL;
+  c2->id = c1->id;
+
+  IOR_HARD_REG_SET (c1->hard_conflicts, c2->hard_conflicts);
+  bitmap_ior_into (&c1->conflicts, &c2->conflicts);
+
+  c1->need_caller_save_reg |= c2->need_caller_save_reg;
+  c1->cannot_rename |= c2->cannot_rename;
+}
 
+/* Analyze the current function and build chains for renaming.  */
+
+static void
+regrename_analyze (void)
+{
+  struct bb_rename_info *rename_info;
+  int i, ri_index, n_processed;
+  basic_block bb;
+  bitmap_head processed_bbs;
+  VEC (basic_block, heap) *bb_queue;
+
+  rename_info = XCNEWVEC (struct bb_rename_info, n_basic_blocks);
+  ri_index = 0;
   FOR_EACH_BB (bb)
     {
-      struct du_head *all_chains = 0;
+      struct bb_rename_info *ri = rename_info + ri_index;
+      ri->bb = bb;
+      ri->n_preds = EDGE_COUNT (bb->preds);
+      bb->aux = ri;
+      ri_index++;
+    }
 
-      id_to_chain = VEC_alloc (du_head_p, heap, 0);
+  current_id = 0;
+  id_to_chain = VEC_alloc (du_head_p, heap, 0);
+  bitmap_initialize (&open_chains_set, &bitmap_default_obstack);
+
+  bb_queue = VEC_alloc (basic_block, heap, 0);
+
+  n_processed = 0;
+  bitmap_initialize (&processed_bbs, &bitmap_default_obstack);
+  VEC_safe_push (basic_block, heap, bb_queue, single_succ (ENTRY_BLOCK_PTR));
+
+  while (!VEC_empty (basic_block, bb_queue))
+    {
+      bool success;
+      edge e;
+      edge_iterator ei;
+      basic_block bb1 = VEC_pop (basic_block, bb_queue);
+      struct bb_rename_info *this_info = (struct bb_rename_info *)bb1->aux;
+      int old_length = VEC_length (du_head_p, id_to_chain);
 
       if (dump_file)
-	fprintf (dump_file, "\nBasic block %d:\n", bb->index);
+	fprintf (dump_file, "\nprocessing block %d:\n", bb1->index);
+
+      bitmap_set_bit (&processed_bbs, bb1->index);
+      n_processed++;
+      init_rename_info (this_info, bb1);
 
-      all_chains = build_def_use (bb);
+      success = build_def_use (bb1);
+      if (success)
+	{
+	  if (dump_file)
+	    dump_def_use_chain (old_length);
+	  bitmap_copy (&this_info->open_chains_set, &open_chains_set);
+	}
+      else
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "failed\n");
+	  bb1->aux = NULL;
+	  VEC_truncate (du_head_p, id_to_chain, old_length);
+	  current_id = old_length;
+	  bitmap_clear (&this_info->incoming_open_chains_set);
+	  open_chains = NULL;
+	}
+
+      FOR_EACH_EDGE (e, ei, bb1->succs)
+	{
+	  struct bb_rename_info *dest_ri;
+	  struct du_head *chain;
+
+	  if (dump_file)
+	    fprintf (dump_file, "successor block %d\n", e->dest->index);
+
+	  if (e->flags & (EDGE_EH | EDGE_ABNORMAL))
+	    continue;
+	  dest_ri = (struct bb_rename_info *)e->dest->aux;
+	  if (dest_ri == NULL)
+	    continue;
+	  gcc_assert (dest_ri->n_preds > 0);
+	  dest_ri->n_preds--;
+	  if (dest_ri->n_preds == 0
+	      && !bitmap_bit_p (&processed_bbs, dest_ri->bb->index))
+	    VEC_safe_push (basic_block, heap, bb_queue, e->dest);
+	  if (success)
+	    for (chain = open_chains; chain; chain = chain->next_chain)
+	      set_incoming_from_chain (dest_ri, chain);
+	}
+
+      if (VEC_empty (basic_block, bb_queue)
+	  && n_processed < ri_index)
+	{
+	  basic_block best = NULL;
+	  FOR_EACH_BB (bb)
+	    {
+	      struct bb_rename_info *ri = (struct bb_rename_info *)bb->aux;
+	      if (!bitmap_bit_p (&processed_bbs, bb->index)
+		  && (best == 0
+		      || ri->n_preds != EDGE_COUNT (bb->preds)))
+		best = bb;
+	    }
+	  if (dump_file)
+	    fprintf (dump_file, "\npicking new start block %d\n", best->index);
+	  VEC_safe_push (basic_block, heap, bb_queue, best);
+	}
+    }
+  bitmap_clear (&processed_bbs);
+  VEC_free (basic_block, heap, bb_queue);
+
+  for (i = 0; i < ri_index; i++)
+    {
+      struct bb_rename_info *bb_ri = rename_info + i;
+      unsigned j;
+      bitmap_iterator bi;
+
+      if (bb_ri->bb->aux == NULL)
+	continue;
 
       if (dump_file)
-	dump_def_use_chain (all_chains);
+	fprintf (dump_file, "processing bb %d in edges\n", bb_ri->bb->index);
+
+      EXECUTE_IF_SET_IN_BITMAP (&bb_ri->incoming_open_chains_set, 0, j, bi)
+	{
+	  edge e;
+	  edge_iterator ei;
+	  struct du_head *chain = chain_from_id (j);
+	  int n_preds_used = 0, n_preds_joined = 0;
+
+	  FOR_EACH_EDGE (e, ei, bb_ri->bb->preds)
+	    {
+	      struct bb_rename_info *src_ri;
+	      unsigned k;
+	      bitmap_iterator bi2;
+	      HARD_REG_SET live;
+	      bool success = false;
+
+	      REG_SET_TO_HARD_REG_SET (live, df_get_live_out (e->src));
+	      if (!range_overlaps_hard_reg_set_p (live, chain->regno,
+						  chain->nregs))
+		continue;
+	      n_preds_used++;
+
+	      if (e->flags & (EDGE_EH | EDGE_ABNORMAL))
+		continue;
 
-      rename_chains (all_chains);
+	      src_ri = (struct bb_rename_info *)e->src->aux;
+	      if (src_ri == NULL)
+		continue;
+
+	      EXECUTE_IF_SET_IN_BITMAP (&src_ri->open_chains_set,
+					0, k, bi2)
+		{
+		  struct du_head *outgoing_chain = chain_from_id (k);
 
-      free_chain_data ();
-      obstack_free (&rename_obstack, first_obj);
+		  if (outgoing_chain->regno == chain->regno
+		      && outgoing_chain->nregs == chain->nregs)
+		    {
+		      n_preds_joined++;
+		      success = true;
+		      break;
+		    }
+		}
+	      if (!success && dump_file)
+		fprintf (dump_file, "failure to match with pred block %d\n",
+			 e->src->index);
+	    }
+	  if (n_preds_joined < n_preds_used)
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "cannot rename chain %d\n", j);
+	      chain->cannot_rename = 1;
+	    }
+	}
     }
+  for (i = 0; i < ri_index; i++)
+    {
+      struct bb_rename_info *bb_ri = rename_info + i;
+      unsigned j;
+      bitmap_iterator bi;
 
-  obstack_free (&rename_obstack, NULL);
+      if (bb_ri->bb->aux == NULL)
+	continue;
 
-  if (dump_file)
-    fputc ('\n', dump_file);
+      if (dump_file)
+	fprintf (dump_file, "processing bb %d out edges\n", bb_ri->bb->index);
 
-  return 0;
+      EXECUTE_IF_SET_IN_BITMAP (&bb_ri->open_chains_set, 0, j, bi)
+	{
+	  edge e;
+	  edge_iterator ei;
+	  struct du_head *chain = chain_from_id (j);
+	  int n_succs_used = 0, n_succs_joined = 0;
+
+	  FOR_EACH_EDGE (e, ei, bb_ri->bb->succs)
+	    {
+	      bool printed = false;
+	      struct bb_rename_info *dest_ri;
+	      unsigned k;
+	      bitmap_iterator bi2;
+	      HARD_REG_SET live;
+
+	      REG_SET_TO_HARD_REG_SET (live, df_get_live_in (e->dest));
+	      if (!range_overlaps_hard_reg_set_p (live, chain->regno,
+						  chain->nregs))
+		continue;
+	      
+	      n_succs_used++;
+
+	      dest_ri = (struct bb_rename_info *)e->dest->aux;
+	      if (dest_ri == NULL)
+		continue;
+
+	      EXECUTE_IF_SET_IN_BITMAP (&dest_ri->incoming_open_chains_set,
+					0, k, bi2)
+		{
+		  struct du_head *incoming_chain = chain_from_id (k);
+
+		  if (incoming_chain->regno == chain->regno
+		      && incoming_chain->nregs == chain->nregs)
+		    {
+		      if (dump_file)
+			{
+			  if (!printed)
+			    fprintf (dump_file, "merging blocks for edge %d -> %d\n",
+				     e->src->index, e->dest->index);
+			  printed = true;
+			  fprintf (dump_file, "  merging chains %d (->%d) and %d (->%d) [%s]\n",
+				   k, incoming_chain->id, j, chain->id, 
+				   reg_names[incoming_chain->regno]);
+			}
+
+		      merge_chains (chain, incoming_chain);
+		      n_succs_joined++;
+		      break;
+		    }
+		}
+	    }
+	  if (n_succs_joined < n_succs_used)
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "cannot rename chain %d\n",
+			 j);
+	      chain->cannot_rename = 1;
+	    }
+	}
+    }
+
+  free (rename_info);
+
+  FOR_EACH_BB (bb)
+    bb->aux = NULL;
 }
 
 static void
@@ -470,8 +912,6 @@ do_replace (struct du_head *head, int re
   struct du_chain *chain;
   unsigned int base_regno = head->regno;
 
-  gcc_assert (! DEBUG_INSN_P (head->first->insn));
-
   for (chain = head->first; chain; chain = chain->next_use)
     {
       unsigned int regno = ORIGINAL_REGNO (*chain->loc);
@@ -494,19 +934,6 @@ do_replace (struct du_head *head, int re
 }
 
 
-/* Walk all chains starting with CHAINS and record that they conflict with
-   another chain whose id is ID.  */
-
-static void
-mark_conflict (struct du_head *chains, unsigned id)
-{
-  while (chains)
-    {
-      bitmap_set_bit (&chains->conflicts, id);
-      chains = chains->next_chain;
-    }
-}
-
 /* True if we found a register with a size mismatch, which means that we
    can't track its lifetime accurately.  If so, we abort the current block
    without renaming.  */
@@ -572,71 +999,6 @@ note_sets_clobbers (rtx x, const_rtx set
     add_to_hard_reg_set (&chain->hard_conflicts, GET_MODE (x), REGNO (x));
 }
 
-/* Create a new chain for THIS_NREGS registers starting at THIS_REGNO,
-   and record its occurrence in *LOC, which is being written to in INSN.
-   This access requires a register of class CL.  */
-
-static void
-create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc,
-		  rtx insn, enum reg_class cl)
-{
-  struct du_head *head = XOBNEW (&rename_obstack, struct du_head);
-  struct du_chain *this_du;
-  int nregs;
-
-  head->next_chain = open_chains;
-  open_chains = head;
-  head->regno = this_regno;
-  head->nregs = this_nregs;
-  head->need_caller_save_reg = 0;
-  head->cannot_rename = 0;
-
-  VEC_safe_push (du_head_p, heap, id_to_chain, head);
-  head->id = current_id++;
-
-  bitmap_initialize (&head->conflicts, &bitmap_default_obstack);
-  bitmap_copy (&head->conflicts, &open_chains_set);
-  mark_conflict (open_chains, head->id);
-
-  /* Since we're tracking this as a chain now, remove it from the
-     list of conflicting live hard registers and track it in
-     live_in_chains instead.  */
-  nregs = head->nregs;
-  while (nregs-- > 0)
-    {
-      SET_HARD_REG_BIT (live_in_chains, head->regno + nregs);
-      CLEAR_HARD_REG_BIT (live_hard_regs, head->regno + nregs);
-    }
-
-  COPY_HARD_REG_SET (head->hard_conflicts, live_hard_regs);
-  bitmap_set_bit (&open_chains_set, head->id);
-
-  open_chains = head;
-
-  if (dump_file)
-    {
-      fprintf (dump_file, "Creating chain %s (%d)",
-	       reg_names[head->regno], head->id);
-      if (insn != NULL_RTX)
-	fprintf (dump_file, " at insn %d", INSN_UID (insn));
-      fprintf (dump_file, "\n");
-    }
-
-  if (insn == NULL_RTX)
-    {
-      head->first = head->last = NULL;
-      return;
-    }
-
-  this_du = XOBNEW (&rename_obstack, struct du_chain);
-  head->first = head->last = this_du;
-
-  this_du->next_use = 0;
-  this_du->loc = loc;
-  this_du->insn = insn;
-  this_du->cl = cl;
-}
-
 static void
 scan_rtx_reg (rtx insn, rtx *loc, enum reg_class cl, enum scan_actions action,
 	      enum op_type type)
@@ -645,7 +1007,7 @@ scan_rtx_reg (rtx insn, rtx *loc, enum r
   rtx x = *loc;
   enum machine_mode mode = GET_MODE (x);
   unsigned this_regno = REGNO (x);
-  unsigned this_nregs = hard_regno_nregs[this_regno][mode];
+  int this_nregs = hard_regno_nregs[this_regno][mode];
 
   if (action == mark_write)
     {
@@ -745,8 +1107,6 @@ scan_rtx_reg (rtx insn, rtx *loc, enum r
 
 	  if (subset && !superset)
 	    head->cannot_rename = 1;
-	  head->next_chain = closed_chains;
-	  closed_chains = head;
 	  bitmap_clear_bit (&open_chains_set, head->id);
 
 	  nregs = head->nregs;
@@ -1132,28 +1492,14 @@ record_out_operands (rtx insn, bool earl
 
 /* Build def/use chain.  */
 
-static struct du_head *
+static bool
 build_def_use (basic_block bb)
 {
   rtx insn;
-  df_ref *def_rec;
   unsigned HOST_WIDE_INT untracked_operands;
 
-  open_chains = closed_chains = NULL;
-
   fail_current_block = false;
 
-  current_id = 0;
-  bitmap_initialize (&open_chains_set, &bitmap_default_obstack);
-  CLEAR_HARD_REG_SET (live_in_chains);
-  REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_in (bb));
-  for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
-    {
-      df_ref def = *def_rec;
-      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
-	SET_HARD_REG_BIT (live_hard_regs, DF_REF_REGNO (def));
-    }
-
   for (insn = BB_HEAD (bb); ; insn = NEXT_INSN (insn))
     {
       if (NONDEBUG_INSN_P (insn))
@@ -1392,14 +1738,32 @@ build_def_use (basic_block bb)
 	break;
     }
 
-  bitmap_clear (&open_chains_set);
-
   if (fail_current_block)
-    return NULL;
+    return false;
+
+  return true;
+}
+\f
+/* Perform register renaming on the current function.  */
+
+static unsigned int
+regrename_optimize (void)
+{
+  df_set_flags (DF_LR_RUN_DCE);
+  df_note_add_problem ();
+  df_analyze ();
+  df_set_flags (DF_DEFER_INSN_RESCAN);
+
+  gcc_obstack_init (&rename_obstack);
 
-  /* Since we close every chain when we find a REG_DEAD note, anything that
-     is still open lives past the basic block, so it can't be renamed.  */
-  return closed_chains;
+  regrename_analyze ();
+
+  rename_chains ();
+
+  free_chain_data ();
+  obstack_free (&rename_obstack, NULL);
+
+  return 0;
 }
 \f
 static bool
Index: trunk/gcc/regs.h
===================================================================
--- trunk.orig/gcc/regs.h
+++ trunk/gcc/regs.h
@@ -397,4 +397,48 @@ overlaps_hard_reg_set_p (const HARD_REG_
   return false;
 }
 
+/* Like add_to_hard_reg_set, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+
+static inline void
+add_range_to_hard_reg_set (HARD_REG_SET *regs, unsigned int regno,
+			   int nregs)
+{
+  while (nregs-- > 0)
+    SET_HARD_REG_BIT (*regs, regno + nregs);
+}
+
+/* Likewise, but remove the registers.  */
+
+static inline void
+remove_range_from_hard_reg_set (HARD_REG_SET *regs, unsigned int regno,
+				int nregs)
+{
+  while (nregs-- > 0)
+    CLEAR_HARD_REG_BIT (*regs, regno + nregs);
+}
+
+/* Like overlaps_hard_reg_set_p, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+static inline bool
+range_overlaps_hard_reg_set_p (const HARD_REG_SET set, unsigned regno,
+			       int nregs)
+{
+  while (nregs-- > 0)
+    if (TEST_HARD_REG_BIT (set, regno + nregs))
+      return true;
+  return false;
+}
+
+/* Like in_hard_reg_set_p, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+static inline bool
+range_in_hard_reg_set_p (const HARD_REG_SET set, unsigned regno, int nregs)
+{
+  while (nregs-- > 0)
+    if (!TEST_HARD_REG_BIT (set, regno + nregs))
+      return false;
+  return true;
+}
+
 #endif /* GCC_REGS_H */

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Ping: Rename across basic block boundaries
  2011-06-21 14:12 Rename across basic block boundaries Bernd Schmidt
@ 2011-08-23 10:56 ` Bernd Schmidt
  2011-08-24 12:50 ` Richard Sandiford
  1 sibling, 0 replies; 11+ messages in thread
From: Bernd Schmidt @ 2011-08-23 10:56 UTC (permalink / raw)
  To: GCC Patches

Ping for the patch at
http://gcc.gnu.org/ml/gcc-patches/2011-06/msg01595.html

> This patch requires
>   http://gcc.gnu.org/ml/gcc-patches/2011-05/msg02193.html
> as a prerequisite, and supersedes
>   http://gcc.gnu.org/ml/gcc-patches/2011-05/msg02194.html
> 
> The idea here is to allow regrename to operate across basic block
> boundaries. This helps for targets that use sched_ebb (such as C6X), and
> by exposing more chains, we also help on targets that can benefit from
> PREFERRED_RENAME_CLASS (again, C6X).


Bernd

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Rename across basic block boundaries
  2011-06-21 14:12 Rename across basic block boundaries Bernd Schmidt
  2011-08-23 10:56 ` Ping: " Bernd Schmidt
@ 2011-08-24 12:50 ` Richard Sandiford
  2011-08-25 14:39   ` Bernd Schmidt
  2011-08-25 19:04   ` Bernd Schmidt
  1 sibling, 2 replies; 11+ messages in thread
From: Richard Sandiford @ 2011-08-24 12:50 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: GCC Patches

Sorry, I'm find this a bit tough to review.  Could you provide some
overview comments somewhere to say what the new algorithm is?
The comment at the head of regrename.c still describes the current
bb-local algorithm.

One thing though:

Bernd Schmidt <bernds@codesourcery.com> writes:
> @@ -215,8 +306,9 @@ merge_overlapping_regs (HARD_REG_SET *ps
>    IOR_HARD_REG_SET (*pset, head->hard_conflicts);
>    EXECUTE_IF_SET_IN_BITMAP (&head->conflicts, 0, i, bi)
>      {
> -      du_head_p other = VEC_index (du_head_p, id_to_chain, i);
> +      du_head_p other = chain_from_id (i);
>        unsigned j = other->nregs;
> +      gcc_assert (other != head);
>        while (j-- > 0)
>  	SET_HARD_REG_BIT (*pset, other->regno + j);
>      }

Is this effectively cubic in the number of chains?  There are O(chains)
calls to merge_overlapping_regs, O(chains) nodes in the conflicts bitmap,
and chain_from_id chases O(chains) merges.

I realise this probably isn't a problem in practice.  Just looking
for reassurance. :-)

Richard

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Rename across basic block boundaries
  2011-08-24 12:50 ` Richard Sandiford
@ 2011-08-25 14:39   ` Bernd Schmidt
  2011-08-25 19:04   ` Bernd Schmidt
  1 sibling, 0 replies; 11+ messages in thread
From: Bernd Schmidt @ 2011-08-25 14:39 UTC (permalink / raw)
  To: GCC Patches, richard.sandiford

[-- Attachment #1: Type: text/plain, Size: 298 bytes --]

On 08/24/11 13:12, Richard Sandiford wrote:
> Sorry, I'm find this a bit tough to review.  Could you provide some
> overview comments somewhere to say what the new algorithm is?

Will resubmit. To make the patch smaller next time, I've committed the
following as obvious (BSRT i686-linux).


Bernd

[-- Attachment #2: rr-move.diff --]
[-- Type: text/plain, Size: 8658 bytes --]

Index: gcc/ChangeLog
===================================================================
--- gcc/ChangeLog	(revision 178062)
+++ gcc/ChangeLog	(working copy)
@@ -1,5 +1,12 @@
+2011-08-25  Bernd Schmidt  <bernds@codesourcery.com>
+
+	* regrename.c (scan_rtx_reg, scan_rtx_address, build_def_use,
+	dump_def_use_chain): Don't declare.
+	(mark_conflict, create_new_chain): Move before users.
+	(regrename_optimize): Move to near end of file.
+
 2011-08-25  Georg-Johann Lay  <avr@gjlay.de>
-	
+
 	* config/avr/avr.c (STR_PREFIX_P): New Define.
 	(avr_asm_declare_function_name): Use it.
 	(avr_asm_named_section): Use it.
Index: gcc/regrename.c
===================================================================
--- gcc/regrename.c	(revision 178057)
+++ gcc/regrename.c	(working copy)
@@ -138,14 +138,8 @@ static int this_tick = 0;
 static struct obstack rename_obstack;
 
 static void do_replace (struct du_head *, int);
-static void scan_rtx_reg (rtx, rtx *, enum reg_class,
-			  enum scan_actions, enum op_type);
-static void scan_rtx_address (rtx, rtx *, enum reg_class,
-			      enum scan_actions, enum machine_mode);
 static void scan_rtx (rtx, rtx *, enum reg_class, enum scan_actions,
 		      enum op_type);
-static struct du_head *build_def_use (basic_block);
-static void dump_def_use_chain (struct du_head *);
 
 typedef struct du_head *du_head_p;
 DEF_VEC_P (du_head_p);
@@ -204,6 +198,84 @@ free_chain_data (void)
   VEC_free (du_head_p, heap, id_to_chain);
 }
 
+/* Walk all chains starting with CHAINS and record that they conflict with
+   another chain whose id is ID.  */
+
+static void
+mark_conflict (struct du_head *chains, unsigned id)
+{
+  while (chains)
+    {
+      bitmap_set_bit (&chains->conflicts, id);
+      chains = chains->next_chain;
+    }
+}
+
+/* Create a new chain for THIS_NREGS registers starting at THIS_REGNO,
+   and record its occurrence in *LOC, which is being written to in INSN.
+   This access requires a register of class CL.  */
+
+static void
+create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc,
+		  rtx insn, enum reg_class cl)
+{
+  struct du_head *head = XOBNEW (&rename_obstack, struct du_head);
+  struct du_chain *this_du;
+  int nregs;
+
+  head->next_chain = open_chains;
+  open_chains = head;
+  head->regno = this_regno;
+  head->nregs = this_nregs;
+  head->need_caller_save_reg = 0;
+  head->cannot_rename = 0;
+
+  VEC_safe_push (du_head_p, heap, id_to_chain, head);
+  head->id = current_id++;
+
+  bitmap_initialize (&head->conflicts, &bitmap_default_obstack);
+  bitmap_copy (&head->conflicts, &open_chains_set);
+  mark_conflict (open_chains, head->id);
+
+  /* Since we're tracking this as a chain now, remove it from the
+     list of conflicting live hard registers and track it in
+     live_in_chains instead.  */
+  nregs = head->nregs;
+  while (nregs-- > 0)
+    {
+      SET_HARD_REG_BIT (live_in_chains, head->regno + nregs);
+      CLEAR_HARD_REG_BIT (live_hard_regs, head->regno + nregs);
+    }
+
+  COPY_HARD_REG_SET (head->hard_conflicts, live_hard_regs);
+  bitmap_set_bit (&open_chains_set, head->id);
+
+  open_chains = head;
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "Creating chain %s (%d)",
+	       reg_names[head->regno], head->id);
+      if (insn != NULL_RTX)
+	fprintf (dump_file, " at insn %d", INSN_UID (insn));
+      fprintf (dump_file, "\n");
+    }
+
+  if (insn == NULL_RTX)
+    {
+      head->first = head->last = NULL;
+      return;
+    }
+
+  this_du = XOBNEW (&rename_obstack, struct du_chain);
+  head->first = head->last = this_du;
+
+  this_du->next_use = 0;
+  this_du->loc = loc;
+  this_du->insn = insn;
+  this_du->cl = cl;
+}
+
 /* For a def-use chain HEAD, find which registers overlap its lifetime and
    set the corresponding bits in *PSET.  */
 
@@ -416,52 +488,6 @@ rename_chains (du_head_p all_chains)
     }
 }
 
-/* Perform register renaming on the current function.  */
-
-static unsigned int
-regrename_optimize (void)
-{
-  basic_block bb;
-  char *first_obj;
-
-  df_set_flags (DF_LR_RUN_DCE);
-  df_note_add_problem ();
-  df_analyze ();
-  df_set_flags (DF_DEFER_INSN_RESCAN);
-
-  memset (tick, 0, sizeof tick);
-
-  gcc_obstack_init (&rename_obstack);
-  first_obj = XOBNEWVAR (&rename_obstack, char, 0);
-
-  FOR_EACH_BB (bb)
-    {
-      struct du_head *all_chains = 0;
-
-      id_to_chain = VEC_alloc (du_head_p, heap, 0);
-
-      if (dump_file)
-	fprintf (dump_file, "\nBasic block %d:\n", bb->index);
-
-      all_chains = build_def_use (bb);
-
-      if (dump_file)
-	dump_def_use_chain (all_chains);
-
-      rename_chains (all_chains);
-
-      free_chain_data ();
-      obstack_free (&rename_obstack, first_obj);
-    }
-
-  obstack_free (&rename_obstack, NULL);
-
-  if (dump_file)
-    fputc ('\n', dump_file);
-
-  return 0;
-}
-
 static void
 do_replace (struct du_head *head, int reg)
 {
@@ -492,19 +518,6 @@ do_replace (struct du_head *head, int re
 }
 
 
-/* Walk all chains starting with CHAINS and record that they conflict with
-   another chain whose id is ID.  */
-
-static void
-mark_conflict (struct du_head *chains, unsigned id)
-{
-  while (chains)
-    {
-      bitmap_set_bit (&chains->conflicts, id);
-      chains = chains->next_chain;
-    }
-}
-
 /* True if we found a register with a size mismatch, which means that we
    can't track its lifetime accurately.  If so, we abort the current block
    without renaming.  */
@@ -570,71 +583,6 @@ note_sets_clobbers (rtx x, const_rtx set
     add_to_hard_reg_set (&chain->hard_conflicts, GET_MODE (x), REGNO (x));
 }
 
-/* Create a new chain for THIS_NREGS registers starting at THIS_REGNO,
-   and record its occurrence in *LOC, which is being written to in INSN.
-   This access requires a register of class CL.  */
-
-static void
-create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc,
-		  rtx insn, enum reg_class cl)
-{
-  struct du_head *head = XOBNEW (&rename_obstack, struct du_head);
-  struct du_chain *this_du;
-  int nregs;
-
-  head->next_chain = open_chains;
-  open_chains = head;
-  head->regno = this_regno;
-  head->nregs = this_nregs;
-  head->need_caller_save_reg = 0;
-  head->cannot_rename = 0;
-
-  VEC_safe_push (du_head_p, heap, id_to_chain, head);
-  head->id = current_id++;
-
-  bitmap_initialize (&head->conflicts, &bitmap_default_obstack);
-  bitmap_copy (&head->conflicts, &open_chains_set);
-  mark_conflict (open_chains, head->id);
-
-  /* Since we're tracking this as a chain now, remove it from the
-     list of conflicting live hard registers and track it in
-     live_in_chains instead.  */
-  nregs = head->nregs;
-  while (nregs-- > 0)
-    {
-      SET_HARD_REG_BIT (live_in_chains, head->regno + nregs);
-      CLEAR_HARD_REG_BIT (live_hard_regs, head->regno + nregs);
-    }
-
-  COPY_HARD_REG_SET (head->hard_conflicts, live_hard_regs);
-  bitmap_set_bit (&open_chains_set, head->id);
-
-  open_chains = head;
-
-  if (dump_file)
-    {
-      fprintf (dump_file, "Creating chain %s (%d)",
-	       reg_names[head->regno], head->id);
-      if (insn != NULL_RTX)
-	fprintf (dump_file, " at insn %d", INSN_UID (insn));
-      fprintf (dump_file, "\n");
-    }
-
-  if (insn == NULL_RTX)
-    {
-      head->first = head->last = NULL;
-      return;
-    }
-
-  this_du = XOBNEW (&rename_obstack, struct du_chain);
-  head->first = head->last = this_du;
-
-  this_du->next_use = 0;
-  this_du->loc = loc;
-  this_du->insn = insn;
-  this_du->cl = cl;
-}
-
 static void
 scan_rtx_reg (rtx insn, rtx *loc, enum reg_class cl, enum scan_actions action,
 	      enum op_type type)
@@ -1400,6 +1348,52 @@ build_def_use (basic_block bb)
   return closed_chains;
 }
 \f
+/* Perform register renaming on the current function.  */
+
+static unsigned int
+regrename_optimize (void)
+{
+  basic_block bb;
+  char *first_obj;
+
+  df_set_flags (DF_LR_RUN_DCE);
+  df_note_add_problem ();
+  df_analyze ();
+  df_set_flags (DF_DEFER_INSN_RESCAN);
+
+  memset (tick, 0, sizeof tick);
+
+  gcc_obstack_init (&rename_obstack);
+  first_obj = XOBNEWVAR (&rename_obstack, char, 0);
+
+  FOR_EACH_BB (bb)
+    {
+      struct du_head *all_chains = 0;
+
+      id_to_chain = VEC_alloc (du_head_p, heap, 0);
+
+      if (dump_file)
+	fprintf (dump_file, "\nBasic block %d:\n", bb->index);
+
+      all_chains = build_def_use (bb);
+
+      if (dump_file)
+	dump_def_use_chain (all_chains);
+
+      rename_chains (all_chains);
+
+      free_chain_data ();
+      obstack_free (&rename_obstack, first_obj);
+    }
+
+  obstack_free (&rename_obstack, NULL);
+
+  if (dump_file)
+    fputc ('\n', dump_file);
+
+  return 0;
+}
+\f
 static bool
 gate_handle_regrename (void)
 {

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Rename across basic block boundaries
  2011-08-24 12:50 ` Richard Sandiford
  2011-08-25 14:39   ` Bernd Schmidt
@ 2011-08-25 19:04   ` Bernd Schmidt
  2011-08-26 13:33     ` Richard Sandiford
  1 sibling, 1 reply; 11+ messages in thread
From: Bernd Schmidt @ 2011-08-25 19:04 UTC (permalink / raw)
  To: GCC Patches, richard.sandiford

[-- Attachment #1: Type: text/plain, Size: 1410 bytes --]

On 08/24/11 13:12, Richard Sandiford wrote:
> Sorry, I'm find this a bit tough to review.  Could you provide some
> overview comments somewhere to say what the new algorithm is?
> The comment at the head of regrename.c still describes the current
> bb-local algorithm.

New patch below, with extra comments.  Let me know if more are needed.

> One thing though:
> 
> Bernd Schmidt <bernds@codesourcery.com> writes:
>> @@ -215,8 +306,9 @@ merge_overlapping_regs (HARD_REG_SET *ps
>>    IOR_HARD_REG_SET (*pset, head->hard_conflicts);
>>    EXECUTE_IF_SET_IN_BITMAP (&head->conflicts, 0, i, bi)
>>      {
>> -      du_head_p other = VEC_index (du_head_p, id_to_chain, i);
>> +      du_head_p other = chain_from_id (i);
>>        unsigned j = other->nregs;
>> +      gcc_assert (other != head);
>>        while (j-- > 0)
>>  	SET_HARD_REG_BIT (*pset, other->regno + j);
>>      }
> 
> Is this effectively cubic in the number of chains?  There are O(chains)
> calls to merge_overlapping_regs, O(chains) nodes in the conflicts bitmap,
> and chain_from_id chases O(chains) merges.

I've made chain_from_id store its final result in the original chain, so
it'll take O(chains) only once per chain.

Bootstrapped and tested on i686-linux (with rr enabled at O2). I've also
redone performance tests with a popular embedded benchmark on C6X; some
fluctuations around +/-5%, and 20% improvement on one benchmark.


Bernd

[-- Attachment #2: rrcfg2.diff --]
[-- Type: text/plain, Size: 26128 bytes --]

	* regrename.c (struct du_head): Make nregs signed.
	(scan_rtx_reg, scan_rtx_address, dump_def_use_chain): Remove
	declarations.
	(closed_chains): Remove.
	(chain_from_id): New static function.
	(dump_def_use_chain): Change argument to be an int, indicating
	the first ID to print.  All callers changed.
	(mark_conflict, create_new_chain): Move upwards in the file.
	(merge_overlapping_regs): Use chain_from_id.  Assert that
	chains don't conflict with themselves.
	(rename_chains): Take no argument.  Iterate over id_to_chain
	rather to find chains to rename.  Clear tick before the main
	loop.
	(struct incoming_reg_info): New struct.
	(struct bb_rename_info): New struct.
	(init_rename_info, set_incoming_from_chain, merge_chains): New
	static functions.
	(regrename_analyze): New static function, broken out of
	regrename_optimize.  Record and make use of open chain information
	at basic block boundaries, and merge chains where possible.
	(scan_rtx_reg): Make this_nregs signed.  Don't update
	closed_chains.
	(build_def_use): Return a bool to indicate success.  All callers
	changed.  Don't initialize global data here.
	(regrename_optimize): Move most code out of here into
	regrename_analyze.
	* regs.h (add_range_to_hard_reg_set, remove_range_from_hard_reg_set,
	range_overlaps_hard_reg_set_p, range_in_hard_reg_set_p): New
	static inline functions.

Index: regrename.c
===================================================================
--- regrename.c	(revision 178065)
+++ regrename.c	(working copy)
@@ -47,18 +47,24 @@
 
      1. Local def/use chains are built: within each basic block, chains are
 	opened and closed; if a chain isn't closed at the end of the block,
-	it is dropped.
+	it is dropped.  We pre-open chains if we have already examined a
+	predecessor block and found chains live at the end which match
+	live registers at the start of the new block.
 
-     2. For each chain, the set of possible renaming registers is computed.
+     2. We try combine the local chains across basic block boundaries by
+        comparing chains that were open at the start or end of a block to
+	those in successor/predecessor blocks.
+
+     3. For each chain, the set of possible renaming registers is computed.
 	This takes into account the renaming of previously processed chains.
 	Optionally, a preferred class is computed for the renaming register.
 
-     3. The best renaming register is computed for the chain in the above set,
+     4. The best renaming register is computed for the chain in the above set,
 	using a round-robin allocation.  If a preferred class exists, then the
 	round-robin allocation is done within the class first, if possible.
 	The round-robin allocation of renaming registers itself is global.
 
-     4. If a renaming register has been found, it is substituted in the chain.
+     5. If a renaming register has been found, it is substituted in the chain.
 
   Targets can parameterize the pass by specifying a preferred class for the
   renaming register for a given (super)class of registers to be renamed.  */
@@ -75,8 +81,9 @@ struct du_head
   struct du_head *next_chain;
   /* The first and last elements of this chain.  */
   struct du_chain *first, *last;
-  /* Describes the register being tracked.  */
-  unsigned regno, nregs;
+  /* Describe the register being tracked, register number and count.  */
+  unsigned regno;
+  int nregs;
 
   /* A unique id to be used as an index into the conflicts bitmaps.  */
   unsigned id;
@@ -140,6 +147,7 @@ static struct obstack rename_obstack;
 static void do_replace (struct du_head *, int);
 static void scan_rtx (rtx, rtx *, enum reg_class, enum scan_actions,
 		      enum op_type);
+static bool build_def_use (basic_block);
 
 typedef struct du_head *du_head_p;
 DEF_VEC_P (du_head_p);
@@ -151,9 +159,8 @@ static unsigned current_id;
 /* A mapping of unique id numbers to chains.  */
 static VEC(du_head_p, heap) *id_to_chain;
 
-/* List of currently open chains, and closed chains that can be renamed.  */
+/* List of currently open chains.  */
 static struct du_head *open_chains;
-static struct du_head *closed_chains;
 
 /* Bitmap of open chains.  The bits set always match the list found in
    open_chains.  */
@@ -166,14 +173,34 @@ static HARD_REG_SET live_in_chains;
    between this and live_in_chains is empty.  */
 static HARD_REG_SET live_hard_regs;
 
-/* Dump all def/use chains in CHAINS to DUMP_FILE.  */
+/* Return the chain corresponding to id number ID.  Take into account that
+   chains may have been merged.  */
+static du_head_p
+chain_from_id (unsigned int id)
+{
+  du_head_p first_chain = VEC_index (du_head_p, id_to_chain, id);
+  du_head_p chain = first_chain;
+  while (chain->id != id)
+    {
+      id = chain->id;
+      chain = VEC_index (du_head_p, id_to_chain, id);
+    }
+  first_chain->id = id;
+  return chain;
+}
+
+/* Dump all def/use chains, starting at id FROM.  */
 
 static void
-dump_def_use_chain (struct du_head *head)
+dump_def_use_chain (int from)
 {
-  while (head)
+  du_head_p head;
+  int i;
+  FOR_EACH_VEC_ELT (du_head_p, id_to_chain, i, head)
     {
       struct du_chain *this_du = head->first;
+      if (i < from)
+	continue;
       fprintf (dump_file, "Register %s (%d):",
 	       reg_names[head->regno], head->nregs);
       while (this_du)
@@ -215,7 +242,7 @@ mark_conflict (struct du_head *chains, u
    and record its occurrence in *LOC, which is being written to in INSN.
    This access requires a register of class CL.  */
 
-static void
+static du_head_p
 create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc,
 		  rtx insn, enum reg_class cl)
 {
@@ -224,7 +251,6 @@ create_new_chain (unsigned this_regno, u
   int nregs;
 
   head->next_chain = open_chains;
-  open_chains = head;
   head->regno = this_regno;
   head->nregs = this_nregs;
   head->need_caller_save_reg = 0;
@@ -264,7 +290,7 @@ create_new_chain (unsigned this_regno, u
   if (insn == NULL_RTX)
     {
       head->first = head->last = NULL;
-      return;
+      return head;
     }
 
   this_du = XOBNEW (&rename_obstack, struct du_chain);
@@ -274,6 +300,7 @@ create_new_chain (unsigned this_regno, u
   this_du->loc = loc;
   this_du->insn = insn;
   this_du->cl = cl;
+  return head;
 }
 
 /* For a def-use chain HEAD, find which registers overlap its lifetime and
@@ -287,8 +314,9 @@ merge_overlapping_regs (HARD_REG_SET *ps
   IOR_HARD_REG_SET (*pset, head->hard_conflicts);
   EXECUTE_IF_SET_IN_BITMAP (&head->conflicts, 0, i, bi)
     {
-      du_head_p other = VEC_index (du_head_p, id_to_chain, i);
+      du_head_p other = chain_from_id (i);
       unsigned j = other->nregs;
+      gcc_assert (other != head);
       while (j-- > 0)
 	SET_HARD_REG_BIT (*pset, other->regno + j);
     }
@@ -341,12 +369,15 @@ check_new_reg_p (int reg ATTRIBUTE_UNUSE
   return true;
 }
 
-/* Process the closed chains starting with ALL_CHAINS and rename
-   registers if possible.  */
+/* Perform register renaming on the current function.  */
 static void
-rename_chains (du_head_p all_chains)
+rename_chains (void)
 {
   HARD_REG_SET unavailable;
+  du_head_p this_head;
+  int i;
+
+  memset (tick, 0, sizeof tick);
 
   CLEAR_HARD_REG_SET (unavailable);
   /* Don't clobber traceback for noreturn functions.  */
@@ -358,11 +389,10 @@ rename_chains (du_head_p all_chains)
 #endif
     }
 
-  while (all_chains)
+  FOR_EACH_VEC_ELT (du_head_p, id_to_chain, i, this_head)
     {
       int new_reg, best_new_reg, best_nregs;
       int n_uses;
-      struct du_head *this_head = all_chains;
       struct du_chain *tmp;
       HARD_REG_SET this_unavailable;
       int reg = this_head->regno;
@@ -371,8 +401,6 @@ rename_chains (du_head_p all_chains)
       enum reg_class preferred_class;
       bool has_preferred_class;
 
-      all_chains = this_head->next_chain;
-
       if (this_head->cannot_rename)
 	continue;
 
@@ -488,14 +516,453 @@ rename_chains (du_head_p all_chains)
     }
 }
 
+/* A structure to record information for each hard register at the start of
+   a basic block.  */
+struct incoming_reg_info {
+  /* Holds the number of registers used in the chain that gave us information
+     about this register.  Zero means no information known yet, while a
+     negative value is used for something that is part of, but not the first
+     register in a multi-register value.  */
+  int nregs;
+  /* Set to true if we have accesses that conflict in the number of registers
+     used.  */
+  bool unusable;
+};
+
+/* A structure recording information about each basic block.  It is saved
+   and restored around basic block boundaries.  */
+struct bb_rename_info
+{
+  /* The basic block corresponding to this structure.  */
+  basic_block bb;
+  /* Copies of the global information.  */
+  bitmap_head open_chains_set;
+  bitmap_head incoming_open_chains_set;
+  unsigned n_preds;
+  struct incoming_reg_info incoming[FIRST_PSEUDO_REGISTER];
+};
+
+/* Initialize a rename_info structure P for basic block BB, which starts a new
+   scan.  */
+static void
+init_rename_info (struct bb_rename_info *p, basic_block bb)
+{
+  int i;
+  df_ref *def_rec;
+  HARD_REG_SET start_chains_set;
+
+  p->bb = bb;
+  bitmap_initialize (&p->open_chains_set, &bitmap_default_obstack);
+  bitmap_initialize (&p->incoming_open_chains_set, &bitmap_default_obstack);
+
+  open_chains = NULL;
+  bitmap_clear (&open_chains_set);
+
+  CLEAR_HARD_REG_SET (live_in_chains);
+  REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_in (bb));
+  for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
+    {
+      df_ref def = *def_rec;
+      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
+	SET_HARD_REG_BIT (live_hard_regs, DF_REF_REGNO (def));
+    }
+
+  /* Open chains based on information from (at least one) predecessor
+     block.  This gives us a chance later on to combine chains across
+     basic block boundaries.  Inconsistencies (in access sizes) will
+     be caught normally and dealt with conservatively by disabling the
+     chain for renaming, and there is no risk of losing optimization
+     opportunities by opening chains either: if we did not open the
+     chains, we'd have to track the live register as a hard reg, and
+     we'd be unable to rename it in any case.  */
+  CLEAR_HARD_REG_SET (start_chains_set);
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    {
+      struct incoming_reg_info *iri = p->incoming + i;
+      if (iri->nregs > 0 && !iri->unusable
+	  && range_in_hard_reg_set_p (live_hard_regs, i, iri->nregs))
+	{
+	  SET_HARD_REG_BIT (start_chains_set, i);
+	  remove_range_from_hard_reg_set (&live_hard_regs, i, iri->nregs);
+	}
+    }
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    {
+      struct incoming_reg_info *iri = p->incoming + i;
+      if (TEST_HARD_REG_BIT (start_chains_set, i))
+	{
+	  du_head_p chain;
+	  if (dump_file)
+	    fprintf (dump_file, "opening incoming chain\n");
+	  chain = create_new_chain (i, iri->nregs, NULL, NULL_RTX, NO_REGS);
+	  bitmap_set_bit (&p->incoming_open_chains_set, chain->id);
+	}
+    }
+}
+
+/* Record in RI that the block corresponding to it has an incoming
+   live value, described by CHAIN.  */
+static void
+set_incoming_from_chain (struct bb_rename_info *ri, du_head_p chain)
+{
+  int min_reg, max_reg, i;
+  int incoming_nregs = ri->incoming[chain->regno].nregs;
+  int nregs;
+
+  /* If we've recorded the same information before, everything is fine.  */
+  if (incoming_nregs == chain->nregs)
+    {
+      if (dump_file)
+	fprintf (dump_file, "reg %d/%d already recorded\n",
+		 chain->regno, chain->nregs);
+      return;
+    }
+
+  /* If we have no information for any of the involved registers, update
+     the incoming array.  */
+  nregs = chain->nregs;
+  while (nregs-- > 0)
+    if (ri->incoming[chain->regno + nregs].nregs != 0
+	|| ri->incoming[chain->regno + nregs].unusable)
+      break;
+  if (nregs < 0)
+    {
+      nregs = chain->nregs;
+      ri->incoming[chain->regno].nregs = nregs;
+      while (nregs-- > 1)
+	ri->incoming[chain->regno + nregs].nregs = -nregs;
+      if (dump_file)
+	fprintf (dump_file, "recorded reg %d/%d\n",
+		 chain->regno, chain->nregs);
+      return;
+    }
+
+  /* There must be some kind of conflict.  Set the unusable for all
+     overlapping registers.  */
+  min_reg = chain->regno;
+  if (incoming_nregs < 0)
+    min_reg += incoming_nregs;
+  max_reg = chain->regno + chain->nregs;
+  for (i = min_reg; i < max_reg; i++)
+    ri->incoming[i].unusable = true;
+}
+
+/* Merge the two chains C1 and C2 so that all conflict information is
+   recorded and C1, and the id of C2 is changed to that of C1.  */
+static void
+merge_chains (du_head_p c1, du_head_p c2)
+{
+  if (c1 == c2)
+    return;
+
+  if (c2->first != NULL)
+    {
+      if (c1->first == NULL)
+	c1->first = c2->first;
+      else
+	c1->last->next_use = c2->first;
+      c1->last = c2->last;
+    }
+
+  c2->first = c2->last = NULL;
+  c2->id = c1->id;
+
+  IOR_HARD_REG_SET (c1->hard_conflicts, c2->hard_conflicts);
+  bitmap_ior_into (&c1->conflicts, &c2->conflicts);
+
+  c1->need_caller_save_reg |= c2->need_caller_save_reg;
+  c1->cannot_rename |= c2->cannot_rename;
+}
+
+/* Analyze the current function and build chains for renaming.  */
+
+static void
+regrename_analyze (void)
+{
+  struct bb_rename_info *rename_info;
+  int i, ri_index, n_processed;
+  basic_block bb;
+  bitmap_head processed_bbs;
+  VEC (basic_block, heap) *bb_queue;
+
+  /* Gather some information about the blocks in this function.  */
+  rename_info = XCNEWVEC (struct bb_rename_info, n_basic_blocks);
+  ri_index = 0;
+  FOR_EACH_BB (bb)
+    {
+      struct bb_rename_info *ri = rename_info + ri_index;
+      ri->bb = bb;
+      ri->n_preds = EDGE_COUNT (bb->preds);
+      bb->aux = ri;
+      ri_index++;
+    }
+
+  current_id = 0;
+  id_to_chain = VEC_alloc (du_head_p, heap, 0);
+  bitmap_initialize (&open_chains_set, &bitmap_default_obstack);
+
+  /* Process all basic blocks by using a worklist, adding unvisited successor
+     blocks whenever we reach the end of one basic blocks.  This ensures that
+     whenever possible, we only process a block after at least one of its
+     predecessors, which provides a "seeding" effect to make the logic in
+     set_incoming_from_chain and init_rename_info useful.  */
+
+  bb_queue = VEC_alloc (basic_block, heap, 0);
+
+  n_processed = 0;
+  bitmap_initialize (&processed_bbs, &bitmap_default_obstack);
+
+  VEC_safe_push (basic_block, heap, bb_queue, single_succ (ENTRY_BLOCK_PTR));
+
+  while (!VEC_empty (basic_block, bb_queue))
+    {
+      bool success;
+      edge e;
+      edge_iterator ei;
+      basic_block bb1 = VEC_pop (basic_block, bb_queue);
+      struct bb_rename_info *this_info = (struct bb_rename_info *)bb1->aux;
+      int old_length = VEC_length (du_head_p, id_to_chain);
+
+      if (dump_file)
+	fprintf (dump_file, "\nprocessing block %d:\n", bb1->index);
+
+      bitmap_set_bit (&processed_bbs, bb1->index);
+      n_processed++;
+      init_rename_info (this_info, bb1);
+
+      success = build_def_use (bb1);
+      if (success)
+	{
+	  if (dump_file)
+	    dump_def_use_chain (old_length);
+	  bitmap_copy (&this_info->open_chains_set, &open_chains_set);
+	}
+      else
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "failed\n");
+	  bb1->aux = NULL;
+	  VEC_truncate (du_head_p, id_to_chain, old_length);
+	  current_id = old_length;
+	  bitmap_clear (&this_info->incoming_open_chains_set);
+	  open_chains = NULL;
+	}
+
+      /* Add successor blocks to the worklist if necessary, and record
+	 data about our own open chains at the end of this block, which
+	 will be used to pre-open chains when processing the successors.  */
+      FOR_EACH_EDGE (e, ei, bb1->succs)
+	{
+	  struct bb_rename_info *dest_ri;
+	  struct du_head *chain;
+
+	  if (dump_file)
+	    fprintf (dump_file, "successor block %d\n", e->dest->index);
+
+	  if (e->flags & (EDGE_EH | EDGE_ABNORMAL))
+	    continue;
+	  dest_ri = (struct bb_rename_info *)e->dest->aux;
+	  if (dest_ri == NULL)
+	    continue;
+	  gcc_assert (dest_ri->n_preds > 0);
+	  dest_ri->n_preds--;
+	  if (dest_ri->n_preds == 0
+	      && !bitmap_bit_p (&processed_bbs, dest_ri->bb->index))
+	    VEC_safe_push (basic_block, heap, bb_queue, e->dest);
+	  if (success)
+	    for (chain = open_chains; chain; chain = chain->next_chain)
+	      set_incoming_from_chain (dest_ri, chain);
+	}
+
+      if (VEC_empty (basic_block, bb_queue)
+	  && n_processed < ri_index)
+	{
+	  basic_block best = NULL;
+	  FOR_EACH_BB (bb)
+	    {
+	      struct bb_rename_info *ri = (struct bb_rename_info *)bb->aux;
+	      if (!bitmap_bit_p (&processed_bbs, bb->index)
+		  && (best == 0
+		      || ri->n_preds != EDGE_COUNT (bb->preds)))
+		best = bb;
+	    }
+	  if (dump_file)
+	    fprintf (dump_file, "\npicking new start block %d\n", best->index);
+	  VEC_safe_push (basic_block, heap, bb_queue, best);
+	}
+    }
+  bitmap_clear (&processed_bbs);
+  VEC_free (basic_block, heap, bb_queue);
+
+  /* Now, combine the chains data we have gathered across basic block
+     boundaries.
+
+     For every basic block, there may be chains open at the start, or at the
+     end.  Rather than exclude them from renaming, we look for open chains
+     with matching registers at the other side of the CFG edge.
+
+     For a given chain using register R, open at the start of block B, we
+     must find an open chain using R on the other side of every edge leading
+     to B, if the register is live across this edge.  In the code below,
+     N_PREDS_USED counts the number of edges where the register is live, and
+     N_PREDS_JOINED counts those where we found an appropriate chain for
+     joining.
+
+     We perform the analysis for both incoming and outgoing edges, but we
+     only need to merge once (in the second part, after verifying outgoing
+     edges).  */
+  for (i = 0; i < ri_index; i++)
+    {
+      struct bb_rename_info *bb_ri = rename_info + i;
+      unsigned j;
+      bitmap_iterator bi;
+
+      if (bb_ri->bb->aux == NULL)
+	continue;
+
+      if (dump_file)
+	fprintf (dump_file, "processing bb %d in edges\n", bb_ri->bb->index);
+
+      EXECUTE_IF_SET_IN_BITMAP (&bb_ri->incoming_open_chains_set, 0, j, bi)
+	{
+	  edge e;
+	  edge_iterator ei;
+	  struct du_head *chain = chain_from_id (j);
+	  int n_preds_used = 0, n_preds_joined = 0;
+
+	  FOR_EACH_EDGE (e, ei, bb_ri->bb->preds)
+	    {
+	      struct bb_rename_info *src_ri;
+	      unsigned k;
+	      bitmap_iterator bi2;
+	      HARD_REG_SET live;
+	      bool success = false;
+
+	      REG_SET_TO_HARD_REG_SET (live, df_get_live_out (e->src));
+	      if (!range_overlaps_hard_reg_set_p (live, chain->regno,
+						  chain->nregs))
+		continue;
+	      n_preds_used++;
+
+	      if (e->flags & (EDGE_EH | EDGE_ABNORMAL))
+		continue;
+
+	      src_ri = (struct bb_rename_info *)e->src->aux;
+	      if (src_ri == NULL)
+		continue;
+
+	      EXECUTE_IF_SET_IN_BITMAP (&src_ri->open_chains_set,
+					0, k, bi2)
+		{
+		  struct du_head *outgoing_chain = chain_from_id (k);
+
+		  if (outgoing_chain->regno == chain->regno
+		      && outgoing_chain->nregs == chain->nregs)
+		    {
+		      n_preds_joined++;
+		      success = true;
+		      break;
+		    }
+		}
+	      if (!success && dump_file)
+		fprintf (dump_file, "failure to match with pred block %d\n",
+			 e->src->index);
+	    }
+	  if (n_preds_joined < n_preds_used)
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "cannot rename chain %d\n", j);
+	      chain->cannot_rename = 1;
+	    }
+	}
+    }
+  for (i = 0; i < ri_index; i++)
+    {
+      struct bb_rename_info *bb_ri = rename_info + i;
+      unsigned j;
+      bitmap_iterator bi;
+
+      if (bb_ri->bb->aux == NULL)
+	continue;
+
+      if (dump_file)
+	fprintf (dump_file, "processing bb %d out edges\n", bb_ri->bb->index);
+
+      EXECUTE_IF_SET_IN_BITMAP (&bb_ri->open_chains_set, 0, j, bi)
+	{
+	  edge e;
+	  edge_iterator ei;
+	  struct du_head *chain = chain_from_id (j);
+	  int n_succs_used = 0, n_succs_joined = 0;
+
+	  FOR_EACH_EDGE (e, ei, bb_ri->bb->succs)
+	    {
+	      bool printed = false;
+	      struct bb_rename_info *dest_ri;
+	      unsigned k;
+	      bitmap_iterator bi2;
+	      HARD_REG_SET live;
+
+	      REG_SET_TO_HARD_REG_SET (live, df_get_live_in (e->dest));
+	      if (!range_overlaps_hard_reg_set_p (live, chain->regno,
+						  chain->nregs))
+		continue;
+	      
+	      n_succs_used++;
+
+	      dest_ri = (struct bb_rename_info *)e->dest->aux;
+	      if (dest_ri == NULL)
+		continue;
+
+	      EXECUTE_IF_SET_IN_BITMAP (&dest_ri->incoming_open_chains_set,
+					0, k, bi2)
+		{
+		  struct du_head *incoming_chain = chain_from_id (k);
+
+		  if (incoming_chain->regno == chain->regno
+		      && incoming_chain->nregs == chain->nregs)
+		    {
+		      if (dump_file)
+			{
+			  if (!printed)
+			    fprintf (dump_file,
+				     "merging blocks for edge %d -> %d\n",
+				     e->src->index, e->dest->index);
+			  printed = true;
+			  fprintf (dump_file,
+				   "  merging chains %d (->%d) and %d (->%d) [%s]\n",
+				   k, incoming_chain->id, j, chain->id, 
+				   reg_names[incoming_chain->regno]);
+			}
+
+		      merge_chains (chain, incoming_chain);
+		      n_succs_joined++;
+		      break;
+		    }
+		}
+	    }
+	  if (n_succs_joined < n_succs_used)
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "cannot rename chain %d\n",
+			 j);
+	      chain->cannot_rename = 1;
+	    }
+	}
+    }
+
+  free (rename_info);
+
+  FOR_EACH_BB (bb)
+    bb->aux = NULL;
+}
+
 static void
 do_replace (struct du_head *head, int reg)
 {
   struct du_chain *chain;
   unsigned int base_regno = head->regno;
 
-  gcc_assert (! DEBUG_INSN_P (head->first->insn));
-
   for (chain = head->first; chain; chain = chain->next_use)
     {
       unsigned int regno = ORIGINAL_REGNO (*chain->loc);
@@ -591,7 +1058,7 @@ scan_rtx_reg (rtx insn, rtx *loc, enum r
   rtx x = *loc;
   enum machine_mode mode = GET_MODE (x);
   unsigned this_regno = REGNO (x);
-  unsigned this_nregs = hard_regno_nregs[this_regno][mode];
+  int this_nregs = hard_regno_nregs[this_regno][mode];
 
   if (action == mark_write)
     {
@@ -691,8 +1158,6 @@ scan_rtx_reg (rtx insn, rtx *loc, enum r
 
 	  if (subset && !superset)
 	    head->cannot_rename = 1;
-	  head->next_chain = closed_chains;
-	  closed_chains = head;
 	  bitmap_clear_bit (&open_chains_set, head->id);
 
 	  nregs = head->nregs;
@@ -1078,28 +1543,14 @@ record_out_operands (rtx insn, bool earl
 
 /* Build def/use chain.  */
 
-static struct du_head *
+static bool
 build_def_use (basic_block bb)
 {
   rtx insn;
-  df_ref *def_rec;
   unsigned HOST_WIDE_INT untracked_operands;
 
-  open_chains = closed_chains = NULL;
-
   fail_current_block = false;
 
-  current_id = 0;
-  bitmap_initialize (&open_chains_set, &bitmap_default_obstack);
-  CLEAR_HARD_REG_SET (live_in_chains);
-  REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_in (bb));
-  for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
-    {
-      df_ref def = *def_rec;
-      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
-	SET_HARD_REG_BIT (live_hard_regs, DF_REF_REGNO (def));
-    }
-
   for (insn = BB_HEAD (bb); ; insn = NEXT_INSN (insn))
     {
       if (NONDEBUG_INSN_P (insn))
@@ -1338,14 +1789,10 @@ build_def_use (basic_block bb)
 	break;
     }
 
-  bitmap_clear (&open_chains_set);
-
   if (fail_current_block)
-    return NULL;
+    return false;
 
-  /* Since we close every chain when we find a REG_DEAD note, anything that
-     is still open lives past the basic block, so it can't be renamed.  */
-  return closed_chains;
+  return true;
 }
 \f
 /* Perform register renaming on the current function.  */
@@ -1353,44 +1800,20 @@ build_def_use (basic_block bb)
 static unsigned int
 regrename_optimize (void)
 {
-  basic_block bb;
-  char *first_obj;
-
   df_set_flags (DF_LR_RUN_DCE);
   df_note_add_problem ();
   df_analyze ();
   df_set_flags (DF_DEFER_INSN_RESCAN);
 
-  memset (tick, 0, sizeof tick);
-
   gcc_obstack_init (&rename_obstack);
-  first_obj = XOBNEWVAR (&rename_obstack, char, 0);
 
-  FOR_EACH_BB (bb)
-    {
-      struct du_head *all_chains = 0;
-
-      id_to_chain = VEC_alloc (du_head_p, heap, 0);
-
-      if (dump_file)
-	fprintf (dump_file, "\nBasic block %d:\n", bb->index);
-
-      all_chains = build_def_use (bb);
-
-      if (dump_file)
-	dump_def_use_chain (all_chains);
-
-      rename_chains (all_chains);
+  regrename_analyze ();
 
-      free_chain_data ();
-      obstack_free (&rename_obstack, first_obj);
-    }
+  rename_chains ();
 
+  free_chain_data ();
   obstack_free (&rename_obstack, NULL);
 
-  if (dump_file)
-    fputc ('\n', dump_file);
-
   return 0;
 }
 \f
Index: regs.h
===================================================================
--- regs.h	(revision 178030)
+++ regs.h	(working copy)
@@ -397,4 +397,48 @@ overlaps_hard_reg_set_p (const HARD_REG_
   return false;
 }
 
+/* Like add_to_hard_reg_set, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+
+static inline void
+add_range_to_hard_reg_set (HARD_REG_SET *regs, unsigned int regno,
+			   int nregs)
+{
+  while (nregs-- > 0)
+    SET_HARD_REG_BIT (*regs, regno + nregs);
+}
+
+/* Likewise, but remove the registers.  */
+
+static inline void
+remove_range_from_hard_reg_set (HARD_REG_SET *regs, unsigned int regno,
+				int nregs)
+{
+  while (nregs-- > 0)
+    CLEAR_HARD_REG_BIT (*regs, regno + nregs);
+}
+
+/* Like overlaps_hard_reg_set_p, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+static inline bool
+range_overlaps_hard_reg_set_p (const HARD_REG_SET set, unsigned regno,
+			       int nregs)
+{
+  while (nregs-- > 0)
+    if (TEST_HARD_REG_BIT (set, regno + nregs))
+      return true;
+  return false;
+}
+
+/* Like in_hard_reg_set_p, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+static inline bool
+range_in_hard_reg_set_p (const HARD_REG_SET set, unsigned regno, int nregs)
+{
+  while (nregs-- > 0)
+    if (!TEST_HARD_REG_BIT (set, regno + nregs))
+      return false;
+  return true;
+}
+
 #endif /* GCC_REGS_H */

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Rename across basic block boundaries
  2011-08-25 19:04   ` Bernd Schmidt
@ 2011-08-26 13:33     ` Richard Sandiford
  2011-09-01 13:02       ` Bernd Schmidt
  0 siblings, 1 reply; 11+ messages in thread
From: Richard Sandiford @ 2011-08-26 13:33 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: GCC Patches

Rather than using global variables and then copying them into a bb
structure, would it be possible to write directly into the bb structure?
The answer's probably "no", just asking. :-)

Bernd Schmidt <bernds@codesourcery.com> writes:
> 	* regrename.c (struct du_head): Make nregs signed.
> 	(scan_rtx_reg, scan_rtx_address, dump_def_use_chain): Remove
> 	declarations.

This bit was split out.

> 	(mark_conflict, create_new_chain): Move upwards in the file.

Same here.  Should mention the change to create_new_chain's interface
though.

> -     2. For each chain, the set of possible renaming registers is computed.
> +     2. We try combine the local chains across basic block boundaries by
> +        comparing chains that were open at the start or end of a block to
> +	those in successor/predecessor blocks.

"try to combine"

> +/* Dump all def/use chains, starting at id FROM.  */
>  
>  static void
> -dump_def_use_chain (struct du_head *head)
> +dump_def_use_chain (int from)
>  {
> -  while (head)
> +  du_head_p head;
> +  int i;
> +  FOR_EACH_VEC_ELT (du_head_p, id_to_chain, i, head)
>      {
>        struct du_chain *this_du = head->first;
> +      if (i < from)
> +	continue;
>        fprintf (dump_file, "Register %s (%d):",
>  	       reg_names[head->regno], head->nregs);
>        while (this_du)

I know it's only a dumping function, but maybe this'd be a good excuse
to add:

#define FOR_EACH_VEC_ELT_FROM(T, V, I, P, FROM)		\
  for (I = (FROM); VEC_iterate (T, (V), (I), (P)); ++(I))

> +/* A structure recording information about each basic block.  It is saved
> +   and restored around basic block boundaries.  */
> +struct bb_rename_info

Probably worth saying here or elsewhere that the bb->aux field points
to this information and that (more importantly) the bb->aux is null
for blocks that could not be optimised, including the exit block.

> +/* Record in RI that the block corresponding to it has an incoming
> +   live value, described by CHAIN.  */
> +static void
> +set_incoming_from_chain (struct bb_rename_info *ri, du_head_p chain)
> +{
> +  int min_reg, max_reg, i;
> +  int incoming_nregs = ri->incoming[chain->regno].nregs;
> +  int nregs;
> +
> +  /* If we've recorded the same information before, everything is fine.  */
> +  if (incoming_nregs == chain->nregs)
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "reg %d/%d already recorded\n",
> +		 chain->regno, chain->nregs);
> +      return;
> +    }
> +
> +  /* If we have no information for any of the involved registers, update
> +     the incoming array.  */
> +  nregs = chain->nregs;
> +  while (nregs-- > 0)
> +    if (ri->incoming[chain->regno + nregs].nregs != 0
> +	|| ri->incoming[chain->regno + nregs].unusable)
> +      break;
> +  if (nregs < 0)
> +    {
> +      nregs = chain->nregs;
> +      ri->incoming[chain->regno].nregs = nregs;
> +      while (nregs-- > 1)
> +	ri->incoming[chain->regno + nregs].nregs = -nregs;
> +      if (dump_file)
> +	fprintf (dump_file, "recorded reg %d/%d\n",
> +		 chain->regno, chain->nregs);
> +      return;
> +    }
> +
> +  /* There must be some kind of conflict.  Set the unusable for all
> +     overlapping registers.  */
> +  min_reg = chain->regno;
> +  if (incoming_nregs < 0)
> +    min_reg += incoming_nregs;
> +  max_reg = chain->regno + chain->nregs;
> +  for (i = min_reg; i < max_reg; i++)
> +    ri->incoming[i].unusable = true;

In the incoming_nregs < 0 case, we only need to set
ri->incoming[chain->regno + incoming_nregs] itself, right,
not the other registers between that and ri->incoming[chain->regno]?
If so, I think it'd be clearer to have:

  /* There must be some kind of conflict.  Prevent both the old and
     new ranges from being used.  */
  if (incoming_nregs < 0)
    ri->incoming[chain->regno + incoming_nregs].unusable = true;
  for (i = 0; i < chain->nregs; i++)
    ri->incoming[chain->regno + i].unusable = true;

When I first looked at the code, I was wondering why we changed every
register in (chain->regno + incoming_nregs, chain_regno), but none in
[chain->regno + chain->nregs, OLD_END).  Seems like we should do neither
(as in the above suggestion) or both.

> +  /* Process all basic blocks by using a worklist, adding unvisited successor
> +     blocks whenever we reach the end of one basic blocks.  This ensures that
> +     whenever possible, we only process a block after at least one of its
> +     predecessors, which provides a "seeding" effect to make the logic in
> +     set_incoming_from_chain and init_rename_info useful.  */

Wouldn't a reverse post-order (inverted_post_order_compute) allow even
more pre-opening (as well as being less code)?

Looked good to me otherwise.

Richard

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Rename across basic block boundaries
  2011-08-26 13:33     ` Richard Sandiford
@ 2011-09-01 13:02       ` Bernd Schmidt
  2011-09-01 14:16         ` Richard Sandiford
  0 siblings, 1 reply; 11+ messages in thread
From: Bernd Schmidt @ 2011-09-01 13:02 UTC (permalink / raw)
  To: GCC Patches, richard.sandiford

[-- Attachment #1: Type: text/plain, Size: 2095 bytes --]

On 08/26/11 14:57, Richard Sandiford wrote:
>> +  /* There must be some kind of conflict.  Set the unusable for all
>> +     overlapping registers.  */
>> +  min_reg = chain->regno;
>> +  if (incoming_nregs < 0)
>> +    min_reg += incoming_nregs;
>> +  max_reg = chain->regno + chain->nregs;
>> +  for (i = min_reg; i < max_reg; i++)
>> +    ri->incoming[i].unusable = true;
> 
> In the incoming_nregs < 0 case, we only need to set
> ri->incoming[chain->regno + incoming_nregs] itself, right,
> not the other registers between that and ri->incoming[chain->regno]?
> If so, I think it'd be clearer to have:
> 
>   /* There must be some kind of conflict.  Prevent both the old and
>      new ranges from being used.  */
>   if (incoming_nregs < 0)
>     ri->incoming[chain->regno + incoming_nregs].unusable = true;
>   for (i = 0; i < chain->nregs; i++)
>     ri->incoming[chain->regno + i].unusable = true;
> 
> When I first looked at the code, I was wondering why we changed every
> register in (chain->regno + incoming_nregs, chain_regno), but none in
> [chain->regno + chain->nregs, OLD_END).  Seems like we should do neither
> (as in the above suggestion) or both.

I think both was the original intention (OLD_END being min_reg +
ri->incoming[min_reg].nregs, right?), but yours works too.

>> +  /* Process all basic blocks by using a worklist, adding unvisited successor
>> +     blocks whenever we reach the end of one basic blocks.  This ensures that
>> +     whenever possible, we only process a block after at least one of its
>> +     predecessors, which provides a "seeding" effect to make the logic in
>> +     set_incoming_from_chain and init_rename_info useful.  */
> 
> Wouldn't a reverse post-order (inverted_post_order_compute) allow even
> more pre-opening (as well as being less code)?

I can't really tell from the comments what that function is supposed to
produce. I've made a change to use it to order the bbs, but that made rr
visit basic blocks without seeing any of their predecessors first, so it
seems unsuitable. So I've left that alone.

New version below.


Bernd

[-- Attachment #2: rrcfg3.diff --]
[-- Type: text/plain, Size: 26805 bytes --]

	* regrename.c (struct du_head): Make nregs signed.
	(closed_chains): Remove.
	(create_new_chain): Return the new chain.
	(chain_from_id): New static function.
	(dump_def_use_chain): Change argument to be an int, indicating
	the first ID to print.  All callers changed.
	(merge_overlapping_regs): Use chain_from_id.  Assert that
	chains don't conflict with themselves.
	(rename_chains): Take no argument.  Iterate over id_to_chain
	rather to find chains to rename.  Clear tick before the main
	loop.
	(struct incoming_reg_info): New struct.
	(struct bb_rename_info): New struct.
	(init_rename_info, set_incoming_from_chain, merge_chains): New
	static functions.
	(regrename_analyze): New static function, broken out of
	regrename_optimize.  Record and make use of open chain information
	at basic block boundaries, and merge chains where possible.
	(scan_rtx_reg): Make this_nregs signed.  Don't update
	closed_chains.
	(build_def_use): Return a bool to indicate success.  All callers
	changed.  Don't initialize global data here.
	(regrename_optimize): Move most code out of here into
	regrename_analyze.
	* regs.h (add_range_to_hard_reg_set, remove_range_from_hard_reg_set,
	range_overlaps_hard_reg_set_p, range_in_hard_reg_set_p): New
	static inline functions.
	* vec.h (FOR_EACH_VEC_ELT_FROM): New macro.

Index: gcc/regrename.c
===================================================================
--- gcc/regrename.c	(revision 178293)
+++ gcc/regrename.c	(working copy)
@@ -47,18 +47,24 @@
 
      1. Local def/use chains are built: within each basic block, chains are
 	opened and closed; if a chain isn't closed at the end of the block,
-	it is dropped.
+	it is dropped.  We pre-open chains if we have already examined a
+	predecessor block and found chains live at the end which match
+	live registers at the start of the new block.
 
-     2. For each chain, the set of possible renaming registers is computed.
+     2. We try to combine the local chains across basic block boundaries by
+        comparing chains that were open at the start or end of a block to
+	those in successor/predecessor blocks.
+
+     3. For each chain, the set of possible renaming registers is computed.
 	This takes into account the renaming of previously processed chains.
 	Optionally, a preferred class is computed for the renaming register.
 
-     3. The best renaming register is computed for the chain in the above set,
+     4. The best renaming register is computed for the chain in the above set,
 	using a round-robin allocation.  If a preferred class exists, then the
 	round-robin allocation is done within the class first, if possible.
 	The round-robin allocation of renaming registers itself is global.
 
-     4. If a renaming register has been found, it is substituted in the chain.
+     5. If a renaming register has been found, it is substituted in the chain.
 
   Targets can parameterize the pass by specifying a preferred class for the
   renaming register for a given (super)class of registers to be renamed.  */
@@ -75,8 +81,9 @@ struct du_head
   struct du_head *next_chain;
   /* The first and last elements of this chain.  */
   struct du_chain *first, *last;
-  /* Describes the register being tracked.  */
-  unsigned regno, nregs;
+  /* Describe the register being tracked, register number and count.  */
+  unsigned regno;
+  int nregs;
 
   /* A unique id to be used as an index into the conflicts bitmaps.  */
   unsigned id;
@@ -140,6 +147,7 @@ static struct obstack rename_obstack;
 static void do_replace (struct du_head *, int);
 static void scan_rtx (rtx, rtx *, enum reg_class, enum scan_actions,
 		      enum op_type);
+static bool build_def_use (basic_block);
 
 typedef struct du_head *du_head_p;
 DEF_VEC_P (du_head_p);
@@ -151,9 +159,8 @@ static unsigned current_id;
 /* A mapping of unique id numbers to chains.  */
 static VEC(du_head_p, heap) *id_to_chain;
 
-/* List of currently open chains, and closed chains that can be renamed.  */
+/* List of currently open chains.  */
 static struct du_head *open_chains;
-static struct du_head *closed_chains;
 
 /* Bitmap of open chains.  The bits set always match the list found in
    open_chains.  */
@@ -166,14 +173,33 @@ static HARD_REG_SET live_in_chains;
    between this and live_in_chains is empty.  */
 static HARD_REG_SET live_hard_regs;
 
-/* Dump all def/use chains in CHAINS to DUMP_FILE.  */
+/* Return the chain corresponding to id number ID.  Take into account that
+   chains may have been merged.  */
+static du_head_p
+chain_from_id (unsigned int id)
+{
+  du_head_p first_chain = VEC_index (du_head_p, id_to_chain, id);
+  du_head_p chain = first_chain;
+  while (chain->id != id)
+    {
+      id = chain->id;
+      chain = VEC_index (du_head_p, id_to_chain, id);
+    }
+  first_chain->id = id;
+  return chain;
+}
+
+/* Dump all def/use chains, starting at id FROM.  */
 
 static void
-dump_def_use_chain (struct du_head *head)
+dump_def_use_chain (int from)
 {
-  while (head)
+  du_head_p head;
+  int i;
+  FOR_EACH_VEC_ELT_FROM (du_head_p, id_to_chain, i, head, from)
     {
       struct du_chain *this_du = head->first;
+
       fprintf (dump_file, "Register %s (%d):",
 	       reg_names[head->regno], head->nregs);
       while (this_du)
@@ -215,7 +241,7 @@ mark_conflict (struct du_head *chains, u
    and record its occurrence in *LOC, which is being written to in INSN.
    This access requires a register of class CL.  */
 
-static void
+static du_head_p
 create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc,
 		  rtx insn, enum reg_class cl)
 {
@@ -224,7 +250,6 @@ create_new_chain (unsigned this_regno, u
   int nregs;
 
   head->next_chain = open_chains;
-  open_chains = head;
   head->regno = this_regno;
   head->nregs = this_nregs;
   head->need_caller_save_reg = 0;
@@ -264,7 +289,7 @@ create_new_chain (unsigned this_regno, u
   if (insn == NULL_RTX)
     {
       head->first = head->last = NULL;
-      return;
+      return head;
     }
 
   this_du = XOBNEW (&rename_obstack, struct du_chain);
@@ -274,6 +299,7 @@ create_new_chain (unsigned this_regno, u
   this_du->loc = loc;
   this_du->insn = insn;
   this_du->cl = cl;
+  return head;
 }
 
 /* For a def-use chain HEAD, find which registers overlap its lifetime and
@@ -287,8 +313,9 @@ merge_overlapping_regs (HARD_REG_SET *ps
   IOR_HARD_REG_SET (*pset, head->hard_conflicts);
   EXECUTE_IF_SET_IN_BITMAP (&head->conflicts, 0, i, bi)
     {
-      du_head_p other = VEC_index (du_head_p, id_to_chain, i);
+      du_head_p other = chain_from_id (i);
       unsigned j = other->nregs;
+      gcc_assert (other != head);
       while (j-- > 0)
 	SET_HARD_REG_BIT (*pset, other->regno + j);
     }
@@ -341,12 +368,15 @@ check_new_reg_p (int reg ATTRIBUTE_UNUSE
   return true;
 }
 
-/* Process the closed chains starting with ALL_CHAINS and rename
-   registers if possible.  */
+/* Perform register renaming on the current function.  */
 static void
-rename_chains (du_head_p all_chains)
+rename_chains (void)
 {
   HARD_REG_SET unavailable;
+  du_head_p this_head;
+  int i;
+
+  memset (tick, 0, sizeof tick);
 
   CLEAR_HARD_REG_SET (unavailable);
   /* Don't clobber traceback for noreturn functions.  */
@@ -358,11 +388,10 @@ rename_chains (du_head_p all_chains)
 #endif
     }
 
-  while (all_chains)
+  FOR_EACH_VEC_ELT (du_head_p, id_to_chain, i, this_head)
     {
       int new_reg, best_new_reg, best_nregs;
       int n_uses;
-      struct du_head *this_head = all_chains;
       struct du_chain *tmp;
       HARD_REG_SET this_unavailable;
       int reg = this_head->regno;
@@ -371,8 +400,6 @@ rename_chains (du_head_p all_chains)
       enum reg_class preferred_class;
       bool has_preferred_class;
 
-      all_chains = this_head->next_chain;
-
       if (this_head->cannot_rename)
 	continue;
 
@@ -488,14 +515,454 @@ rename_chains (du_head_p all_chains)
     }
 }
 
+/* A structure to record information for each hard register at the start of
+   a basic block.  */
+struct incoming_reg_info {
+  /* Holds the number of registers used in the chain that gave us information
+     about this register.  Zero means no information known yet, while a
+     negative value is used for something that is part of, but not the first
+     register in a multi-register value.  */
+  int nregs;
+  /* Set to true if we have accesses that conflict in the number of registers
+     used.  */
+  bool unusable;
+};
+
+/* A structure recording information about each basic block.  It is saved
+   and restored around basic block boundaries.
+   A pointer to such a structure is stored in each basic block's aux field
+   during regrename_analyze, except for blocks we know can't be optimized
+   (such as entry and exit blocks).  */
+struct bb_rename_info
+{
+  /* The basic block corresponding to this structure.  */
+  basic_block bb;
+  /* Copies of the global information.  */
+  bitmap_head open_chains_set;
+  bitmap_head incoming_open_chains_set;
+  unsigned n_preds;
+  struct incoming_reg_info incoming[FIRST_PSEUDO_REGISTER];
+};
+
+/* Initialize a rename_info structure P for basic block BB, which starts a new
+   scan.  */
+static void
+init_rename_info (struct bb_rename_info *p, basic_block bb)
+{
+  int i;
+  df_ref *def_rec;
+  HARD_REG_SET start_chains_set;
+
+  p->bb = bb;
+  bitmap_initialize (&p->open_chains_set, &bitmap_default_obstack);
+  bitmap_initialize (&p->incoming_open_chains_set, &bitmap_default_obstack);
+
+  open_chains = NULL;
+  bitmap_clear (&open_chains_set);
+
+  CLEAR_HARD_REG_SET (live_in_chains);
+  REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_in (bb));
+  for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
+    {
+      df_ref def = *def_rec;
+      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
+	SET_HARD_REG_BIT (live_hard_regs, DF_REF_REGNO (def));
+    }
+
+  /* Open chains based on information from (at least one) predecessor
+     block.  This gives us a chance later on to combine chains across
+     basic block boundaries.  Inconsistencies (in access sizes) will
+     be caught normally and dealt with conservatively by disabling the
+     chain for renaming, and there is no risk of losing optimization
+     opportunities by opening chains either: if we did not open the
+     chains, we'd have to track the live register as a hard reg, and
+     we'd be unable to rename it in any case.  */
+  CLEAR_HARD_REG_SET (start_chains_set);
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    {
+      struct incoming_reg_info *iri = p->incoming + i;
+      if (iri->nregs > 0 && !iri->unusable
+	  && range_in_hard_reg_set_p (live_hard_regs, i, iri->nregs))
+	{
+	  SET_HARD_REG_BIT (start_chains_set, i);
+	  remove_range_from_hard_reg_set (&live_hard_regs, i, iri->nregs);
+	}
+    }
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    {
+      struct incoming_reg_info *iri = p->incoming + i;
+      if (TEST_HARD_REG_BIT (start_chains_set, i))
+	{
+	  du_head_p chain;
+	  if (dump_file)
+	    fprintf (dump_file, "opening incoming chain\n");
+	  chain = create_new_chain (i, iri->nregs, NULL, NULL_RTX, NO_REGS);
+	  bitmap_set_bit (&p->incoming_open_chains_set, chain->id);
+	}
+    }
+}
+
+/* Record in RI that the block corresponding to it has an incoming
+   live value, described by CHAIN.  */
+static void
+set_incoming_from_chain (struct bb_rename_info *ri, du_head_p chain)
+{
+  int i;
+  int incoming_nregs = ri->incoming[chain->regno].nregs;
+  int nregs;
+
+  /* If we've recorded the same information before, everything is fine.  */
+  if (incoming_nregs == chain->nregs)
+    {
+      if (dump_file)
+	fprintf (dump_file, "reg %d/%d already recorded\n",
+		 chain->regno, chain->nregs);
+      return;
+    }
+
+  /* If we have no information for any of the involved registers, update
+     the incoming array.  */
+  nregs = chain->nregs;
+  while (nregs-- > 0)
+    if (ri->incoming[chain->regno + nregs].nregs != 0
+	|| ri->incoming[chain->regno + nregs].unusable)
+      break;
+  if (nregs < 0)
+    {
+      nregs = chain->nregs;
+      ri->incoming[chain->regno].nregs = nregs;
+      while (nregs-- > 1)
+	ri->incoming[chain->regno + nregs].nregs = -nregs;
+      if (dump_file)
+	fprintf (dump_file, "recorded reg %d/%d\n",
+		 chain->regno, chain->nregs);
+      return;
+    }
+
+  /* There must be some kind of conflict.  Prevent both the old and
+     new ranges from being used.  */
+  if (incoming_nregs < 0)
+    ri->incoming[chain->regno + incoming_nregs].unusable = true;
+  for (i = 0; i < chain->nregs; i++)
+    ri->incoming[chain->regno + i].unusable = true;
+}
+
+/* Merge the two chains C1 and C2 so that all conflict information is
+   recorded and C1, and the id of C2 is changed to that of C1.  */
+static void
+merge_chains (du_head_p c1, du_head_p c2)
+{
+  if (c1 == c2)
+    return;
+
+  if (c2->first != NULL)
+    {
+      if (c1->first == NULL)
+	c1->first = c2->first;
+      else
+	c1->last->next_use = c2->first;
+      c1->last = c2->last;
+    }
+
+  c2->first = c2->last = NULL;
+  c2->id = c1->id;
+
+  IOR_HARD_REG_SET (c1->hard_conflicts, c2->hard_conflicts);
+  bitmap_ior_into (&c1->conflicts, &c2->conflicts);
+
+  c1->need_caller_save_reg |= c2->need_caller_save_reg;
+  c1->cannot_rename |= c2->cannot_rename;
+}
+
+/* Analyze the current function and build chains for renaming.  */
+
+static void
+regrename_analyze (void)
+{
+  struct bb_rename_info *rename_info;
+  int i, ri_index, n_processed;
+  basic_block bb;
+  bitmap_head processed_bbs;
+  VEC (basic_block, heap) *bb_queue;
+
+  /* Gather some information about the blocks in this function.  */
+  rename_info = XCNEWVEC (struct bb_rename_info, n_basic_blocks);
+  ri_index = 0;
+  FOR_EACH_BB (bb)
+    {
+      struct bb_rename_info *ri = rename_info + ri_index;
+      ri->bb = bb;
+      ri->n_preds = EDGE_COUNT (bb->preds);
+      bb->aux = ri;
+      ri_index++;
+    }
+
+  current_id = 0;
+  id_to_chain = VEC_alloc (du_head_p, heap, 0);
+  bitmap_initialize (&open_chains_set, &bitmap_default_obstack);
+
+  /* Process all basic blocks by using a worklist, adding unvisited successor
+     blocks whenever we reach the end of one basic blocks.  This ensures that
+     whenever possible, we only process a block after at least one of its
+     predecessors, which provides a "seeding" effect to make the logic in
+     set_incoming_from_chain and init_rename_info useful.  */
+
+  bb_queue = VEC_alloc (basic_block, heap, 0);
+
+  n_processed = 0;
+  bitmap_initialize (&processed_bbs, &bitmap_default_obstack);
+
+  VEC_safe_push (basic_block, heap, bb_queue, single_succ (ENTRY_BLOCK_PTR));
+
+  while (!VEC_empty (basic_block, bb_queue))
+    {
+      bool success;
+      edge e;
+      edge_iterator ei;
+      basic_block bb1 = VEC_pop (basic_block, bb_queue);
+      struct bb_rename_info *this_info = (struct bb_rename_info *)bb1->aux;
+      int old_length = VEC_length (du_head_p, id_to_chain);
+
+      if (dump_file)
+	fprintf (dump_file, "\nprocessing block %d:\n", bb1->index);
+
+      bitmap_set_bit (&processed_bbs, bb1->index);
+      n_processed++;
+      init_rename_info (this_info, bb1);
+
+      success = build_def_use (bb1);
+      if (success)
+	{
+	  if (dump_file)
+	    dump_def_use_chain (old_length);
+	  bitmap_copy (&this_info->open_chains_set, &open_chains_set);
+	}
+      else
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "failed\n");
+	  bb1->aux = NULL;
+	  VEC_truncate (du_head_p, id_to_chain, old_length);
+	  current_id = old_length;
+	  bitmap_clear (&this_info->incoming_open_chains_set);
+	  open_chains = NULL;
+	}
+
+      /* Add successor blocks to the worklist if necessary, and record
+	 data about our own open chains at the end of this block, which
+	 will be used to pre-open chains when processing the successors.  */
+      FOR_EACH_EDGE (e, ei, bb1->succs)
+	{
+	  struct bb_rename_info *dest_ri;
+	  struct du_head *chain;
+
+	  if (dump_file)
+	    fprintf (dump_file, "successor block %d\n", e->dest->index);
+
+	  if (e->flags & (EDGE_EH | EDGE_ABNORMAL))
+	    continue;
+	  dest_ri = (struct bb_rename_info *)e->dest->aux;
+	  if (dest_ri == NULL)
+	    continue;
+	  gcc_assert (dest_ri->n_preds > 0);
+	  dest_ri->n_preds--;
+	  if (dest_ri->n_preds == 0
+	      && !bitmap_bit_p (&processed_bbs, dest_ri->bb->index))
+	    VEC_safe_push (basic_block, heap, bb_queue, e->dest);
+	  if (success)
+	    for (chain = open_chains; chain; chain = chain->next_chain)
+	      set_incoming_from_chain (dest_ri, chain);
+	}
+
+      if (VEC_empty (basic_block, bb_queue)
+	  && n_processed < ri_index)
+	{
+	  basic_block best = NULL;
+	  FOR_EACH_BB (bb)
+	    {
+	      struct bb_rename_info *ri = (struct bb_rename_info *)bb->aux;
+	      if (!bitmap_bit_p (&processed_bbs, bb->index)
+		  && (best == 0
+		      || ri->n_preds != EDGE_COUNT (bb->preds)))
+		best = bb;
+	    }
+	  if (dump_file)
+	    fprintf (dump_file, "\npicking new start block %d\n", best->index);
+	  VEC_safe_push (basic_block, heap, bb_queue, best);
+	}
+    }
+  bitmap_clear (&processed_bbs);
+  VEC_free (basic_block, heap, bb_queue);
+
+  /* Now, combine the chains data we have gathered across basic block
+     boundaries.
+
+     For every basic block, there may be chains open at the start, or at the
+     end.  Rather than exclude them from renaming, we look for open chains
+     with matching registers at the other side of the CFG edge.
+
+     For a given chain using register R, open at the start of block B, we
+     must find an open chain using R on the other side of every edge leading
+     to B, if the register is live across this edge.  In the code below,
+     N_PREDS_USED counts the number of edges where the register is live, and
+     N_PREDS_JOINED counts those where we found an appropriate chain for
+     joining.
+
+     We perform the analysis for both incoming and outgoing edges, but we
+     only need to merge once (in the second part, after verifying outgoing
+     edges).  */
+  for (i = 0; i < ri_index; i++)
+    {
+      struct bb_rename_info *bb_ri = rename_info + i;
+      unsigned j;
+      bitmap_iterator bi;
+
+      if (bb_ri->bb->aux == NULL)
+	continue;
+
+      if (dump_file)
+	fprintf (dump_file, "processing bb %d in edges\n", bb_ri->bb->index);
+
+      EXECUTE_IF_SET_IN_BITMAP (&bb_ri->incoming_open_chains_set, 0, j, bi)
+	{
+	  edge e;
+	  edge_iterator ei;
+	  struct du_head *chain = chain_from_id (j);
+	  int n_preds_used = 0, n_preds_joined = 0;
+
+	  FOR_EACH_EDGE (e, ei, bb_ri->bb->preds)
+	    {
+	      struct bb_rename_info *src_ri;
+	      unsigned k;
+	      bitmap_iterator bi2;
+	      HARD_REG_SET live;
+	      bool success = false;
+
+	      REG_SET_TO_HARD_REG_SET (live, df_get_live_out (e->src));
+	      if (!range_overlaps_hard_reg_set_p (live, chain->regno,
+						  chain->nregs))
+		continue;
+	      n_preds_used++;
+
+	      if (e->flags & (EDGE_EH | EDGE_ABNORMAL))
+		continue;
+
+	      src_ri = (struct bb_rename_info *)e->src->aux;
+	      if (src_ri == NULL)
+		continue;
+
+	      EXECUTE_IF_SET_IN_BITMAP (&src_ri->open_chains_set,
+					0, k, bi2)
+		{
+		  struct du_head *outgoing_chain = chain_from_id (k);
+
+		  if (outgoing_chain->regno == chain->regno
+		      && outgoing_chain->nregs == chain->nregs)
+		    {
+		      n_preds_joined++;
+		      success = true;
+		      break;
+		    }
+		}
+	      if (!success && dump_file)
+		fprintf (dump_file, "failure to match with pred block %d\n",
+			 e->src->index);
+	    }
+	  if (n_preds_joined < n_preds_used)
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "cannot rename chain %d\n", j);
+	      chain->cannot_rename = 1;
+	    }
+	}
+    }
+  for (i = 0; i < ri_index; i++)
+    {
+      struct bb_rename_info *bb_ri = rename_info + i;
+      unsigned j;
+      bitmap_iterator bi;
+
+      if (bb_ri->bb->aux == NULL)
+	continue;
+
+      if (dump_file)
+	fprintf (dump_file, "processing bb %d out edges\n", bb_ri->bb->index);
+
+      EXECUTE_IF_SET_IN_BITMAP (&bb_ri->open_chains_set, 0, j, bi)
+	{
+	  edge e;
+	  edge_iterator ei;
+	  struct du_head *chain = chain_from_id (j);
+	  int n_succs_used = 0, n_succs_joined = 0;
+
+	  FOR_EACH_EDGE (e, ei, bb_ri->bb->succs)
+	    {
+	      bool printed = false;
+	      struct bb_rename_info *dest_ri;
+	      unsigned k;
+	      bitmap_iterator bi2;
+	      HARD_REG_SET live;
+
+	      REG_SET_TO_HARD_REG_SET (live, df_get_live_in (e->dest));
+	      if (!range_overlaps_hard_reg_set_p (live, chain->regno,
+						  chain->nregs))
+		continue;
+	      
+	      n_succs_used++;
+
+	      dest_ri = (struct bb_rename_info *)e->dest->aux;
+	      if (dest_ri == NULL)
+		continue;
+
+	      EXECUTE_IF_SET_IN_BITMAP (&dest_ri->incoming_open_chains_set,
+					0, k, bi2)
+		{
+		  struct du_head *incoming_chain = chain_from_id (k);
+
+		  if (incoming_chain->regno == chain->regno
+		      && incoming_chain->nregs == chain->nregs)
+		    {
+		      if (dump_file)
+			{
+			  if (!printed)
+			    fprintf (dump_file,
+				     "merging blocks for edge %d -> %d\n",
+				     e->src->index, e->dest->index);
+			  printed = true;
+			  fprintf (dump_file,
+				   "  merging chains %d (->%d) and %d (->%d) [%s]\n",
+				   k, incoming_chain->id, j, chain->id, 
+				   reg_names[incoming_chain->regno]);
+			}
+
+		      merge_chains (chain, incoming_chain);
+		      n_succs_joined++;
+		      break;
+		    }
+		}
+	    }
+	  if (n_succs_joined < n_succs_used)
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "cannot rename chain %d\n",
+			 j);
+	      chain->cannot_rename = 1;
+	    }
+	}
+    }
+
+  free (rename_info);
+
+  FOR_EACH_BB (bb)
+    bb->aux = NULL;
+}
+
 static void
 do_replace (struct du_head *head, int reg)
 {
   struct du_chain *chain;
   unsigned int base_regno = head->regno;
 
-  gcc_assert (! DEBUG_INSN_P (head->first->insn));
-
   for (chain = head->first; chain; chain = chain->next_use)
     {
       unsigned int regno = ORIGINAL_REGNO (*chain->loc);
@@ -591,7 +1058,7 @@ scan_rtx_reg (rtx insn, rtx *loc, enum r
   rtx x = *loc;
   enum machine_mode mode = GET_MODE (x);
   unsigned this_regno = REGNO (x);
-  unsigned this_nregs = hard_regno_nregs[this_regno][mode];
+  int this_nregs = hard_regno_nregs[this_regno][mode];
 
   if (action == mark_write)
     {
@@ -691,8 +1158,6 @@ scan_rtx_reg (rtx insn, rtx *loc, enum r
 
 	  if (subset && !superset)
 	    head->cannot_rename = 1;
-	  head->next_chain = closed_chains;
-	  closed_chains = head;
 	  bitmap_clear_bit (&open_chains_set, head->id);
 
 	  nregs = head->nregs;
@@ -1078,28 +1543,14 @@ record_out_operands (rtx insn, bool earl
 
 /* Build def/use chain.  */
 
-static struct du_head *
+static bool
 build_def_use (basic_block bb)
 {
   rtx insn;
-  df_ref *def_rec;
   unsigned HOST_WIDE_INT untracked_operands;
 
-  open_chains = closed_chains = NULL;
-
   fail_current_block = false;
 
-  current_id = 0;
-  bitmap_initialize (&open_chains_set, &bitmap_default_obstack);
-  CLEAR_HARD_REG_SET (live_in_chains);
-  REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_in (bb));
-  for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
-    {
-      df_ref def = *def_rec;
-      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
-	SET_HARD_REG_BIT (live_hard_regs, DF_REF_REGNO (def));
-    }
-
   for (insn = BB_HEAD (bb); ; insn = NEXT_INSN (insn))
     {
       if (NONDEBUG_INSN_P (insn))
@@ -1338,14 +1789,10 @@ build_def_use (basic_block bb)
 	break;
     }
 
-  bitmap_clear (&open_chains_set);
-
   if (fail_current_block)
-    return NULL;
+    return false;
 
-  /* Since we close every chain when we find a REG_DEAD note, anything that
-     is still open lives past the basic block, so it can't be renamed.  */
-  return closed_chains;
+  return true;
 }
 \f
 /* Perform register renaming on the current function.  */
@@ -1353,44 +1800,20 @@ build_def_use (basic_block bb)
 static unsigned int
 regrename_optimize (void)
 {
-  basic_block bb;
-  char *first_obj;
-
   df_set_flags (DF_LR_RUN_DCE);
   df_note_add_problem ();
   df_analyze ();
   df_set_flags (DF_DEFER_INSN_RESCAN);
 
-  memset (tick, 0, sizeof tick);
-
   gcc_obstack_init (&rename_obstack);
-  first_obj = XOBNEWVAR (&rename_obstack, char, 0);
 
-  FOR_EACH_BB (bb)
-    {
-      struct du_head *all_chains = 0;
-
-      id_to_chain = VEC_alloc (du_head_p, heap, 0);
-
-      if (dump_file)
-	fprintf (dump_file, "\nBasic block %d:\n", bb->index);
-
-      all_chains = build_def_use (bb);
-
-      if (dump_file)
-	dump_def_use_chain (all_chains);
-
-      rename_chains (all_chains);
+  regrename_analyze ();
 
-      free_chain_data ();
-      obstack_free (&rename_obstack, first_obj);
-    }
+  rename_chains ();
 
+  free_chain_data ();
   obstack_free (&rename_obstack, NULL);
 
-  if (dump_file)
-    fputc ('\n', dump_file);
-
   return 0;
 }
 \f
Index: gcc/regs.h
===================================================================
--- gcc/regs.h	(revision 178293)
+++ gcc/regs.h	(working copy)
@@ -397,4 +397,48 @@ overlaps_hard_reg_set_p (const HARD_REG_
   return false;
 }
 
+/* Like add_to_hard_reg_set, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+
+static inline void
+add_range_to_hard_reg_set (HARD_REG_SET *regs, unsigned int regno,
+			   int nregs)
+{
+  while (nregs-- > 0)
+    SET_HARD_REG_BIT (*regs, regno + nregs);
+}
+
+/* Likewise, but remove the registers.  */
+
+static inline void
+remove_range_from_hard_reg_set (HARD_REG_SET *regs, unsigned int regno,
+				int nregs)
+{
+  while (nregs-- > 0)
+    CLEAR_HARD_REG_BIT (*regs, regno + nregs);
+}
+
+/* Like overlaps_hard_reg_set_p, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+static inline bool
+range_overlaps_hard_reg_set_p (const HARD_REG_SET set, unsigned regno,
+			       int nregs)
+{
+  while (nregs-- > 0)
+    if (TEST_HARD_REG_BIT (set, regno + nregs))
+      return true;
+  return false;
+}
+
+/* Like in_hard_reg_set_p, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+static inline bool
+range_in_hard_reg_set_p (const HARD_REG_SET set, unsigned regno, int nregs)
+{
+  while (nregs-- > 0)
+    if (!TEST_HARD_REG_BIT (set, regno + nregs))
+      return false;
+  return true;
+}
+
 #endif /* GCC_REGS_H */
Index: gcc/vec.h
===================================================================
--- gcc/vec.h	(revision 178293)
+++ gcc/vec.h	(working copy)
@@ -195,6 +195,11 @@ along with GCC; see the file COPYING3.
 #define FOR_EACH_VEC_ELT(T, V, I, P)		\
   for (I = 0; VEC_iterate (T, (V), (I), (P)); ++(I))
 
+/* Likewise, but start from FROM rather than 0.  */
+
+#define FOR_EACH_VEC_ELT_FROM(T, V, I, P, FROM)		\
+  for (I = (FROM); VEC_iterate (T, (V), (I), (P)); ++(I))
+
 /* Convenience macro for reverse iteration.  */
 
 #define FOR_EACH_VEC_ELT_REVERSE(T,V,I,P) \

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Rename across basic block boundaries
  2011-09-01 13:02       ` Bernd Schmidt
@ 2011-09-01 14:16         ` Richard Sandiford
  2011-09-05 15:36           ` Bernd Schmidt
  0 siblings, 1 reply; 11+ messages in thread
From: Richard Sandiford @ 2011-09-01 14:16 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: GCC Patches

Bernd Schmidt <bernds@codesourcery.com> writes:
> On 08/26/11 14:57, Richard Sandiford wrote:
>>> +  /* There must be some kind of conflict.  Set the unusable for all
>>> +     overlapping registers.  */
>>> +  min_reg = chain->regno;
>>> +  if (incoming_nregs < 0)
>>> +    min_reg += incoming_nregs;
>>> +  max_reg = chain->regno + chain->nregs;
>>> +  for (i = min_reg; i < max_reg; i++)
>>> +    ri->incoming[i].unusable = true;
>> 
>> In the incoming_nregs < 0 case, we only need to set
>> ri->incoming[chain->regno + incoming_nregs] itself, right,
>> not the other registers between that and ri->incoming[chain->regno]?
>> If so, I think it'd be clearer to have:
>> 
>>   /* There must be some kind of conflict.  Prevent both the old and
>>      new ranges from being used.  */
>>   if (incoming_nregs < 0)
>>     ri->incoming[chain->regno + incoming_nregs].unusable = true;
>>   for (i = 0; i < chain->nregs; i++)
>>     ri->incoming[chain->regno + i].unusable = true;
>> 
>> When I first looked at the code, I was wondering why we changed every
>> register in (chain->regno + incoming_nregs, chain_regno), but none in
>> [chain->regno + chain->nregs, OLD_END).  Seems like we should do neither
>> (as in the above suggestion) or both.
>
> I think both was the original intention (OLD_END being min_reg +
> ri->incoming[min_reg].nregs, right?),

Right.  There'd have been a max operation on max_reg too.

> but yours works too.

Ok, great.

>>> +  /* Process all basic blocks by using a worklist, adding unvisited successor
>>> +     blocks whenever we reach the end of one basic blocks.  This ensures that
>>> +     whenever possible, we only process a block after at least one of its
>>> +     predecessors, which provides a "seeding" effect to make the logic in
>>> +     set_incoming_from_chain and init_rename_info useful.  */
>> 
>> Wouldn't a reverse post-order (inverted_post_order_compute) allow even
>> more pre-opening (as well as being less code)?
>
> I can't really tell from the comments what that function is supposed to
> produce.

Sorry, I thought it was supposed to produce a reverse postorder, but...

> I've made a change to use it to order the bbs, but that made rr
> visit basic blocks without seeing any of their predecessors first,

...I guess not. :-)  pre_and_rev_post_order_compute should though.
Could you try that instead?

Richard

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Rename across basic block boundaries
  2011-09-01 14:16         ` Richard Sandiford
@ 2011-09-05 15:36           ` Bernd Schmidt
  2011-09-06 10:37             ` Richard Sandiford
  0 siblings, 1 reply; 11+ messages in thread
From: Bernd Schmidt @ 2011-09-05 15:36 UTC (permalink / raw)
  To: GCC Patches, richard.sandiford

[-- Attachment #1: Type: text/plain, Size: 712 bytes --]

On 09/01/11 16:16, Richard Sandiford wrote:
> Bernd Schmidt <bernds@codesourcery.com> writes:
>> On 08/26/11 14:57, Richard Sandiford wrote:
>>> Wouldn't a reverse post-order (inverted_post_order_compute) allow even
>>> more pre-opening (as well as being less code)?
>>
>> I can't really tell from the comments what that function is supposed to
>> produce.
> 
> Sorry, I thought it was supposed to produce a reverse postorder, but...
> 
>> I've made a change to use it to order the bbs, but that made rr
>> visit basic blocks without seeing any of their predecessors first,
> 
> ...I guess not. :-)  pre_and_rev_post_order_compute should though.
> Could you try that instead?

That seems to work for me.


Bernd

[-- Attachment #2: rr0905.diff --]
[-- Type: text/plain, Size: 25761 bytes --]

	* regrename.c (struct du_head): Make nregs signed.
	(closed_chains): Remove.
	(create_new_chain): Return the new chain.
	(chain_from_id): New static function.
	(dump_def_use_chain): Change argument to be an int, indicating
	the first ID to print.  All callers changed.
	(merge_overlapping_regs): Use chain_from_id.  Assert that
	chains don't conflict with themselves.
	(rename_chains): Take no argument.  Iterate over id_to_chain
	rather to find chains to rename.  Clear tick before the main
	loop.
	(struct incoming_reg_info): New struct.
	(struct bb_rename_info): New struct.
	(init_rename_info, set_incoming_from_chain, merge_chains): New
	static functions.
	(regrename_analyze): New static function, broken out of
	regrename_optimize.  Record and make use of open chain information
	at basic block boundaries, and merge chains where possible.
	(scan_rtx_reg): Make this_nregs signed.  Don't update
	closed_chains.
	(build_def_use): Return a bool to indicate success.  All callers
	changed.  Don't initialize global data here.
	(regrename_optimize): Move most code out of here into
	regrename_analyze.
	* regs.h (add_range_to_hard_reg_set, remove_range_from_hard_reg_set,
	range_overlaps_hard_reg_set_p, range_in_hard_reg_set_p): New
	static inline functions.
	* vec.h (FOR_EACH_VEC_ELT_FROM): New macro.

Index: gcc/regrename.c
===================================================================
--- gcc/regrename.c	(revision 178293)
+++ gcc/regrename.c	(working copy)
@@ -47,18 +47,24 @@
 
      1. Local def/use chains are built: within each basic block, chains are
 	opened and closed; if a chain isn't closed at the end of the block,
-	it is dropped.
+	it is dropped.  We pre-open chains if we have already examined a
+	predecessor block and found chains live at the end which match
+	live registers at the start of the new block.
 
-     2. For each chain, the set of possible renaming registers is computed.
+     2. We try to combine the local chains across basic block boundaries by
+        comparing chains that were open at the start or end of a block to
+	those in successor/predecessor blocks.
+
+     3. For each chain, the set of possible renaming registers is computed.
 	This takes into account the renaming of previously processed chains.
 	Optionally, a preferred class is computed for the renaming register.
 
-     3. The best renaming register is computed for the chain in the above set,
+     4. The best renaming register is computed for the chain in the above set,
 	using a round-robin allocation.  If a preferred class exists, then the
 	round-robin allocation is done within the class first, if possible.
 	The round-robin allocation of renaming registers itself is global.
 
-     4. If a renaming register has been found, it is substituted in the chain.
+     5. If a renaming register has been found, it is substituted in the chain.
 
   Targets can parameterize the pass by specifying a preferred class for the
   renaming register for a given (super)class of registers to be renamed.  */
@@ -75,8 +81,9 @@ struct du_head
   struct du_head *next_chain;
   /* The first and last elements of this chain.  */
   struct du_chain *first, *last;
-  /* Describes the register being tracked.  */
-  unsigned regno, nregs;
+  /* Describe the register being tracked, register number and count.  */
+  unsigned regno;
+  int nregs;
 
   /* A unique id to be used as an index into the conflicts bitmaps.  */
   unsigned id;
@@ -140,6 +147,7 @@ static struct obstack rename_obstack;
 static void do_replace (struct du_head *, int);
 static void scan_rtx (rtx, rtx *, enum reg_class, enum scan_actions,
 		      enum op_type);
+static bool build_def_use (basic_block);
 
 typedef struct du_head *du_head_p;
 DEF_VEC_P (du_head_p);
@@ -151,9 +159,8 @@ static unsigned current_id;
 /* A mapping of unique id numbers to chains.  */
 static VEC(du_head_p, heap) *id_to_chain;
 
-/* List of currently open chains, and closed chains that can be renamed.  */
+/* List of currently open chains.  */
 static struct du_head *open_chains;
-static struct du_head *closed_chains;
 
 /* Bitmap of open chains.  The bits set always match the list found in
    open_chains.  */
@@ -166,14 +173,33 @@ static HARD_REG_SET live_in_chains;
    between this and live_in_chains is empty.  */
 static HARD_REG_SET live_hard_regs;
 
-/* Dump all def/use chains in CHAINS to DUMP_FILE.  */
+/* Return the chain corresponding to id number ID.  Take into account that
+   chains may have been merged.  */
+static du_head_p
+chain_from_id (unsigned int id)
+{
+  du_head_p first_chain = VEC_index (du_head_p, id_to_chain, id);
+  du_head_p chain = first_chain;
+  while (chain->id != id)
+    {
+      id = chain->id;
+      chain = VEC_index (du_head_p, id_to_chain, id);
+    }
+  first_chain->id = id;
+  return chain;
+}
+
+/* Dump all def/use chains, starting at id FROM.  */
 
 static void
-dump_def_use_chain (struct du_head *head)
+dump_def_use_chain (int from)
 {
-  while (head)
+  du_head_p head;
+  int i;
+  FOR_EACH_VEC_ELT_FROM (du_head_p, id_to_chain, i, head, from)
     {
       struct du_chain *this_du = head->first;
+
       fprintf (dump_file, "Register %s (%d):",
 	       reg_names[head->regno], head->nregs);
       while (this_du)
@@ -215,7 +241,7 @@ mark_conflict (struct du_head *chains, u
    and record its occurrence in *LOC, which is being written to in INSN.
    This access requires a register of class CL.  */
 
-static void
+static du_head_p
 create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc,
 		  rtx insn, enum reg_class cl)
 {
@@ -224,7 +250,6 @@ create_new_chain (unsigned this_regno, u
   int nregs;
 
   head->next_chain = open_chains;
-  open_chains = head;
   head->regno = this_regno;
   head->nregs = this_nregs;
   head->need_caller_save_reg = 0;
@@ -264,7 +289,7 @@ create_new_chain (unsigned this_regno, u
   if (insn == NULL_RTX)
     {
       head->first = head->last = NULL;
-      return;
+      return head;
     }
 
   this_du = XOBNEW (&rename_obstack, struct du_chain);
@@ -274,6 +299,7 @@ create_new_chain (unsigned this_regno, u
   this_du->loc = loc;
   this_du->insn = insn;
   this_du->cl = cl;
+  return head;
 }
 
 /* For a def-use chain HEAD, find which registers overlap its lifetime and
@@ -287,8 +313,9 @@ merge_overlapping_regs (HARD_REG_SET *ps
   IOR_HARD_REG_SET (*pset, head->hard_conflicts);
   EXECUTE_IF_SET_IN_BITMAP (&head->conflicts, 0, i, bi)
     {
-      du_head_p other = VEC_index (du_head_p, id_to_chain, i);
+      du_head_p other = chain_from_id (i);
       unsigned j = other->nregs;
+      gcc_assert (other != head);
       while (j-- > 0)
 	SET_HARD_REG_BIT (*pset, other->regno + j);
     }
@@ -341,12 +368,15 @@ check_new_reg_p (int reg ATTRIBUTE_UNUSE
   return true;
 }
 
-/* Process the closed chains starting with ALL_CHAINS and rename
-   registers if possible.  */
+/* Perform register renaming on the current function.  */
 static void
-rename_chains (du_head_p all_chains)
+rename_chains (void)
 {
   HARD_REG_SET unavailable;
+  du_head_p this_head;
+  int i;
+
+  memset (tick, 0, sizeof tick);
 
   CLEAR_HARD_REG_SET (unavailable);
   /* Don't clobber traceback for noreturn functions.  */
@@ -358,11 +388,10 @@ rename_chains (du_head_p all_chains)
 #endif
     }
 
-  while (all_chains)
+  FOR_EACH_VEC_ELT (du_head_p, id_to_chain, i, this_head)
     {
       int new_reg, best_new_reg, best_nregs;
       int n_uses;
-      struct du_head *this_head = all_chains;
       struct du_chain *tmp;
       HARD_REG_SET this_unavailable;
       int reg = this_head->regno;
@@ -371,8 +400,6 @@ rename_chains (du_head_p all_chains)
       enum reg_class preferred_class;
       bool has_preferred_class;
 
-      all_chains = this_head->next_chain;
-
       if (this_head->cannot_rename)
 	continue;
 
@@ -488,14 +515,426 @@ rename_chains (du_head_p all_chains)
     }
 }
 
+/* A structure to record information for each hard register at the start of
+   a basic block.  */
+struct incoming_reg_info {
+  /* Holds the number of registers used in the chain that gave us information
+     about this register.  Zero means no information known yet, while a
+     negative value is used for something that is part of, but not the first
+     register in a multi-register value.  */
+  int nregs;
+  /* Set to true if we have accesses that conflict in the number of registers
+     used.  */
+  bool unusable;
+};
+
+/* A structure recording information about each basic block.  It is saved
+   and restored around basic block boundaries.
+   A pointer to such a structure is stored in each basic block's aux field
+   during regrename_analyze, except for blocks we know can't be optimized
+   (such as entry and exit blocks).  */
+struct bb_rename_info
+{
+  /* The basic block corresponding to this structure.  */
+  basic_block bb;
+  /* Copies of the global information.  */
+  bitmap_head open_chains_set;
+  bitmap_head incoming_open_chains_set;
+  unsigned n_preds;
+  struct incoming_reg_info incoming[FIRST_PSEUDO_REGISTER];
+};
+
+/* Initialize a rename_info structure P for basic block BB, which starts a new
+   scan.  */
+static void
+init_rename_info (struct bb_rename_info *p, basic_block bb)
+{
+  int i;
+  df_ref *def_rec;
+  HARD_REG_SET start_chains_set;
+
+  p->bb = bb;
+  bitmap_initialize (&p->open_chains_set, &bitmap_default_obstack);
+  bitmap_initialize (&p->incoming_open_chains_set, &bitmap_default_obstack);
+
+  open_chains = NULL;
+  bitmap_clear (&open_chains_set);
+
+  CLEAR_HARD_REG_SET (live_in_chains);
+  REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_in (bb));
+  for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
+    {
+      df_ref def = *def_rec;
+      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
+	SET_HARD_REG_BIT (live_hard_regs, DF_REF_REGNO (def));
+    }
+
+  /* Open chains based on information from (at least one) predecessor
+     block.  This gives us a chance later on to combine chains across
+     basic block boundaries.  Inconsistencies (in access sizes) will
+     be caught normally and dealt with conservatively by disabling the
+     chain for renaming, and there is no risk of losing optimization
+     opportunities by opening chains either: if we did not open the
+     chains, we'd have to track the live register as a hard reg, and
+     we'd be unable to rename it in any case.  */
+  CLEAR_HARD_REG_SET (start_chains_set);
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    {
+      struct incoming_reg_info *iri = p->incoming + i;
+      if (iri->nregs > 0 && !iri->unusable
+	  && range_in_hard_reg_set_p (live_hard_regs, i, iri->nregs))
+	{
+	  SET_HARD_REG_BIT (start_chains_set, i);
+	  remove_range_from_hard_reg_set (&live_hard_regs, i, iri->nregs);
+	}
+    }
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    {
+      struct incoming_reg_info *iri = p->incoming + i;
+      if (TEST_HARD_REG_BIT (start_chains_set, i))
+	{
+	  du_head_p chain;
+	  if (dump_file)
+	    fprintf (dump_file, "opening incoming chain\n");
+	  chain = create_new_chain (i, iri->nregs, NULL, NULL_RTX, NO_REGS);
+	  bitmap_set_bit (&p->incoming_open_chains_set, chain->id);
+	}
+    }
+}
+
+/* Record in RI that the block corresponding to it has an incoming
+   live value, described by CHAIN.  */
+static void
+set_incoming_from_chain (struct bb_rename_info *ri, du_head_p chain)
+{
+  int i;
+  int incoming_nregs = ri->incoming[chain->regno].nregs;
+  int nregs;
+
+  /* If we've recorded the same information before, everything is fine.  */
+  if (incoming_nregs == chain->nregs)
+    {
+      if (dump_file)
+	fprintf (dump_file, "reg %d/%d already recorded\n",
+		 chain->regno, chain->nregs);
+      return;
+    }
+
+  /* If we have no information for any of the involved registers, update
+     the incoming array.  */
+  nregs = chain->nregs;
+  while (nregs-- > 0)
+    if (ri->incoming[chain->regno + nregs].nregs != 0
+	|| ri->incoming[chain->regno + nregs].unusable)
+      break;
+  if (nregs < 0)
+    {
+      nregs = chain->nregs;
+      ri->incoming[chain->regno].nregs = nregs;
+      while (nregs-- > 1)
+	ri->incoming[chain->regno + nregs].nregs = -nregs;
+      if (dump_file)
+	fprintf (dump_file, "recorded reg %d/%d\n",
+		 chain->regno, chain->nregs);
+      return;
+    }
+
+  /* There must be some kind of conflict.  Prevent both the old and
+     new ranges from being used.  */
+  if (incoming_nregs < 0)
+    ri->incoming[chain->regno + incoming_nregs].unusable = true;
+  for (i = 0; i < chain->nregs; i++)
+    ri->incoming[chain->regno + i].unusable = true;
+}
+
+/* Merge the two chains C1 and C2 so that all conflict information is
+   recorded and C1, and the id of C2 is changed to that of C1.  */
+static void
+merge_chains (du_head_p c1, du_head_p c2)
+{
+  if (c1 == c2)
+    return;
+
+  if (c2->first != NULL)
+    {
+      if (c1->first == NULL)
+	c1->first = c2->first;
+      else
+	c1->last->next_use = c2->first;
+      c1->last = c2->last;
+    }
+
+  c2->first = c2->last = NULL;
+  c2->id = c1->id;
+
+  IOR_HARD_REG_SET (c1->hard_conflicts, c2->hard_conflicts);
+  bitmap_ior_into (&c1->conflicts, &c2->conflicts);
+
+  c1->need_caller_save_reg |= c2->need_caller_save_reg;
+  c1->cannot_rename |= c2->cannot_rename;
+}
+
+/* Analyze the current function and build chains for renaming.  */
+
+static void
+regrename_analyze (void)
+{
+  struct bb_rename_info *rename_info;
+  int i;
+  basic_block bb;
+  int n_bbs;
+  int *inverse_postorder;
+
+  inverse_postorder = XNEWVEC (int, last_basic_block);
+  n_bbs = pre_and_rev_post_order_compute (NULL, inverse_postorder, false);
+
+  /* Gather some information about the blocks in this function.  */
+  rename_info = XCNEWVEC (struct bb_rename_info, n_bbs);
+  for (i = 0; i < n_bbs; i++)
+    {
+      struct bb_rename_info *ri = rename_info + i;
+      basic_block bb1 = BASIC_BLOCK (inverse_postorder[i]);
+      ri->bb = bb1;
+      if (inverse_postorder[i] < NUM_FIXED_BLOCKS)
+	bb1->aux = NULL;
+      else
+	bb1->aux = ri;
+    }
+
+  current_id = 0;
+  id_to_chain = VEC_alloc (du_head_p, heap, 0);
+  bitmap_initialize (&open_chains_set, &bitmap_default_obstack);
+
+  /* The order in which we visit blocks ensures that whenever
+     possible, we only process a block after at least one of its
+     predecessors, which provides a "seeding" effect to make the logic
+     in set_incoming_from_chain and init_rename_info useful.  */
+
+  for (i = 0; i < n_bbs; i++)
+    {
+      basic_block bb1 = BASIC_BLOCK (inverse_postorder[i]);
+      struct bb_rename_info *this_info = rename_info + i;
+      bool success;
+      edge e;
+      edge_iterator ei;
+      int old_length = VEC_length (du_head_p, id_to_chain);
+
+      if (bb1->aux == NULL)
+	continue;
+
+      if (dump_file)
+	fprintf (dump_file, "\nprocessing block %d:\n", bb1->index);
+
+      init_rename_info (this_info, bb1);
+
+      success = build_def_use (bb1);
+      if (!success)
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "failed\n");
+	  bb1->aux = NULL;
+	  VEC_truncate (du_head_p, id_to_chain, old_length);
+	  current_id = old_length;
+	  bitmap_clear (&this_info->incoming_open_chains_set);
+	  open_chains = NULL;
+	  continue;
+	}
+
+      if (dump_file)
+	dump_def_use_chain (old_length);
+      bitmap_copy (&this_info->open_chains_set, &open_chains_set);
+
+      /* Add successor blocks to the worklist if necessary, and record
+	 data about our own open chains at the end of this block, which
+	 will be used to pre-open chains when processing the successors.  */
+      FOR_EACH_EDGE (e, ei, bb1->succs)
+	{
+	  struct bb_rename_info *dest_ri;
+	  struct du_head *chain;
+
+	  if (dump_file)
+	    fprintf (dump_file, "successor block %d\n", e->dest->index);
+
+	  if (e->flags & (EDGE_EH | EDGE_ABNORMAL))
+	    continue;
+	  dest_ri = (struct bb_rename_info *)e->dest->aux;
+	  if (dest_ri == NULL)
+	    continue;
+	  for (chain = open_chains; chain; chain = chain->next_chain)
+	    set_incoming_from_chain (dest_ri, chain);
+	}
+    }
+  free (inverse_postorder);
+
+  /* Now, combine the chains data we have gathered across basic block
+     boundaries.
+
+     For every basic block, there may be chains open at the start, or at the
+     end.  Rather than exclude them from renaming, we look for open chains
+     with matching registers at the other side of the CFG edge.
+
+     For a given chain using register R, open at the start of block B, we
+     must find an open chain using R on the other side of every edge leading
+     to B, if the register is live across this edge.  In the code below,
+     N_PREDS_USED counts the number of edges where the register is live, and
+     N_PREDS_JOINED counts those where we found an appropriate chain for
+     joining.
+
+     We perform the analysis for both incoming and outgoing edges, but we
+     only need to merge once (in the second part, after verifying outgoing
+     edges).  */
+  for (i = 0; i < n_bbs; i++)
+    {
+      struct bb_rename_info *bb_ri = rename_info + i;
+      unsigned j;
+      bitmap_iterator bi;
+
+      if (bb_ri->bb->aux == NULL)
+	continue;
+
+      if (dump_file)
+	fprintf (dump_file, "processing bb %d in edges\n", bb_ri->bb->index);
+
+      EXECUTE_IF_SET_IN_BITMAP (&bb_ri->incoming_open_chains_set, 0, j, bi)
+	{
+	  edge e;
+	  edge_iterator ei;
+	  struct du_head *chain = chain_from_id (j);
+	  int n_preds_used = 0, n_preds_joined = 0;
+
+	  FOR_EACH_EDGE (e, ei, bb_ri->bb->preds)
+	    {
+	      struct bb_rename_info *src_ri;
+	      unsigned k;
+	      bitmap_iterator bi2;
+	      HARD_REG_SET live;
+	      bool success = false;
+
+	      REG_SET_TO_HARD_REG_SET (live, df_get_live_out (e->src));
+	      if (!range_overlaps_hard_reg_set_p (live, chain->regno,
+						  chain->nregs))
+		continue;
+	      n_preds_used++;
+
+	      if (e->flags & (EDGE_EH | EDGE_ABNORMAL))
+		continue;
+
+	      src_ri = (struct bb_rename_info *)e->src->aux;
+	      if (src_ri == NULL)
+		continue;
+
+	      EXECUTE_IF_SET_IN_BITMAP (&src_ri->open_chains_set,
+					0, k, bi2)
+		{
+		  struct du_head *outgoing_chain = chain_from_id (k);
+
+		  if (outgoing_chain->regno == chain->regno
+		      && outgoing_chain->nregs == chain->nregs)
+		    {
+		      n_preds_joined++;
+		      success = true;
+		      break;
+		    }
+		}
+	      if (!success && dump_file)
+		fprintf (dump_file, "failure to match with pred block %d\n",
+			 e->src->index);
+	    }
+	  if (n_preds_joined < n_preds_used)
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "cannot rename chain %d\n", j);
+	      chain->cannot_rename = 1;
+	    }
+	}
+    }
+  for (i = 0; i < n_bbs; i++)
+    {
+      struct bb_rename_info *bb_ri = rename_info + i;
+      unsigned j;
+      bitmap_iterator bi;
+
+      if (bb_ri->bb->aux == NULL)
+	continue;
+
+      if (dump_file)
+	fprintf (dump_file, "processing bb %d out edges\n", bb_ri->bb->index);
+
+      EXECUTE_IF_SET_IN_BITMAP (&bb_ri->open_chains_set, 0, j, bi)
+	{
+	  edge e;
+	  edge_iterator ei;
+	  struct du_head *chain = chain_from_id (j);
+	  int n_succs_used = 0, n_succs_joined = 0;
+
+	  FOR_EACH_EDGE (e, ei, bb_ri->bb->succs)
+	    {
+	      bool printed = false;
+	      struct bb_rename_info *dest_ri;
+	      unsigned k;
+	      bitmap_iterator bi2;
+	      HARD_REG_SET live;
+
+	      REG_SET_TO_HARD_REG_SET (live, df_get_live_in (e->dest));
+	      if (!range_overlaps_hard_reg_set_p (live, chain->regno,
+						  chain->nregs))
+		continue;
+	      
+	      n_succs_used++;
+
+	      dest_ri = (struct bb_rename_info *)e->dest->aux;
+	      if (dest_ri == NULL)
+		continue;
+
+	      EXECUTE_IF_SET_IN_BITMAP (&dest_ri->incoming_open_chains_set,
+					0, k, bi2)
+		{
+		  struct du_head *incoming_chain = chain_from_id (k);
+
+		  if (incoming_chain->regno == chain->regno
+		      && incoming_chain->nregs == chain->nregs)
+		    {
+		      if (dump_file)
+			{
+			  if (!printed)
+			    fprintf (dump_file,
+				     "merging blocks for edge %d -> %d\n",
+				     e->src->index, e->dest->index);
+			  printed = true;
+			  fprintf (dump_file,
+				   "  merging chains %d (->%d) and %d (->%d) [%s]\n",
+				   k, incoming_chain->id, j, chain->id, 
+				   reg_names[incoming_chain->regno]);
+			}
+
+		      merge_chains (chain, incoming_chain);
+		      n_succs_joined++;
+		      break;
+		    }
+		}
+	    }
+	  if (n_succs_joined < n_succs_used)
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "cannot rename chain %d\n",
+			 j);
+	      chain->cannot_rename = 1;
+	    }
+	}
+    }
+
+  free (rename_info);
+
+  FOR_EACH_BB (bb)
+    bb->aux = NULL;
+}
+
 static void
 do_replace (struct du_head *head, int reg)
 {
   struct du_chain *chain;
   unsigned int base_regno = head->regno;
 
-  gcc_assert (! DEBUG_INSN_P (head->first->insn));
-
   for (chain = head->first; chain; chain = chain->next_use)
     {
       unsigned int regno = ORIGINAL_REGNO (*chain->loc);
@@ -591,7 +1030,7 @@ scan_rtx_reg (rtx insn, rtx *loc, enum r
   rtx x = *loc;
   enum machine_mode mode = GET_MODE (x);
   unsigned this_regno = REGNO (x);
-  unsigned this_nregs = hard_regno_nregs[this_regno][mode];
+  int this_nregs = hard_regno_nregs[this_regno][mode];
 
   if (action == mark_write)
     {
@@ -691,8 +1130,6 @@ scan_rtx_reg (rtx insn, rtx *loc, enum r
 
 	  if (subset && !superset)
 	    head->cannot_rename = 1;
-	  head->next_chain = closed_chains;
-	  closed_chains = head;
 	  bitmap_clear_bit (&open_chains_set, head->id);
 
 	  nregs = head->nregs;
@@ -1078,28 +1515,14 @@ record_out_operands (rtx insn, bool earl
 
 /* Build def/use chain.  */
 
-static struct du_head *
+static bool
 build_def_use (basic_block bb)
 {
   rtx insn;
-  df_ref *def_rec;
   unsigned HOST_WIDE_INT untracked_operands;
 
-  open_chains = closed_chains = NULL;
-
   fail_current_block = false;
 
-  current_id = 0;
-  bitmap_initialize (&open_chains_set, &bitmap_default_obstack);
-  CLEAR_HARD_REG_SET (live_in_chains);
-  REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_in (bb));
-  for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
-    {
-      df_ref def = *def_rec;
-      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
-	SET_HARD_REG_BIT (live_hard_regs, DF_REF_REGNO (def));
-    }
-
   for (insn = BB_HEAD (bb); ; insn = NEXT_INSN (insn))
     {
       if (NONDEBUG_INSN_P (insn))
@@ -1338,14 +1761,10 @@ build_def_use (basic_block bb)
 	break;
     }
 
-  bitmap_clear (&open_chains_set);
-
   if (fail_current_block)
-    return NULL;
+    return false;
 
-  /* Since we close every chain when we find a REG_DEAD note, anything that
-     is still open lives past the basic block, so it can't be renamed.  */
-  return closed_chains;
+  return true;
 }
 \f
 /* Perform register renaming on the current function.  */
@@ -1353,44 +1772,20 @@ build_def_use (basic_block bb)
 static unsigned int
 regrename_optimize (void)
 {
-  basic_block bb;
-  char *first_obj;
-
   df_set_flags (DF_LR_RUN_DCE);
   df_note_add_problem ();
   df_analyze ();
   df_set_flags (DF_DEFER_INSN_RESCAN);
 
-  memset (tick, 0, sizeof tick);
-
   gcc_obstack_init (&rename_obstack);
-  first_obj = XOBNEWVAR (&rename_obstack, char, 0);
-
-  FOR_EACH_BB (bb)
-    {
-      struct du_head *all_chains = 0;
-
-      id_to_chain = VEC_alloc (du_head_p, heap, 0);
-
-      if (dump_file)
-	fprintf (dump_file, "\nBasic block %d:\n", bb->index);
 
-      all_chains = build_def_use (bb);
-
-      if (dump_file)
-	dump_def_use_chain (all_chains);
-
-      rename_chains (all_chains);
+  regrename_analyze ();
 
-      free_chain_data ();
-      obstack_free (&rename_obstack, first_obj);
-    }
+  rename_chains ();
 
+  free_chain_data ();
   obstack_free (&rename_obstack, NULL);
 
-  if (dump_file)
-    fputc ('\n', dump_file);
-
   return 0;
 }
 \f
Index: gcc/regs.h
===================================================================
--- gcc/regs.h	(revision 178293)
+++ gcc/regs.h	(working copy)
@@ -397,4 +397,48 @@ overlaps_hard_reg_set_p (const HARD_REG_
   return false;
 }
 
+/* Like add_to_hard_reg_set, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+
+static inline void
+add_range_to_hard_reg_set (HARD_REG_SET *regs, unsigned int regno,
+			   int nregs)
+{
+  while (nregs-- > 0)
+    SET_HARD_REG_BIT (*regs, regno + nregs);
+}
+
+/* Likewise, but remove the registers.  */
+
+static inline void
+remove_range_from_hard_reg_set (HARD_REG_SET *regs, unsigned int regno,
+				int nregs)
+{
+  while (nregs-- > 0)
+    CLEAR_HARD_REG_BIT (*regs, regno + nregs);
+}
+
+/* Like overlaps_hard_reg_set_p, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+static inline bool
+range_overlaps_hard_reg_set_p (const HARD_REG_SET set, unsigned regno,
+			       int nregs)
+{
+  while (nregs-- > 0)
+    if (TEST_HARD_REG_BIT (set, regno + nregs))
+      return true;
+  return false;
+}
+
+/* Like in_hard_reg_set_p, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+static inline bool
+range_in_hard_reg_set_p (const HARD_REG_SET set, unsigned regno, int nregs)
+{
+  while (nregs-- > 0)
+    if (!TEST_HARD_REG_BIT (set, regno + nregs))
+      return false;
+  return true;
+}
+
 #endif /* GCC_REGS_H */
Index: gcc/vec.h
===================================================================
--- gcc/vec.h	(revision 178293)
+++ gcc/vec.h	(working copy)
@@ -195,6 +195,11 @@ along with GCC; see the file COPYING3.
 #define FOR_EACH_VEC_ELT(T, V, I, P)		\
   for (I = 0; VEC_iterate (T, (V), (I), (P)); ++(I))
 
+/* Likewise, but start from FROM rather than 0.  */
+
+#define FOR_EACH_VEC_ELT_FROM(T, V, I, P, FROM)		\
+  for (I = (FROM); VEC_iterate (T, (V), (I), (P)); ++(I))
+
 /* Convenience macro for reverse iteration.  */
 
 #define FOR_EACH_VEC_ELT_REVERSE(T,V,I,P) \

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Rename across basic block boundaries
  2011-09-05 15:36           ` Bernd Schmidt
@ 2011-09-06 10:37             ` Richard Sandiford
  2011-09-07 16:07               ` Bernd Schmidt
  0 siblings, 1 reply; 11+ messages in thread
From: Richard Sandiford @ 2011-09-06 10:37 UTC (permalink / raw)
  To: Bernd Schmidt; +Cc: GCC Patches

Bernd Schmidt <bernds@codesourcery.com> writes:
> On 09/01/11 16:16, Richard Sandiford wrote:
>> Bernd Schmidt <bernds@codesourcery.com> writes:
>>> On 08/26/11 14:57, Richard Sandiford wrote:
>>>> Wouldn't a reverse post-order (inverted_post_order_compute) allow even
>>>> more pre-opening (as well as being less code)?
>>>
>>> I can't really tell from the comments what that function is supposed to
>>> produce.
>> 
>> Sorry, I thought it was supposed to produce a reverse postorder, but...
>> 
>>> I've made a change to use it to order the bbs, but that made rr
>>> visit basic blocks without seeing any of their predecessors first,
>> 
>> ...I guess not. :-)  pre_and_rev_post_order_compute should though.
>> Could you try that instead?
>
> That seems to work for me.

Looks good to me.  Maybe here:

> +  /* The order in which we visit blocks ensures that whenever
> +     possible, we only process a block after at least one of its
> +     predecessors, which provides a "seeding" effect to make the logic
> +     in set_incoming_from_chain and init_rename_info useful.  */
> +
> +  for (i = 0; i < n_bbs; i++)
> +    {
> +      basic_block bb1 = BASIC_BLOCK (inverse_postorder[i]);
> +      struct bb_rename_info *this_info = rename_info + i;
> [...]
> +      if (bb1->aux == NULL)
> +	continue;

it would be better to use:

  this_info = (struct bb_rename_info *) bb1->aux;

  if (this_info == NULL)
    continue;

so that we don't care which order the rename_info array is.  You could
then keep the original form of the first loop:

  /* Gather some information about the blocks in this function.  */
  rename_info = XCNEWVEC (struct bb_rename_info, n_basic_blocks);
  ri_index = 0;
  FOR_EACH_BB (bb)
    {
      struct bb_rename_info *ri = rename_info + ri_index;
      ri->bb = bb;
      ri->n_preds = EDGE_COUNT (bb->preds);
      bb->aux = ri;
      ri_index++;
    }

OK with me whichever.

Richard

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: Rename across basic block boundaries
  2011-09-06 10:37             ` Richard Sandiford
@ 2011-09-07 16:07               ` Bernd Schmidt
  0 siblings, 0 replies; 11+ messages in thread
From: Bernd Schmidt @ 2011-09-07 16:07 UTC (permalink / raw)
  To: GCC Patches, richard.sandiford

[-- Attachment #1: Type: text/plain, Size: 418 bytes --]

On 09/06/11 12:37, Richard Sandiford wrote:

>  Maybe here:
[...]

> it would be better to use:
> 
>   this_info = (struct bb_rename_info *) bb1->aux;
> 
>   if (this_info == NULL)
>     continue;
> 
> so that we don't care which order the rename_info array is.  You could
> then keep the original form of the first loop:

> OK with me whichever.

Thanks! I've committed the following version after retesting.


Bernd

[-- Attachment #2: rr0907.diff --]
[-- Type: text/plain, Size: 24317 bytes --]

Index: gcc/regrename.c
===================================================================
--- gcc/regrename.c	(revision 178596)
+++ gcc/regrename.c	(working copy)
@@ -47,18 +47,24 @@
 
      1. Local def/use chains are built: within each basic block, chains are
 	opened and closed; if a chain isn't closed at the end of the block,
-	it is dropped.
+	it is dropped.  We pre-open chains if we have already examined a
+	predecessor block and found chains live at the end which match
+	live registers at the start of the new block.
 
-     2. For each chain, the set of possible renaming registers is computed.
+     2. We try to combine the local chains across basic block boundaries by
+        comparing chains that were open at the start or end of a block to
+	those in successor/predecessor blocks.
+
+     3. For each chain, the set of possible renaming registers is computed.
 	This takes into account the renaming of previously processed chains.
 	Optionally, a preferred class is computed for the renaming register.
 
-     3. The best renaming register is computed for the chain in the above set,
+     4. The best renaming register is computed for the chain in the above set,
 	using a round-robin allocation.  If a preferred class exists, then the
 	round-robin allocation is done within the class first, if possible.
 	The round-robin allocation of renaming registers itself is global.
 
-     4. If a renaming register has been found, it is substituted in the chain.
+     5. If a renaming register has been found, it is substituted in the chain.
 
   Targets can parameterize the pass by specifying a preferred class for the
   renaming register for a given (super)class of registers to be renamed.  */
@@ -75,8 +81,9 @@ struct du_head
   struct du_head *next_chain;
   /* The first and last elements of this chain.  */
   struct du_chain *first, *last;
-  /* Describes the register being tracked.  */
-  unsigned regno, nregs;
+  /* Describe the register being tracked, register number and count.  */
+  unsigned regno;
+  int nregs;
 
   /* A unique id to be used as an index into the conflicts bitmaps.  */
   unsigned id;
@@ -140,6 +147,7 @@ static struct obstack rename_obstack;
 static void do_replace (struct du_head *, int);
 static void scan_rtx (rtx, rtx *, enum reg_class, enum scan_actions,
 		      enum op_type);
+static bool build_def_use (basic_block);
 
 typedef struct du_head *du_head_p;
 DEF_VEC_P (du_head_p);
@@ -151,9 +159,8 @@ static unsigned current_id;
 /* A mapping of unique id numbers to chains.  */
 static VEC(du_head_p, heap) *id_to_chain;
 
-/* List of currently open chains, and closed chains that can be renamed.  */
+/* List of currently open chains.  */
 static struct du_head *open_chains;
-static struct du_head *closed_chains;
 
 /* Bitmap of open chains.  The bits set always match the list found in
    open_chains.  */
@@ -166,14 +173,33 @@ static HARD_REG_SET live_in_chains;
    between this and live_in_chains is empty.  */
 static HARD_REG_SET live_hard_regs;
 
-/* Dump all def/use chains in CHAINS to DUMP_FILE.  */
+/* Return the chain corresponding to id number ID.  Take into account that
+   chains may have been merged.  */
+static du_head_p
+chain_from_id (unsigned int id)
+{
+  du_head_p first_chain = VEC_index (du_head_p, id_to_chain, id);
+  du_head_p chain = first_chain;
+  while (chain->id != id)
+    {
+      id = chain->id;
+      chain = VEC_index (du_head_p, id_to_chain, id);
+    }
+  first_chain->id = id;
+  return chain;
+}
+
+/* Dump all def/use chains, starting at id FROM.  */
 
 static void
-dump_def_use_chain (struct du_head *head)
+dump_def_use_chain (int from)
 {
-  while (head)
+  du_head_p head;
+  int i;
+  FOR_EACH_VEC_ELT_FROM (du_head_p, id_to_chain, i, head, from)
     {
       struct du_chain *this_du = head->first;
+
       fprintf (dump_file, "Register %s (%d):",
 	       reg_names[head->regno], head->nregs);
       while (this_du)
@@ -215,7 +241,7 @@ mark_conflict (struct du_head *chains, u
    and record its occurrence in *LOC, which is being written to in INSN.
    This access requires a register of class CL.  */
 
-static void
+static du_head_p
 create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc,
 		  rtx insn, enum reg_class cl)
 {
@@ -224,7 +250,6 @@ create_new_chain (unsigned this_regno, u
   int nregs;
 
   head->next_chain = open_chains;
-  open_chains = head;
   head->regno = this_regno;
   head->nregs = this_nregs;
   head->need_caller_save_reg = 0;
@@ -264,7 +289,7 @@ create_new_chain (unsigned this_regno, u
   if (insn == NULL_RTX)
     {
       head->first = head->last = NULL;
-      return;
+      return head;
     }
 
   this_du = XOBNEW (&rename_obstack, struct du_chain);
@@ -274,6 +299,7 @@ create_new_chain (unsigned this_regno, u
   this_du->loc = loc;
   this_du->insn = insn;
   this_du->cl = cl;
+  return head;
 }
 
 /* For a def-use chain HEAD, find which registers overlap its lifetime and
@@ -287,8 +313,9 @@ merge_overlapping_regs (HARD_REG_SET *ps
   IOR_HARD_REG_SET (*pset, head->hard_conflicts);
   EXECUTE_IF_SET_IN_BITMAP (&head->conflicts, 0, i, bi)
     {
-      du_head_p other = VEC_index (du_head_p, id_to_chain, i);
+      du_head_p other = chain_from_id (i);
       unsigned j = other->nregs;
+      gcc_assert (other != head);
       while (j-- > 0)
 	SET_HARD_REG_BIT (*pset, other->regno + j);
     }
@@ -341,12 +368,15 @@ check_new_reg_p (int reg ATTRIBUTE_UNUSE
   return true;
 }
 
-/* Process the closed chains starting with ALL_CHAINS and rename
-   registers if possible.  */
+/* Perform register renaming on the current function.  */
 static void
-rename_chains (du_head_p all_chains)
+rename_chains (void)
 {
   HARD_REG_SET unavailable;
+  du_head_p this_head;
+  int i;
+
+  memset (tick, 0, sizeof tick);
 
   CLEAR_HARD_REG_SET (unavailable);
   /* Don't clobber traceback for noreturn functions.  */
@@ -358,11 +388,10 @@ rename_chains (du_head_p all_chains)
 #endif
     }
 
-  while (all_chains)
+  FOR_EACH_VEC_ELT (du_head_p, id_to_chain, i, this_head)
     {
       int new_reg, best_new_reg, best_nregs;
       int n_uses;
-      struct du_head *this_head = all_chains;
       struct du_chain *tmp;
       HARD_REG_SET this_unavailable;
       int reg = this_head->regno;
@@ -371,8 +400,6 @@ rename_chains (du_head_p all_chains)
       enum reg_class preferred_class;
       bool has_preferred_class;
 
-      all_chains = this_head->next_chain;
-
       if (this_head->cannot_rename)
 	continue;
 
@@ -488,14 +515,425 @@ rename_chains (du_head_p all_chains)
     }
 }
 
+/* A structure to record information for each hard register at the start of
+   a basic block.  */
+struct incoming_reg_info {
+  /* Holds the number of registers used in the chain that gave us information
+     about this register.  Zero means no information known yet, while a
+     negative value is used for something that is part of, but not the first
+     register in a multi-register value.  */
+  int nregs;
+  /* Set to true if we have accesses that conflict in the number of registers
+     used.  */
+  bool unusable;
+};
+
+/* A structure recording information about each basic block.  It is saved
+   and restored around basic block boundaries.
+   A pointer to such a structure is stored in each basic block's aux field
+   during regrename_analyze, except for blocks we know can't be optimized
+   (such as entry and exit blocks).  */
+struct bb_rename_info
+{
+  /* The basic block corresponding to this structure.  */
+  basic_block bb;
+  /* Copies of the global information.  */
+  bitmap_head open_chains_set;
+  bitmap_head incoming_open_chains_set;
+  struct incoming_reg_info incoming[FIRST_PSEUDO_REGISTER];
+};
+
+/* Initialize a rename_info structure P for basic block BB, which starts a new
+   scan.  */
+static void
+init_rename_info (struct bb_rename_info *p, basic_block bb)
+{
+  int i;
+  df_ref *def_rec;
+  HARD_REG_SET start_chains_set;
+
+  p->bb = bb;
+  bitmap_initialize (&p->open_chains_set, &bitmap_default_obstack);
+  bitmap_initialize (&p->incoming_open_chains_set, &bitmap_default_obstack);
+
+  open_chains = NULL;
+  bitmap_clear (&open_chains_set);
+
+  CLEAR_HARD_REG_SET (live_in_chains);
+  REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_in (bb));
+  for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
+    {
+      df_ref def = *def_rec;
+      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
+	SET_HARD_REG_BIT (live_hard_regs, DF_REF_REGNO (def));
+    }
+
+  /* Open chains based on information from (at least one) predecessor
+     block.  This gives us a chance later on to combine chains across
+     basic block boundaries.  Inconsistencies (in access sizes) will
+     be caught normally and dealt with conservatively by disabling the
+     chain for renaming, and there is no risk of losing optimization
+     opportunities by opening chains either: if we did not open the
+     chains, we'd have to track the live register as a hard reg, and
+     we'd be unable to rename it in any case.  */
+  CLEAR_HARD_REG_SET (start_chains_set);
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    {
+      struct incoming_reg_info *iri = p->incoming + i;
+      if (iri->nregs > 0 && !iri->unusable
+	  && range_in_hard_reg_set_p (live_hard_regs, i, iri->nregs))
+	{
+	  SET_HARD_REG_BIT (start_chains_set, i);
+	  remove_range_from_hard_reg_set (&live_hard_regs, i, iri->nregs);
+	}
+    }
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+    {
+      struct incoming_reg_info *iri = p->incoming + i;
+      if (TEST_HARD_REG_BIT (start_chains_set, i))
+	{
+	  du_head_p chain;
+	  if (dump_file)
+	    fprintf (dump_file, "opening incoming chain\n");
+	  chain = create_new_chain (i, iri->nregs, NULL, NULL_RTX, NO_REGS);
+	  bitmap_set_bit (&p->incoming_open_chains_set, chain->id);
+	}
+    }
+}
+
+/* Record in RI that the block corresponding to it has an incoming
+   live value, described by CHAIN.  */
+static void
+set_incoming_from_chain (struct bb_rename_info *ri, du_head_p chain)
+{
+  int i;
+  int incoming_nregs = ri->incoming[chain->regno].nregs;
+  int nregs;
+
+  /* If we've recorded the same information before, everything is fine.  */
+  if (incoming_nregs == chain->nregs)
+    {
+      if (dump_file)
+	fprintf (dump_file, "reg %d/%d already recorded\n",
+		 chain->regno, chain->nregs);
+      return;
+    }
+
+  /* If we have no information for any of the involved registers, update
+     the incoming array.  */
+  nregs = chain->nregs;
+  while (nregs-- > 0)
+    if (ri->incoming[chain->regno + nregs].nregs != 0
+	|| ri->incoming[chain->regno + nregs].unusable)
+      break;
+  if (nregs < 0)
+    {
+      nregs = chain->nregs;
+      ri->incoming[chain->regno].nregs = nregs;
+      while (nregs-- > 1)
+	ri->incoming[chain->regno + nregs].nregs = -nregs;
+      if (dump_file)
+	fprintf (dump_file, "recorded reg %d/%d\n",
+		 chain->regno, chain->nregs);
+      return;
+    }
+
+  /* There must be some kind of conflict.  Prevent both the old and
+     new ranges from being used.  */
+  if (incoming_nregs < 0)
+    ri->incoming[chain->regno + incoming_nregs].unusable = true;
+  for (i = 0; i < chain->nregs; i++)
+    ri->incoming[chain->regno + i].unusable = true;
+}
+
+/* Merge the two chains C1 and C2 so that all conflict information is
+   recorded and C1, and the id of C2 is changed to that of C1.  */
+static void
+merge_chains (du_head_p c1, du_head_p c2)
+{
+  if (c1 == c2)
+    return;
+
+  if (c2->first != NULL)
+    {
+      if (c1->first == NULL)
+	c1->first = c2->first;
+      else
+	c1->last->next_use = c2->first;
+      c1->last = c2->last;
+    }
+
+  c2->first = c2->last = NULL;
+  c2->id = c1->id;
+
+  IOR_HARD_REG_SET (c1->hard_conflicts, c2->hard_conflicts);
+  bitmap_ior_into (&c1->conflicts, &c2->conflicts);
+
+  c1->need_caller_save_reg |= c2->need_caller_save_reg;
+  c1->cannot_rename |= c2->cannot_rename;
+}
+
+/* Analyze the current function and build chains for renaming.  */
+
+static void
+regrename_analyze (void)
+{
+  struct bb_rename_info *rename_info;
+  int i;
+  basic_block bb;
+  int n_bbs;
+  int *inverse_postorder;
+
+  inverse_postorder = XNEWVEC (int, last_basic_block);
+  n_bbs = pre_and_rev_post_order_compute (NULL, inverse_postorder, false);
+
+  /* Gather some information about the blocks in this function.  */
+  rename_info = XCNEWVEC (struct bb_rename_info, n_basic_blocks);
+  i = 0;
+  FOR_EACH_BB (bb)
+    {
+      struct bb_rename_info *ri = rename_info + i;
+      ri->bb = bb;
+      bb->aux = ri;
+      i++;
+    }
+
+  current_id = 0;
+  id_to_chain = VEC_alloc (du_head_p, heap, 0);
+  bitmap_initialize (&open_chains_set, &bitmap_default_obstack);
+
+  /* The order in which we visit blocks ensures that whenever
+     possible, we only process a block after at least one of its
+     predecessors, which provides a "seeding" effect to make the logic
+     in set_incoming_from_chain and init_rename_info useful.  */
+
+  for (i = 0; i < n_bbs; i++)
+    {
+      basic_block bb1 = BASIC_BLOCK (inverse_postorder[i]);
+      struct bb_rename_info *this_info;
+      bool success;
+      edge e;
+      edge_iterator ei;
+      int old_length = VEC_length (du_head_p, id_to_chain);
+
+      this_info = (struct bb_rename_info *) bb1->aux;
+      if (this_info == NULL)
+	continue;
+
+      if (dump_file)
+	fprintf (dump_file, "\nprocessing block %d:\n", bb1->index);
+
+      init_rename_info (this_info, bb1);
+
+      success = build_def_use (bb1);
+      if (!success)
+	{
+	  if (dump_file)
+	    fprintf (dump_file, "failed\n");
+	  bb1->aux = NULL;
+	  VEC_truncate (du_head_p, id_to_chain, old_length);
+	  current_id = old_length;
+	  bitmap_clear (&this_info->incoming_open_chains_set);
+	  open_chains = NULL;
+	  continue;
+	}
+
+      if (dump_file)
+	dump_def_use_chain (old_length);
+      bitmap_copy (&this_info->open_chains_set, &open_chains_set);
+
+      /* Add successor blocks to the worklist if necessary, and record
+	 data about our own open chains at the end of this block, which
+	 will be used to pre-open chains when processing the successors.  */
+      FOR_EACH_EDGE (e, ei, bb1->succs)
+	{
+	  struct bb_rename_info *dest_ri;
+	  struct du_head *chain;
+
+	  if (dump_file)
+	    fprintf (dump_file, "successor block %d\n", e->dest->index);
+
+	  if (e->flags & (EDGE_EH | EDGE_ABNORMAL))
+	    continue;
+	  dest_ri = (struct bb_rename_info *)e->dest->aux;
+	  if (dest_ri == NULL)
+	    continue;
+	  for (chain = open_chains; chain; chain = chain->next_chain)
+	    set_incoming_from_chain (dest_ri, chain);
+	}
+    }
+
+  free (inverse_postorder);
+
+  /* Now, combine the chains data we have gathered across basic block
+     boundaries.
+
+     For every basic block, there may be chains open at the start, or at the
+     end.  Rather than exclude them from renaming, we look for open chains
+     with matching registers at the other side of the CFG edge.
+
+     For a given chain using register R, open at the start of block B, we
+     must find an open chain using R on the other side of every edge leading
+     to B, if the register is live across this edge.  In the code below,
+     N_PREDS_USED counts the number of edges where the register is live, and
+     N_PREDS_JOINED counts those where we found an appropriate chain for
+     joining.
+
+     We perform the analysis for both incoming and outgoing edges, but we
+     only need to merge once (in the second part, after verifying outgoing
+     edges).  */
+  FOR_EACH_BB (bb)
+    {
+      struct bb_rename_info *bb_ri = (struct bb_rename_info *) bb->aux;
+      unsigned j;
+      bitmap_iterator bi;
+
+      if (bb_ri == NULL)
+	continue;
+
+      if (dump_file)
+	fprintf (dump_file, "processing bb %d in edges\n", bb->index);
+
+      EXECUTE_IF_SET_IN_BITMAP (&bb_ri->incoming_open_chains_set, 0, j, bi)
+	{
+	  edge e;
+	  edge_iterator ei;
+	  struct du_head *chain = chain_from_id (j);
+	  int n_preds_used = 0, n_preds_joined = 0;
+
+	  FOR_EACH_EDGE (e, ei, bb->preds)
+	    {
+	      struct bb_rename_info *src_ri;
+	      unsigned k;
+	      bitmap_iterator bi2;
+	      HARD_REG_SET live;
+	      bool success = false;
+
+	      REG_SET_TO_HARD_REG_SET (live, df_get_live_out (e->src));
+	      if (!range_overlaps_hard_reg_set_p (live, chain->regno,
+						  chain->nregs))
+		continue;
+	      n_preds_used++;
+
+	      if (e->flags & (EDGE_EH | EDGE_ABNORMAL))
+		continue;
+
+	      src_ri = (struct bb_rename_info *)e->src->aux;
+	      if (src_ri == NULL)
+		continue;
+
+	      EXECUTE_IF_SET_IN_BITMAP (&src_ri->open_chains_set,
+					0, k, bi2)
+		{
+		  struct du_head *outgoing_chain = chain_from_id (k);
+
+		  if (outgoing_chain->regno == chain->regno
+		      && outgoing_chain->nregs == chain->nregs)
+		    {
+		      n_preds_joined++;
+		      success = true;
+		      break;
+		    }
+		}
+	      if (!success && dump_file)
+		fprintf (dump_file, "failure to match with pred block %d\n",
+			 e->src->index);
+	    }
+	  if (n_preds_joined < n_preds_used)
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "cannot rename chain %d\n", j);
+	      chain->cannot_rename = 1;
+	    }
+	}
+    }
+  FOR_EACH_BB (bb)
+    {
+      struct bb_rename_info *bb_ri = (struct bb_rename_info *) bb->aux;
+      unsigned j;
+      bitmap_iterator bi;
+
+      if (bb_ri == NULL)
+	continue;
+
+      if (dump_file)
+	fprintf (dump_file, "processing bb %d out edges\n", bb->index);
+
+      EXECUTE_IF_SET_IN_BITMAP (&bb_ri->open_chains_set, 0, j, bi)
+	{
+	  edge e;
+	  edge_iterator ei;
+	  struct du_head *chain = chain_from_id (j);
+	  int n_succs_used = 0, n_succs_joined = 0;
+
+	  FOR_EACH_EDGE (e, ei, bb->succs)
+	    {
+	      bool printed = false;
+	      struct bb_rename_info *dest_ri;
+	      unsigned k;
+	      bitmap_iterator bi2;
+	      HARD_REG_SET live;
+
+	      REG_SET_TO_HARD_REG_SET (live, df_get_live_in (e->dest));
+	      if (!range_overlaps_hard_reg_set_p (live, chain->regno,
+						  chain->nregs))
+		continue;
+	      
+	      n_succs_used++;
+
+	      dest_ri = (struct bb_rename_info *)e->dest->aux;
+	      if (dest_ri == NULL)
+		continue;
+
+	      EXECUTE_IF_SET_IN_BITMAP (&dest_ri->incoming_open_chains_set,
+					0, k, bi2)
+		{
+		  struct du_head *incoming_chain = chain_from_id (k);
+
+		  if (incoming_chain->regno == chain->regno
+		      && incoming_chain->nregs == chain->nregs)
+		    {
+		      if (dump_file)
+			{
+			  if (!printed)
+			    fprintf (dump_file,
+				     "merging blocks for edge %d -> %d\n",
+				     e->src->index, e->dest->index);
+			  printed = true;
+			  fprintf (dump_file,
+				   "  merging chains %d (->%d) and %d (->%d) [%s]\n",
+				   k, incoming_chain->id, j, chain->id, 
+				   reg_names[incoming_chain->regno]);
+			}
+
+		      merge_chains (chain, incoming_chain);
+		      n_succs_joined++;
+		      break;
+		    }
+		}
+	    }
+	  if (n_succs_joined < n_succs_used)
+	    {
+	      if (dump_file)
+		fprintf (dump_file, "cannot rename chain %d\n",
+			 j);
+	      chain->cannot_rename = 1;
+	    }
+	}
+    }
+
+  free (rename_info);
+
+  FOR_EACH_BB (bb)
+    bb->aux = NULL;
+}
+
 static void
 do_replace (struct du_head *head, int reg)
 {
   struct du_chain *chain;
   unsigned int base_regno = head->regno;
 
-  gcc_assert (! DEBUG_INSN_P (head->first->insn));
-
   for (chain = head->first; chain; chain = chain->next_use)
     {
       unsigned int regno = ORIGINAL_REGNO (*chain->loc);
@@ -591,7 +1029,7 @@ scan_rtx_reg (rtx insn, rtx *loc, enum r
   rtx x = *loc;
   enum machine_mode mode = GET_MODE (x);
   unsigned this_regno = REGNO (x);
-  unsigned this_nregs = hard_regno_nregs[this_regno][mode];
+  int this_nregs = hard_regno_nregs[this_regno][mode];
 
   if (action == mark_write)
     {
@@ -691,8 +1129,6 @@ scan_rtx_reg (rtx insn, rtx *loc, enum r
 
 	  if (subset && !superset)
 	    head->cannot_rename = 1;
-	  head->next_chain = closed_chains;
-	  closed_chains = head;
 	  bitmap_clear_bit (&open_chains_set, head->id);
 
 	  nregs = head->nregs;
@@ -1078,28 +1514,14 @@ record_out_operands (rtx insn, bool earl
 
 /* Build def/use chain.  */
 
-static struct du_head *
+static bool
 build_def_use (basic_block bb)
 {
   rtx insn;
-  df_ref *def_rec;
   unsigned HOST_WIDE_INT untracked_operands;
 
-  open_chains = closed_chains = NULL;
-
   fail_current_block = false;
 
-  current_id = 0;
-  bitmap_initialize (&open_chains_set, &bitmap_default_obstack);
-  CLEAR_HARD_REG_SET (live_in_chains);
-  REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_in (bb));
-  for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++)
-    {
-      df_ref def = *def_rec;
-      if (DF_REF_FLAGS (def) & DF_REF_AT_TOP)
-	SET_HARD_REG_BIT (live_hard_regs, DF_REF_REGNO (def));
-    }
-
   for (insn = BB_HEAD (bb); ; insn = NEXT_INSN (insn))
     {
       if (NONDEBUG_INSN_P (insn))
@@ -1338,14 +1760,10 @@ build_def_use (basic_block bb)
 	break;
     }
 
-  bitmap_clear (&open_chains_set);
-
   if (fail_current_block)
-    return NULL;
+    return false;
 
-  /* Since we close every chain when we find a REG_DEAD note, anything that
-     is still open lives past the basic block, so it can't be renamed.  */
-  return closed_chains;
+  return true;
 }
 \f
 /* Perform register renaming on the current function.  */
@@ -1353,44 +1771,20 @@ build_def_use (basic_block bb)
 static unsigned int
 regrename_optimize (void)
 {
-  basic_block bb;
-  char *first_obj;
-
   df_set_flags (DF_LR_RUN_DCE);
   df_note_add_problem ();
   df_analyze ();
   df_set_flags (DF_DEFER_INSN_RESCAN);
 
-  memset (tick, 0, sizeof tick);
-
   gcc_obstack_init (&rename_obstack);
-  first_obj = XOBNEWVAR (&rename_obstack, char, 0);
-
-  FOR_EACH_BB (bb)
-    {
-      struct du_head *all_chains = 0;
-
-      id_to_chain = VEC_alloc (du_head_p, heap, 0);
-
-      if (dump_file)
-	fprintf (dump_file, "\nBasic block %d:\n", bb->index);
 
-      all_chains = build_def_use (bb);
-
-      if (dump_file)
-	dump_def_use_chain (all_chains);
-
-      rename_chains (all_chains);
+  regrename_analyze ();
 
-      free_chain_data ();
-      obstack_free (&rename_obstack, first_obj);
-    }
+  rename_chains ();
 
+  free_chain_data ();
   obstack_free (&rename_obstack, NULL);
 
-  if (dump_file)
-    fputc ('\n', dump_file);
-
   return 0;
 }
 \f
Index: gcc/regs.h
===================================================================
--- gcc/regs.h	(revision 178596)
+++ gcc/regs.h	(working copy)
@@ -397,4 +397,48 @@ overlaps_hard_reg_set_p (const HARD_REG_
   return false;
 }
 
+/* Like add_to_hard_reg_set, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+
+static inline void
+add_range_to_hard_reg_set (HARD_REG_SET *regs, unsigned int regno,
+			   int nregs)
+{
+  while (nregs-- > 0)
+    SET_HARD_REG_BIT (*regs, regno + nregs);
+}
+
+/* Likewise, but remove the registers.  */
+
+static inline void
+remove_range_from_hard_reg_set (HARD_REG_SET *regs, unsigned int regno,
+				int nregs)
+{
+  while (nregs-- > 0)
+    CLEAR_HARD_REG_BIT (*regs, regno + nregs);
+}
+
+/* Like overlaps_hard_reg_set_p, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+static inline bool
+range_overlaps_hard_reg_set_p (const HARD_REG_SET set, unsigned regno,
+			       int nregs)
+{
+  while (nregs-- > 0)
+    if (TEST_HARD_REG_BIT (set, regno + nregs))
+      return true;
+  return false;
+}
+
+/* Like in_hard_reg_set_p, but use a REGNO/NREGS range instead of
+   REGNO and MODE.  */
+static inline bool
+range_in_hard_reg_set_p (const HARD_REG_SET set, unsigned regno, int nregs)
+{
+  while (nregs-- > 0)
+    if (!TEST_HARD_REG_BIT (set, regno + nregs))
+      return false;
+  return true;
+}
+
 #endif /* GCC_REGS_H */
Index: gcc/vec.h
===================================================================
--- gcc/vec.h	(revision 178596)
+++ gcc/vec.h	(working copy)
@@ -195,6 +195,11 @@ along with GCC; see the file COPYING3.
 #define FOR_EACH_VEC_ELT(T, V, I, P)		\
   for (I = 0; VEC_iterate (T, (V), (I), (P)); ++(I))
 
+/* Likewise, but start from FROM rather than 0.  */
+
+#define FOR_EACH_VEC_ELT_FROM(T, V, I, P, FROM)		\
+  for (I = (FROM); VEC_iterate (T, (V), (I), (P)); ++(I))
+
 /* Convenience macro for reverse iteration.  */
 
 #define FOR_EACH_VEC_ELT_REVERSE(T,V,I,P) \

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2011-09-07 15:59 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-21 14:12 Rename across basic block boundaries Bernd Schmidt
2011-08-23 10:56 ` Ping: " Bernd Schmidt
2011-08-24 12:50 ` Richard Sandiford
2011-08-25 14:39   ` Bernd Schmidt
2011-08-25 19:04   ` Bernd Schmidt
2011-08-26 13:33     ` Richard Sandiford
2011-09-01 13:02       ` Bernd Schmidt
2011-09-01 14:16         ` Richard Sandiford
2011-09-05 15:36           ` Bernd Schmidt
2011-09-06 10:37             ` Richard Sandiford
2011-09-07 16:07               ` Bernd Schmidt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).