public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-6414] ira: Try to avoid propagating conflicts
@ 2022-01-10 14:47 Richard Sandiford
  0 siblings, 0 replies; only message in thread
From: Richard Sandiford @ 2022-01-10 14:47 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8e7a23728f66d2da88b47e34224410457fdefbf5

commit r12-6414-g8e7a23728f66d2da88b47e34224410457fdefbf5
Author: Richard Sandiford <richard.sandiford@arm.com>
Date:   Mon Jan 10 14:47:08 2022 +0000

    ira: Try to avoid propagating conflicts
    
    Suppose that:
    
    - an inner loop L contains an allocno A
    - L clobbers hard register R while A is live
    - A's parent allocno is AP
    
    Previously, propagate_allocno_info would propagate conflict sets up the
    loop tree, so that the conflict between A and R would become a conflict
    between AP and R (and so on for ancestors of AP).
    
    However, when IRA treats loops as separate allocation regions, it can
    decide on a loop-by-loop basis whether to allocate a register or spill
    to memory.  Conflicts in inner loops therefore don't need to become
    hard conflicts in parent loops.  Instead we can record that using the
    “conflicting” registers for the parent allocnos has a higher cost.
    In the example above, this higher cost is the sum of:
    
    - the cost of saving R on entry to L
    - the cost of keeping the pseudo register in memory throughout L
    - the cost of reloading R on exit from L
    
    This value is also a cap on the hard register cost that A can contribute
    to AP in general (not just for conflicts).  Whatever allocation we pick
    for AP, there is always the option of spilling that register to memory
    throughout L, so the cost to A of allocating a register to AP can't be
    more than the cost of spilling A.
    
    To take an extreme example: if allocating a register R2 to A is more
    expensive than spilling A to memory, ALLOCNO_HARD_REG_COSTS (A)[R2]
    could be (say) 2 times greater than ALLOCNO_MEMORY_COST (A) or 100
    times greater than ALLOCNO_MEMORY_COST (A).  But this scale factor
    doesn't matter to AP.  All that matters is that R2 is more expensive
    than memory for A, so that allocating R2 to AP should be costed as
    spilling A to memory (again assuming that A and AP are in different
    allocation regions).  Propagating a factor of 100 would distort the
    register costs for AP.
    
    move_spill_restore tries to undo the propagation done by
    propagate_allocno_info, so we need some extra processing there.
    
    gcc/
            PR rtl-optimization/98782
            * ira-int.h (ira_allocno::might_conflict_with_parent_p): New field.
            (ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P): New macro.
            (ira_single_region_allocno_p): New function.
            (ira_total_conflict_hard_regs): Likewise.
            * ira-build.c (ira_create_allocno): Initialize
            ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P.
            (ira_propagate_hard_reg_costs): New function.
            (propagate_allocno_info): Use it.  Try to avoid propagating
            hard register conflicts to parent allocnos if we can handle
            the conflicts by spilling instead.  Limit the propagated
            register costs to the cost of spilling throughout the child loop.
            * ira-color.c (color_pass): Use ira_single_region_allocno_p to
            test whether a child and parent allocno can share the same
            register.
            (move_spill_restore): Adjust for the new behavior of
            propagate_allocno_info.
    
    gcc/testsuite/
            * gcc.target/aarch64/reg-alloc-2.c: New test.

Diff:
---
 gcc/ira-build.c                                | 55 +++++++++++++++++++++++---
 gcc/ira-color.c                                | 53 ++++++++++++++++---------
 gcc/ira-int.h                                  | 37 +++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c | 47 ++++++++++++++++++++++
 4 files changed, 169 insertions(+), 23 deletions(-)

diff --git a/gcc/ira-build.c b/gcc/ira-build.c
index 0547edfde55..875b4d8ed7c 100644
--- a/gcc/ira-build.c
+++ b/gcc/ira-build.c
@@ -497,6 +497,7 @@ ira_create_allocno (int regno, bool cap_p,
   bitmap_set_bit (loop_tree_node->all_allocnos, ALLOCNO_NUM (a));
   ALLOCNO_NREFS (a) = 0;
   ALLOCNO_FREQ (a) = 0;
+  ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (a) = false;
   ALLOCNO_HARD_REGNO (a) = -1;
   ALLOCNO_CALL_FREQ (a) = 0;
   ALLOCNO_CALLS_CROSSED_NUM (a) = 0;
@@ -1991,6 +1992,35 @@ propagate_modified_regnos (ira_loop_tree_node_t loop_tree_node)
 		   loop_tree_node->modified_regnos);
 }
 
+/* Propagate ALLOCNO_HARD_REG_COSTS from A to PARENT_A.  Use SPILL_COST
+   as the cost of spilling a register throughout A (which we have to do
+   for PARENT_A allocations that conflict with A).  */
+static void
+ira_propagate_hard_reg_costs (ira_allocno_t parent_a, ira_allocno_t a,
+			      int spill_cost)
+{
+  HARD_REG_SET conflicts = ira_total_conflict_hard_regs (a);
+  conflicts &= ~ira_total_conflict_hard_regs (parent_a);
+
+  auto costs = ALLOCNO_HARD_REG_COSTS (a);
+  if (!hard_reg_set_empty_p (conflicts))
+    ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (a) = true;
+  else if (!costs)
+    return;
+
+  auto aclass = ALLOCNO_CLASS (a);
+  ira_allocate_and_set_costs (&ALLOCNO_HARD_REG_COSTS (parent_a),
+			      aclass, ALLOCNO_CLASS_COST (parent_a));
+  auto parent_costs = ALLOCNO_HARD_REG_COSTS (parent_a);
+  for (int i = 0; i < ira_class_hard_regs_num[aclass]; ++i)
+    if (TEST_HARD_REG_BIT (conflicts, ira_class_hard_regs[aclass][i]))
+      parent_costs[i] += spill_cost;
+    else if (costs)
+      /* The cost to A of allocating this register to PARENT_A can't
+	 be more than the cost of spilling the register throughout A.  */
+      parent_costs[i] += MIN (costs[i], spill_cost);
+}
+
 /* Propagate new info about allocno A (see comments about accumulated
    info in allocno definition) to the corresponding allocno on upper
    loop tree level.  So allocnos on upper levels accumulate
@@ -2019,12 +2049,27 @@ propagate_allocno_info (void)
 	  && bitmap_bit_p (ALLOCNO_LOOP_TREE_NODE (a)->border_allocnos,
 			   ALLOCNO_NUM (a)))
 	{
+	  /* Calculate the cost of storing to memory on entry to A's loop,
+	     referencing as memory within A's loop, and restoring from
+	     memory on exit from A's loop.  */
+	  ira_loop_border_costs border_costs (a);
+	  int spill_cost = INT_MAX;
+	  if (ira_subloop_allocnos_can_differ_p (parent_a))
+	    spill_cost = (border_costs.spill_inside_loop_cost ()
+			  + ALLOCNO_MEMORY_COST (a));
+
 	  if (! ALLOCNO_BAD_SPILL_P (a))
 	    ALLOCNO_BAD_SPILL_P (parent_a) = false;
 	  ALLOCNO_NREFS (parent_a) += ALLOCNO_NREFS (a);
 	  ALLOCNO_FREQ (parent_a) += ALLOCNO_FREQ (a);
+
+	  /* If A's allocation can differ from PARENT_A's, we can if necessary
+	     spill PARENT_A on entry to A's loop and restore it afterwards.
+	     Doing that has cost SPILL_COST.  */
+	  if (!ira_subloop_allocnos_can_differ_p (parent_a))
+	    merge_hard_reg_conflicts (a, parent_a, true);
+
 	  ALLOCNO_CALL_FREQ (parent_a) += ALLOCNO_CALL_FREQ (a);
-	  merge_hard_reg_conflicts (a, parent_a, true);
 	  ALLOCNO_CALLS_CROSSED_NUM (parent_a)
 	    += ALLOCNO_CALLS_CROSSED_NUM (a);
 	  ALLOCNO_CHEAP_CALLS_CROSSED_NUM (parent_a)
@@ -2037,15 +2082,15 @@ propagate_allocno_info (void)
 	    += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a);
 	  aclass = ALLOCNO_CLASS (a);
 	  ira_assert (aclass == ALLOCNO_CLASS (parent_a));
-	  ira_allocate_and_accumulate_costs
-	    (&ALLOCNO_HARD_REG_COSTS (parent_a), aclass,
-	     ALLOCNO_HARD_REG_COSTS (a));
+	  ira_propagate_hard_reg_costs (parent_a, a, spill_cost);
 	  ira_allocate_and_accumulate_costs
 	    (&ALLOCNO_CONFLICT_HARD_REG_COSTS (parent_a),
 	     aclass,
 	     ALLOCNO_CONFLICT_HARD_REG_COSTS (a));
+	  /* The cost to A of allocating a register to PARENT_A can't be
+	     more than the cost of spilling the register throughout A.  */
 	  ALLOCNO_CLASS_COST (parent_a)
-	    += ALLOCNO_CLASS_COST (a);
+	    += MIN (ALLOCNO_CLASS_COST (a), spill_cost);
 	  ALLOCNO_MEMORY_COST (parent_a) += ALLOCNO_MEMORY_COST (a);
 	}
 }
diff --git a/gcc/ira-color.c b/gcc/ira-color.c
index ae0f08af4b3..4344ee6689e 100644
--- a/gcc/ira-color.c
+++ b/gcc/ira-color.c
@@ -3344,7 +3344,7 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
   unsigned int j;
   bitmap_iterator bi;
   machine_mode mode;
-  enum reg_class rclass, aclass, pclass;
+  enum reg_class rclass, aclass;
   ira_allocno_t a, subloop_allocno;
   ira_loop_tree_node_t subloop_node;
 
@@ -3389,10 +3389,9 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
 	/* Remove from processing in the next loop.  */
 	bitmap_clear_bit (consideration_allocno_bitmap, j);
 	rclass = ALLOCNO_CLASS (a);
-	pclass = ira_pressure_class_translate[rclass];
-	if (flag_ira_region == IRA_REGION_MIXED
-	    && (loop_tree_node->reg_pressure[pclass]
-		<= ira_class_hard_regs_num[pclass]))
+	subloop_allocno = ALLOCNO_CAP_MEMBER (a);
+	subloop_node = ALLOCNO_LOOP_TREE_NODE (subloop_allocno);
+	if (ira_single_region_allocno_p (a, subloop_allocno))
 	  {
 	    mode = ALLOCNO_MODE (a);
 	    hard_regno = ALLOCNO_HARD_REGNO (a);
@@ -3402,8 +3401,6 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
 		ira_assert (index >= 0);
 	      }
 	    regno = ALLOCNO_REGNO (a);
-	    subloop_allocno = ALLOCNO_CAP_MEMBER (a);
-	    subloop_node = ALLOCNO_LOOP_TREE_NODE (subloop_allocno);
 	    ira_assert (!ALLOCNO_ASSIGNED_P (subloop_allocno));
 	    ALLOCNO_HARD_REGNO (subloop_allocno) = hard_regno;
 	    ALLOCNO_ASSIGNED_P (subloop_allocno) = true;
@@ -3426,7 +3423,6 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
 	  ira_assert (ALLOCNO_CAP_MEMBER (a) == NULL);
 	  mode = ALLOCNO_MODE (a);
 	  rclass = ALLOCNO_CLASS (a);
-	  pclass = ira_pressure_class_translate[rclass];
 	  hard_regno = ALLOCNO_HARD_REGNO (a);
 	  /* Use hard register class here.  ??? */
 	  if (hard_regno >= 0)
@@ -3443,11 +3439,11 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
 	  ira_assert (ALLOCNO_CLASS (subloop_allocno) == rclass);
 	  ira_assert (bitmap_bit_p (subloop_node->all_allocnos,
 				    ALLOCNO_NUM (subloop_allocno)));
-	  if ((flag_ira_region == IRA_REGION_MIXED
-	       && (loop_tree_node->reg_pressure[pclass]
-		   <= ira_class_hard_regs_num[pclass]))
+	  if (ira_single_region_allocno_p (a, subloop_allocno)
 	      || !ira_subloop_allocnos_can_differ_p (a, hard_regno >= 0))
 	    {
+	      gcc_assert (!ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P
+			  (subloop_allocno));
 	      if (! ALLOCNO_ASSIGNED_P (subloop_allocno))
 		{
 		  ALLOCNO_HARD_REGNO (subloop_allocno) = hard_regno;
@@ -3583,14 +3579,35 @@ move_spill_restore (void)
 	      if (subloop_allocno == NULL)
 		continue;
 	      ira_assert (rclass == ALLOCNO_CLASS (subloop_allocno));
-	      /* We have accumulated cost.  To get the real cost of
-		 allocno usage in the loop we should subtract costs of
-		 the subloop allocnos.  */
-	      cost -= (ALLOCNO_MEMORY_COST (subloop_allocno)
-		       - (ALLOCNO_HARD_REG_COSTS (subloop_allocno) == NULL
-			  ? ALLOCNO_CLASS_COST (subloop_allocno)
-			  : ALLOCNO_HARD_REG_COSTS (subloop_allocno)[index]));
 	      ira_loop_border_costs border_costs (subloop_allocno);
+
+	      /* We have accumulated cost.  To get the real cost of
+		 allocno usage in the loop we should subtract the costs
+		 added by propagate_allocno_info for the subloop allocnos.  */
+	      int reg_cost
+		= (ALLOCNO_HARD_REG_COSTS (subloop_allocno) == NULL
+		   ? ALLOCNO_CLASS_COST (subloop_allocno)
+		   : ALLOCNO_HARD_REG_COSTS (subloop_allocno)[index]);
+
+	      int spill_cost
+		= (border_costs.spill_inside_loop_cost ()
+		   + ALLOCNO_MEMORY_COST (subloop_allocno));
+
+	      /* If HARD_REGNO conflicts with SUBLOOP_A then
+		 propagate_allocno_info will have propagated
+		 the cost of spilling HARD_REGNO in SUBLOOP_NODE.
+		 (ira_subloop_allocnos_can_differ_p must be true
+		 in that case.)  Otherwise, SPILL_COST acted as
+		 a cap on the propagated register cost, in cases
+		 where the allocations can differ.  */
+	      auto conflicts = ira_total_conflict_hard_regs (subloop_allocno);
+	      if (TEST_HARD_REG_BIT (conflicts, hard_regno))
+		reg_cost = spill_cost;
+	      else if (ira_subloop_allocnos_can_differ_p (a))
+		reg_cost = MIN (reg_cost, spill_cost);
+
+	      cost -= ALLOCNO_MEMORY_COST (subloop_allocno) - reg_cost;
+
 	      if ((hard_regno2 = ALLOCNO_HARD_REGNO (subloop_allocno)) < 0)
 		/* The register was spilled in the subloop.  If we spill
 		   it in the outer loop too then we'll no longer need to
diff --git a/gcc/ira-int.h b/gcc/ira-int.h
index c5b1a131abd..8b87498f77f 100644
--- a/gcc/ira-int.h
+++ b/gcc/ira-int.h
@@ -314,6 +314,13 @@ struct ira_allocno
      vector where a bit with given index represents allocno with the
      same number.  */
   unsigned int conflict_vec_p : 1;
+  /* True if the parent loop has an allocno for the same register and
+     if the parent allocno's assignment might not be valid in this loop.
+     This means that we cannot merge this allocno and the parent allocno
+     together.
+
+     This is only ever true for non-cap allocnos.  */
+  unsigned int might_conflict_with_parent_p : 1;
   /* Hard register assigned to given allocno.  Negative value means
      that memory was allocated to the allocno.  During the reload,
      spilled allocno has value equal to the corresponding stack slot
@@ -423,6 +430,8 @@ struct ira_allocno
 #define ALLOCNO_CAP_MEMBER(A) ((A)->cap_member)
 #define ALLOCNO_NREFS(A) ((A)->nrefs)
 #define ALLOCNO_FREQ(A) ((A)->freq)
+#define ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P(A) \
+  ((A)->might_conflict_with_parent_p)
 #define ALLOCNO_HARD_REGNO(A) ((A)->hard_regno)
 #define ALLOCNO_CALL_FREQ(A) ((A)->call_freq)
 #define ALLOCNO_CALLS_CROSSED_NUM(A) ((A)->calls_crossed_num)
@@ -1623,4 +1632,32 @@ ira_subloop_allocnos_can_differ_p (ira_allocno_t a, bool allocated_p = true)
   return true;
 }
 
+/* Return true if we should treat A and SUBLOOP_A as belonging to a
+   single region.  */
+inline bool
+ira_single_region_allocno_p (ira_allocno_t a, ira_allocno_t subloop_a)
+{
+  if (flag_ira_region != IRA_REGION_MIXED)
+    return false;
+
+  if (ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (subloop_a))
+    return false;
+
+  auto rclass = ALLOCNO_CLASS (a);
+  auto pclass = ira_pressure_class_translate[rclass];
+  auto loop_used_regs = ALLOCNO_LOOP_TREE_NODE (a)->reg_pressure[pclass];
+  return loop_used_regs <= ira_class_hard_regs_num[pclass];
+}
+
+/* Return the set of all hard registers that conflict with A.  */
+inline HARD_REG_SET
+ira_total_conflict_hard_regs (ira_allocno_t a)
+{
+  auto obj_0 = ALLOCNO_OBJECT (a, 0);
+  HARD_REG_SET conflicts = OBJECT_TOTAL_CONFLICT_HARD_REGS (obj_0);
+  for (int i = 1; i < ALLOCNO_NUM_OBJECTS (a); i++)
+    conflicts |= OBJECT_TOTAL_CONFLICT_HARD_REGS (ALLOCNO_OBJECT (a, i));
+  return conflicts;
+}
+
 #endif /* GCC_IRA_INT_H */
diff --git a/gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c b/gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c
new file mode 100644
index 00000000000..d4d260c96b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c
@@ -0,0 +1,47 @@
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#define PROB 0.1
+
+struct L
+{
+  int data;
+  volatile struct L *next;
+  volatile struct L *inner;
+};
+
+/* The thing we're testing here is that the !head->inner path of the outer loop
+   body has no stack accesses.  It's possible that we'll need to update this
+   pattern for unrelated code changes. but the test should be XFAILed rather
+   than changed if any new stack accesses occur on the !head->inner path.  */
+/*
+** foo:
+**	...
+**	ldr	(w[0-9]+), \[(x[0-9]+)\]
+**	add	(w[0-9]+), (?:\3, \1|\1, \3)
+**	ldr	(x[0-9]+), \[\2, #?16\]
+**	str	\3, \[\2\]
+**	ldr	\2, \[\2, #?8\]
+**	cbn?z	\4, .*
+**	...
+**	ret
+*/
+void
+foo (volatile struct L *head, int inc)
+{
+  while (head)
+    {
+      inc = head->data + inc;
+      volatile struct L *inner = head->inner;
+      head->data = inc;
+      head = head->next;
+      if (__builtin_expect_with_probability (inner != 0, 0, PROB))
+	for (int i = 0; i < 1000; ++i)
+	  /* Leave x30 for i.  */
+	  asm volatile ("// foo" :::
+			"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+			"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+			"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+			"x24", "x25", "x26", "x27", "x28");
+    }
+}


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-01-10 14:47 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-10 14:47 [gcc r12-6414] ira: Try to avoid propagating conflicts Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).