[gcc r12-7451] rtl-optimization/104686 - speedup IRA allocno conflict test

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc r12-7451] rtl-optimization/104686 - speedup IRA allocno conflict test
@ 2022-03-02 14:09 Richard Biener
  0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2022-03-02 14:09 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8fede2876a751d53a28442dcca32466daa929daa

commit r12-7451-g8fede2876a751d53a28442dcca32466daa929daa
Author: Richard Biener <rguenther@suse.de>
Date:   Wed Mar 2 08:55:58 2022 +0100

    rtl-optimization/104686 - speedup IRA allocno conflict test
    
    In this PR allocnos_conflict_p takes 90% of the compile-time via
    the calls from update_conflict_hard_regno_costs.  This is due to
    the high number of conflicts recorded in the dense bitvector
    representation.  Fortunately we can take advantage of the bitvector
    representation here and turn the O(n) conflict test into an O(1) one,
    greatly speeding up the compile of the testcase from 39s to just 4s
    (93% IRA time to 26% IRA time).
    
    While for the testcase in question the first allocno is almost always
    the nice one, the patch takes a more systematic approach to finding
    the allocno whose object conflicts to iterate over.  That does reduce
    the actual number of compares for the testcase, but it doesn't make
    a measurable difference wall-clock wise.  I don't think that is
    guaranteed in general, though, so I've kept this systematic way of
    choosing the cheapest allocno.
    
    2022-03-02  Richard Biener  <rguenther@suse.de>
    
            PR rtl-optimization/104686
            * ira-color.cc (object_conflicts_with_allocno_p): New function
            using a bitvector test instead of iterating when possible.
            (allocnos_conflict_p): Choose the best allocno to iterate over
            object conflicts.
            (update_conflict_hard_regno_costs): Do allocnos_conflict_p test
            last.

Diff:
---
 gcc/ira-color.cc | 75 ++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 57 insertions(+), 18 deletions(-)

diff --git a/gcc/ira-color.cc b/gcc/ira-color.cc
index 8b6db1bb417..e01d1841a08 100644
--- a/gcc/ira-color.cc
+++ b/gcc/ira-color.cc
@@ -1338,26 +1338,65 @@ update_allocno_cost (ira_allocno_t allocno, int hard_regno,
   return true;
 }
 
+/* Return TRUE if the object OBJ conflicts with the allocno A.  */
+static bool
+object_conflicts_with_allocno_p (ira_object_t obj, ira_allocno_t a)
+{
+  if  (!OBJECT_CONFLICT_VEC_P (obj))
+    for (int word = 0; word < ALLOCNO_NUM_OBJECTS (a); word++)
+      {
+	ira_object_t another_obj = ALLOCNO_OBJECT (a, word);
+	if (OBJECT_CONFLICT_ID (another_obj) >= OBJECT_MIN (obj)
+	    && OBJECT_CONFLICT_ID (another_obj) <= OBJECT_MAX (obj)
+	    && TEST_MINMAX_SET_BIT (OBJECT_CONFLICT_BITVEC (obj),
+				    OBJECT_CONFLICT_ID (another_obj),
+				    OBJECT_MIN (obj), OBJECT_MAX (obj)))
+	  return true;
+      }
+  else
+    {
+      /* If this linear walk ever becomes a bottleneck we could add a
+	 conflict_vec_sorted_p flag and if not set, sort the conflicts after
+	 their ID so we can use a binary search.  That would also require
+	 tracking the actual number of conflicts in the vector to not rely
+	 on the NULL termination.  */
+      ira_object_conflict_iterator oci;
+      ira_object_t conflict_obj;
+      FOR_EACH_OBJECT_CONFLICT (obj, conflict_obj, oci)
+	if (OBJECT_ALLOCNO (conflict_obj) == a)
+	  return true;
+    }
+  return false;
+}
+
 /* Return TRUE if allocnos A1 and A2 conflicts. Here we are
-   interesting only in conflicts of allocnos with intersected allocno
-   classes. */
+   interested only in conflicts of allocnos with intersecting allocno
+   classes.  */
 static bool
 allocnos_conflict_p (ira_allocno_t a1, ira_allocno_t a2)
 {
-  ira_object_t obj, conflict_obj;
-  ira_object_conflict_iterator oci;
-  int word, nwords = ALLOCNO_NUM_OBJECTS (a1);
-  
-  for (word = 0; word < nwords; word++)
+  /* Compute the upper bound for the linear iteration when the object
+     conflicts are represented as a sparse vector.  In particular this
+     will make sure we prefer O(1) bitvector testing.  */
+  int num_conflicts_in_vec1 = 0, num_conflicts_in_vec2 = 0;
+  for (int word = 0; word < ALLOCNO_NUM_OBJECTS (a1); ++word)
+    if (OBJECT_CONFLICT_VEC_P (ALLOCNO_OBJECT (a1, word)))
+      num_conflicts_in_vec1 += OBJECT_NUM_CONFLICTS (ALLOCNO_OBJECT (a1, word));
+  for (int word = 0; word < ALLOCNO_NUM_OBJECTS (a2); ++word)
+    if (OBJECT_CONFLICT_VEC_P (ALLOCNO_OBJECT (a2, word)))
+      num_conflicts_in_vec2 += OBJECT_NUM_CONFLICTS (ALLOCNO_OBJECT (a2, word));
+  if (num_conflicts_in_vec2 < num_conflicts_in_vec1)
+    std::swap (a1, a2);
+
+  for (int word = 0; word < ALLOCNO_NUM_OBJECTS (a1); word++)
     {
-      obj = ALLOCNO_OBJECT (a1, word);
+      ira_object_t obj = ALLOCNO_OBJECT (a1, word);
       /* Take preferences of conflicting allocnos into account.  */
-      FOR_EACH_OBJECT_CONFLICT (obj, conflict_obj, oci)
-	if (OBJECT_ALLOCNO (conflict_obj) == a2)
-	  return true;
+      if (object_conflicts_with_allocno_p (obj, a2))
+	return true;
     }
   return false;
-}  
+}
 
 /* Update (decrease if DECR_P) HARD_REGNO cost of allocnos connected
    by copies to ALLOCNO to increase chances to remove some copies as
@@ -1572,15 +1611,15 @@ update_conflict_hard_regno_costs (int *costs, enum reg_class aclass,
 	else
 	  gcc_unreachable ();
 
+	another_aclass = ALLOCNO_CLASS (another_allocno);
 	if (another_allocno == from
-	    || allocnos_conflict_p (another_allocno, start))
-	  continue;
-
- 	another_aclass = ALLOCNO_CLASS (another_allocno);
- 	if (! ira_reg_classes_intersect_p[aclass][another_aclass]
 	    || ALLOCNO_ASSIGNED_P (another_allocno)
-	    || ALLOCNO_COLOR_DATA (another_allocno)->may_be_spilled_p)
+	    || ALLOCNO_COLOR_DATA (another_allocno)->may_be_spilled_p
+	    || ! ira_reg_classes_intersect_p[aclass][another_aclass])
+	  continue;
+	if (allocnos_conflict_p (another_allocno, start))
 	  continue;
+
 	class_size = ira_class_hard_regs_num[another_aclass];
 	ira_allocate_and_copy_costs
 	  (&ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (another_allocno),


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-03-02 14:09 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-02 14:09 [gcc r12-7451] rtl-optimization/104686 - speedup IRA allocno conflict test Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).