From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1666) id 11DD93858424; Wed, 2 Mar 2022 14:09:09 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 11DD93858424 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Richard Biener To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-7451] rtl-optimization/104686 - speedup IRA allocno conflict test X-Act-Checkin: gcc X-Git-Author: Richard Biener X-Git-Refname: refs/heads/master X-Git-Oldrev: ced22c51baaa3fe84d14d5baef60c4440a35b4be X-Git-Newrev: 8fede2876a751d53a28442dcca32466daa929daa Message-Id: <20220302140909.11DD93858424@sourceware.org> Date: Wed, 2 Mar 2022 14:09:09 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 02 Mar 2022 14:09:09 -0000 https://gcc.gnu.org/g:8fede2876a751d53a28442dcca32466daa929daa commit r12-7451-g8fede2876a751d53a28442dcca32466daa929daa Author: Richard Biener Date: Wed Mar 2 08:55:58 2022 +0100 rtl-optimization/104686 - speedup IRA allocno conflict test In this PR allocnos_conflict_p takes 90% of the compile-time via the calls from update_conflict_hard_regno_costs. This is due to the high number of conflicts recorded in the dense bitvector representation. Fortunately we can take advantage of the bitvector representation here and turn the O(n) conflict test into an O(1) one, greatly speeding up the compile of the testcase from 39s to just 4s (93% IRA time to 26% IRA time). While for the testcase in question the first allocno is almost always the nice one the patch tries a more systematic approach to finding the allocno to iterate object conflicts over. That does reduce the actual number of compares for the testcase but it doesn't make a measurable difference wall-clock wise. 
That's not guaranteed, though, I think, so I've kept this systematic way of choosing the cheapest allocno. 2022-03-02 Richard Biener PR rtl-optimization/104686 * ira-color.cc (object_conflicts_with_allocno_p): New function using a bitvector test instead of iterating when possible. (allocnos_conflict_p): Choose the best allocno to iterate over object conflicts. (update_conflict_hard_regno_costs): Do allocnos_conflict_p test last. Diff: --- gcc/ira-color.cc | 75 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 57 insertions(+), 18 deletions(-) diff --git a/gcc/ira-color.cc b/gcc/ira-color.cc index 8b6db1bb417..e01d1841a08 100644 --- a/gcc/ira-color.cc +++ b/gcc/ira-color.cc @@ -1338,26 +1338,65 @@ update_allocno_cost (ira_allocno_t allocno, int hard_regno, return true; } +/* Return TRUE if the object OBJ conflicts with the allocno A. */ +static bool +object_conflicts_with_allocno_p (ira_object_t obj, ira_allocno_t a) +{ + if (!OBJECT_CONFLICT_VEC_P (obj)) + for (int word = 0; word < ALLOCNO_NUM_OBJECTS (a); word++) + { + ira_object_t another_obj = ALLOCNO_OBJECT (a, word); + if (OBJECT_CONFLICT_ID (another_obj) >= OBJECT_MIN (obj) + && OBJECT_CONFLICT_ID (another_obj) <= OBJECT_MAX (obj) + && TEST_MINMAX_SET_BIT (OBJECT_CONFLICT_BITVEC (obj), + OBJECT_CONFLICT_ID (another_obj), + OBJECT_MIN (obj), OBJECT_MAX (obj))) + return true; + } + else + { + /* If this linear walk ever becomes a bottleneck we could add a + conflict_vec_sorted_p flag and if not set, sort the conflicts after + their ID so we can use a binary search. That would also require + tracking the actual number of conflicts in the vector to not rely + on the NULL termination. */ + ira_object_conflict_iterator oci; + ira_object_t conflict_obj; + FOR_EACH_OBJECT_CONFLICT (obj, conflict_obj, oci) + if (OBJECT_ALLOCNO (conflict_obj) == a) + return true; + } + return false; +} + /* Return TRUE if allocnos A1 and A2 conflicts. 
Here we are - interesting only in conflicts of allocnos with intersected allocno - classes. */ + interested only in conflicts of allocnos with intersecting allocno + classes. */ static bool allocnos_conflict_p (ira_allocno_t a1, ira_allocno_t a2) { - ira_object_t obj, conflict_obj; - ira_object_conflict_iterator oci; - int word, nwords = ALLOCNO_NUM_OBJECTS (a1); - - for (word = 0; word < nwords; word++) + /* Compute the upper bound for the linear iteration when the object + conflicts are represented as a sparse vector. In particular this + will make sure we prefer O(1) bitvector testing. */ + int num_conflicts_in_vec1 = 0, num_conflicts_in_vec2 = 0; + for (int word = 0; word < ALLOCNO_NUM_OBJECTS (a1); ++word) + if (OBJECT_CONFLICT_VEC_P (ALLOCNO_OBJECT (a1, word))) + num_conflicts_in_vec1 += OBJECT_NUM_CONFLICTS (ALLOCNO_OBJECT (a1, word)); + for (int word = 0; word < ALLOCNO_NUM_OBJECTS (a2); ++word) + if (OBJECT_CONFLICT_VEC_P (ALLOCNO_OBJECT (a2, word))) + num_conflicts_in_vec2 += OBJECT_NUM_CONFLICTS (ALLOCNO_OBJECT (a2, word)); + if (num_conflicts_in_vec2 < num_conflicts_in_vec1) + std::swap (a1, a2); + + for (int word = 0; word < ALLOCNO_NUM_OBJECTS (a1); word++) { - obj = ALLOCNO_OBJECT (a1, word); + ira_object_t obj = ALLOCNO_OBJECT (a1, word); /* Take preferences of conflicting allocnos into account. */ - FOR_EACH_OBJECT_CONFLICT (obj, conflict_obj, oci) - if (OBJECT_ALLOCNO (conflict_obj) == a2) - return true; + if (object_conflicts_with_allocno_p (obj, a2)) + return true; } return false; -} +} /* Update (decrease if DECR_P) HARD_REGNO cost of allocnos connected by copies to ALLOCNO to increase chances to remove some copies as @@ -1572,15 +1611,15 @@ update_conflict_hard_regno_costs (int *costs, enum reg_class aclass, else gcc_unreachable (); + another_aclass = ALLOCNO_CLASS (another_allocno); if (another_allocno == from - || allocnos_conflict_p (another_allocno, start)) - continue; - - another_aclass = ALLOCNO_CLASS (another_allocno); - if (! 
ira_reg_classes_intersect_p[aclass][another_aclass] || ALLOCNO_ASSIGNED_P (another_allocno) - || ALLOCNO_COLOR_DATA (another_allocno)->may_be_spilled_p) + || ALLOCNO_COLOR_DATA (another_allocno)->may_be_spilled_p + || ! ira_reg_classes_intersect_p[aclass][another_aclass]) + continue; + if (allocnos_conflict_p (another_allocno, start)) continue; + class_size = ira_class_hard_regs_num[another_aclass]; ira_allocate_and_copy_costs (&ALLOCNO_UPDATED_CONFLICT_HARD_REG_COSTS (another_allocno),