From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 110426 invoked by alias); 17 Dec 2019 23:33:19 -0000 Mailing-List: contact dwz-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Post: List-Help: List-Subscribe: Sender: dwz-owner@sourceware.org Received: (qmail 110417 invoked by uid 89); 17 Dec 2019 23:33:19 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Checked: by ClamAV 0.100.3 on sourceware.org X-Virus-Found: No X-Spam-SWARE-Status: No, score=-25.2 required=5.0 tests=AWL,BAYES_00,GIT_PATCH_0,GIT_PATCH_1,GIT_PATCH_2,GIT_PATCH_3,SPF_PASS autolearn=ham version=3.3.1 spammy=11710 X-Spam-Status: No, score=-25.2 required=5.0 tests=AWL,BAYES_00,GIT_PATCH_0,GIT_PATCH_1,GIT_PATCH_2,GIT_PATCH_3,SPF_PASS autolearn=ham version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on sourceware.org X-Spam-Level: X-HELO: mx2.suse.de X-Virus-Scanned: by amavisd-new at test-mx.suse.de Date: Tue, 01 Jan 2019 00:00:00 -0000 From: Tom de Vries To: dwz@sourceware.org, jakub@redhat.com Subject: [committed] Reimplement seen flag of struct import_cu as bitvector Message-ID: <20191217233311.GA27150@delia> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.10.1 (2018-07-13) X-SW-Source: 2019-q4/txt/msg00174.txt.bz2 Hi, The function create_import_tree has a hot loop in phase 3: ... for (icu = ipu->next; icu; icu = icu->next) icu->seen = false; ... Speed up this loop by allocating the seen field as a bitvector, and doing the clearing using memset. This gives a 15% speedup on vmlinux: ... real: mean: 16772.40 100.00% stddev: 478.95 mean: 14112.20 84.14% stddev: 117.10 user: mean: 16220.00 100.00% stddev: 114.17 mean: 13767.40 84.88% stddev: 115.58 sys: mean: 406.40 100.00% stddev: 173.96 mean: 343.20 84.45% stddev: 41.90 ... More specifically, using --devel-progress, we see 'create_import_tree phase 3' go from: ... user: 3.15 ... to: ... user: 0.64 ... Committed to trunk. Thanks, - Tom Reimplement seen flag of struct import_cu as bitvector 2019-12-18 Tom de Vries PR dwz/25276 * dwz.c (MAX, MIN): New macro. (struct import_cu): Remove seen field. (BITVECTOR_TYPE): Define. (bitvector_alloc, bitvector_set_bit, bitvector_bit_p) (bitvector_clear_bits): New function. (create_import_tree): Allocate seen as bitvector, and handle accordingly. --- dwz.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 7 deletions(-) diff --git a/dwz.c b/dwz.c index 524cd6f..2f1f59f 100644 --- a/dwz.c +++ b/dwz.c @@ -129,6 +129,8 @@ /* Utility macro. */ #define IMPLIES(A, B) (!((A) && !(B))) +#define MAX(A, B) ((A) > (B) ? (A) : (B)) +#define MIN(A, B) ((A) < (B) ? (A) : (B)) static void @@ -6480,9 +6482,6 @@ struct import_cu edges at the start), then optionally any PUs created by create_import_tree. */ unsigned int idx; - /* Flag used during PU merging, set for PUs already considered - for merging for the given first PU. */ - bool seen; }; /* An edge in a linked list. */ @@ -6852,6 +6851,48 @@ verify_edges (struct import_cu **ipus, unsigned int npus, unsigned int ncus, assert (ic == oc); } +#define BITVECTOR_TYPE unsigned int + +/* Return a bitvector containing NBITS bits. */ +static inline BITVECTOR_TYPE * +bitvector_alloc (unsigned nbits) +{ + size_t nbytes = (nbits / 8) + 1; + size_t size = nbytes + sizeof (BITVECTOR_TYPE); + BITVECTOR_TYPE *res = (BITVECTOR_TYPE *)malloc (size); + if (res == NULL) + dwz_oom (); + memset (res, 0, size); + return res; +} + +/* Set bit IDX in bitvector VECTOR. */ +static inline void FORCE_INLINE +bitvector_set_bit (BITVECTOR_TYPE *vector, unsigned idx) +{ + unsigned div = idx / (sizeof (BITVECTOR_TYPE) * 8); + unsigned mod = idx % (sizeof (BITVECTOR_TYPE) * 8); + vector[div] |= (1 << mod); +} + +/* Test bit IDX in bitvector VECTOR. */ +static inline bool FORCE_INLINE +bitvector_bit_p (BITVECTOR_TYPE *vector, unsigned idx) +{ + unsigned div = idx / (sizeof (BITVECTOR_TYPE) * 8); + unsigned mod = idx % (sizeof (BITVECTOR_TYPE) * 8); + return (vector[div] & (1 << mod)) != 0; +} + +/* Clear at least bits [A, B] in VECTOR, possibly more. */ +static inline void FORCE_INLINE +bitvector_clear_bits (BITVECTOR_TYPE *vector, unsigned int a, unsigned int b) +{ + unsigned int range_min = a / (sizeof (BITVECTOR_TYPE) * 8); + unsigned int range_max = b / (sizeof (BITVECTOR_TYPE) * 8); + memset (&vector[range_min], 0, range_max - range_min + 1); +} + /* Function to optimize the size of DW_TAG_imported_unit DIEs by creating an inclusion tree, instead of each CU importing all PUs it needs directly, by optionally creating new PUs or @@ -7294,6 +7335,11 @@ create_import_tree (void) v v v CU_3----->PU_4 CU_3------>PU_4 */ + /* Flag used during PU merging, set for PUs already considered + for merging for the given first PU. */ + BITVECTOR_TYPE *seen = bitvector_alloc (puidx); + unsigned int min_seen = UINT_MAX; + unsigned int max_seen = 0; for (ipu = ipus[0]; ipu; ipu = ipu->next) { struct import_edge *e1, *e2, *e3, *e4, **e1p, **ep, *prev; @@ -7312,9 +7358,11 @@ create_import_tree (void) bool maybe_superset; unsigned int intersection; - if (ipu2->idx <= ipu->idx || ipu2->seen) + if (ipu2->idx <= ipu->idx || bitvector_bit_p (seen, ipu2->idx)) continue; - ipu2->seen = true; + bitvector_set_bit (seen, ipu2->idx); + min_seen = MIN (min_seen, ipu2->idx); + max_seen = MAX (max_seen, ipu2->idx); maybe_subset = (e1 == ipu->incoming && ipu->incoming_count <= ipu2->incoming_count); maybe_superset = ipu->incoming_count >= ipu2->incoming_count; @@ -7540,8 +7588,12 @@ create_import_tree (void) } } } - for (icu = ipu->next; icu; icu = icu->next) - icu->seen = false; + if (min_seen <= max_seen) + { + bitvector_clear_bits (seen, min_seen, max_seen); + min_seen = UINT_MAX; + max_seen = 0; + } } if (unlikely (dump_edges_p)) dump_edges ("phase 3", ipus, npus, ncus); @@ -7629,6 +7681,7 @@ create_import_tree (void) for (cu = alt_first_cu; cu; cu = cu->cu_next) cu->u1.cu_icu = NULL; obstack_free (&ob2, to_free); + free (seen); return 0; }