* [committed] Reimplement seen flag of struct import_cu as bitvector
@ 2019-01-01 0:00 Tom de Vries
0 siblings, 0 replies; only message in thread
From: Tom de Vries @ 2019-01-01 0:00 UTC (permalink / raw)
To: dwz, jakub
Hi,
The function create_import_tree has a hot loop in phase 3:
...
for (icu = ipu->next; icu; icu = icu->next)
icu->seen = false;
...
Speed up this loop by allocating the seen field as a bitvector, and doing the
clearing using memset.
This gives a 15% speedup on vmlinux:
...
real: mean: 16772.40 100.00% stddev: 478.95
mean: 14112.20 84.14% stddev: 117.10
user: mean: 16220.00 100.00% stddev: 114.17
mean: 13767.40 84.88% stddev: 115.58
sys: mean: 406.40 100.00% stddev: 173.96
mean: 343.20 84.45% stddev: 41.90
...
More specifically, using --devel-progress, we see 'create_import_tree phase 3'
go from:
...
user: 3.15
...
to:
...
user: 0.64
...
Committed to trunk.
Thanks,
- Tom
Reimplement seen flag of struct import_cu as bitvector
2019-12-18 Tom de Vries <tdevries@suse.de>
PR dwz/25276
* dwz.c (MAX, MIN): New macro.
(struct import_cu): Remove seen field.
(BITVECTOR_TYPE): Define.
(bitvector_alloc, bitvector_set_bit, bitvector_bit_p)
(bitvector_clear_bits): New function.
(create_import_tree): Allocate seen as bitvector, and handle
accordingly.
---
dwz.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 60 insertions(+), 7 deletions(-)
diff --git a/dwz.c b/dwz.c
index 524cd6f..2f1f59f 100644
--- a/dwz.c
+++ b/dwz.c
@@ -129,6 +129,8 @@
/* Utility macro. */
#define IMPLIES(A, B) (!((A) && !(B)))
+#define MAX(A, B) ((A) > (B) ? (A) : (B))
+#define MIN(A, B) ((A) < (B) ? (A) : (B))
static void
@@ -6480,9 +6482,6 @@ struct import_cu
edges at the start), then optionally any PUs
created by create_import_tree. */
unsigned int idx;
- /* Flag used during PU merging, set for PUs already considered
- for merging for the given first PU. */
- bool seen;
};
/* An edge in a linked list. */
@@ -6852,6 +6851,48 @@ verify_edges (struct import_cu **ipus, unsigned int npus, unsigned int ncus,
assert (ic == oc);
}
+#define BITVECTOR_TYPE unsigned int
+
+/* Return a bitvector containing NBITS bits. */
+static inline BITVECTOR_TYPE *
+bitvector_alloc (unsigned nbits)
+{
+ size_t nbytes = (nbits / 8) + 1;
+ size_t size = nbytes + sizeof (BITVECTOR_TYPE);
+ BITVECTOR_TYPE *res = (BITVECTOR_TYPE *)malloc (size);
+ if (res == NULL)
+ dwz_oom ();
+ memset (res, 0, size);
+ return res;
+}
+
+/* Set bit IDX in bitvector VECTOR. */
+static inline void FORCE_INLINE
+bitvector_set_bit (BITVECTOR_TYPE *vector, unsigned idx)
+{
+ unsigned div = idx / (sizeof (BITVECTOR_TYPE) * 8);
+ unsigned mod = idx % (sizeof (BITVECTOR_TYPE) * 8);
+ vector[div] |= (1 << mod);
+}
+
+/* Test bit IDX in bitvector VECTOR. */
+static inline bool FORCE_INLINE
+bitvector_bit_p (BITVECTOR_TYPE *vector, unsigned idx)
+{
+ unsigned div = idx / (sizeof (BITVECTOR_TYPE) * 8);
+ unsigned mod = idx % (sizeof (BITVECTOR_TYPE) * 8);
+ return (vector[div] & (1 << mod)) != 0;
+}
+
+/* Clear at least bits [A, B] in VECTOR, possibly more. */
+static inline void FORCE_INLINE
+bitvector_clear_bits (BITVECTOR_TYPE *vector, unsigned int a, unsigned int b)
+{
+ unsigned int range_min = a / (sizeof (BITVECTOR_TYPE) * 8);
+ unsigned int range_max = b / (sizeof (BITVECTOR_TYPE) * 8);
+ memset (&vector[range_min], 0, range_max - range_min + 1);
+}
+
/* Function to optimize the size of DW_TAG_imported_unit DIEs by
creating an inclusion tree, instead of each CU importing all
PUs it needs directly, by optionally creating new PUs or
@@ -7294,6 +7335,11 @@ create_import_tree (void)
v v v
CU_3----->PU_4 CU_3------>PU_4
*/
+ /* Flag used during PU merging, set for PUs already considered
+ for merging for the given first PU. */
+ BITVECTOR_TYPE *seen = bitvector_alloc (puidx);
+ unsigned int min_seen = UINT_MAX;
+ unsigned int max_seen = 0;
for (ipu = ipus[0]; ipu; ipu = ipu->next)
{
struct import_edge *e1, *e2, *e3, *e4, **e1p, **ep, *prev;
@@ -7312,9 +7358,11 @@ create_import_tree (void)
bool maybe_superset;
unsigned int intersection;
- if (ipu2->idx <= ipu->idx || ipu2->seen)
+ if (ipu2->idx <= ipu->idx || bitvector_bit_p (seen, ipu2->idx))
continue;
- ipu2->seen = true;
+ bitvector_set_bit (seen, ipu2->idx);
+ min_seen = MIN (min_seen, ipu2->idx);
+ max_seen = MAX (max_seen, ipu2->idx);
maybe_subset = (e1 == ipu->incoming
&& ipu->incoming_count <= ipu2->incoming_count);
maybe_superset = ipu->incoming_count >= ipu2->incoming_count;
@@ -7540,8 +7588,12 @@ create_import_tree (void)
}
}
}
- for (icu = ipu->next; icu; icu = icu->next)
- icu->seen = false;
+ if (min_seen <= max_seen)
+ {
+ bitvector_clear_bits (seen, min_seen, max_seen);
+ min_seen = UINT_MAX;
+ max_seen = 0;
+ }
}
if (unlikely (dump_edges_p))
dump_edges ("phase 3", ipus, npus, ncus);
@@ -7629,6 +7681,7 @@ create_import_tree (void)
for (cu = alt_first_cu; cu; cu = cu->cu_next)
cu->u1.cu_icu = NULL;
obstack_free (&ob2, to_free);
+ free (seen);
return 0;
}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2019-12-17 23:33 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-01 0:00 [committed] Reimplement seen flag of struct import_cu as bitvector Tom de Vries
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).