public inbox for dwz@sourceware.org
 help / color / mirror / Atom feed
* [committed] Avoid unnecessary .debug_info section parsing
@ 2019-01-01  0:00 Tom de Vries
  0 siblings, 0 replies; only message in thread
From: Tom de Vries @ 2019-01-01  0:00 UTC (permalink / raw)
  To: dwz, jakub

Hi,

In regular mode, processing cc1 takes ~7s:
...
$ time.sh dwz cc1 -o 1 -lnone
maxmem:	1178008
real:	6.95
user:	6.19
sys:	0.75
...

OTOH, in low-mem mode, processing takes ~11.3s (~63% slower), but with ~45%
lower memory usage:
...
$ time.sh dwz cc1 -o 1 -l0
maxmem:	655792
real:	11.37
user:	10.71
sys:	0.66
...

However, if we do not enforce either regular mode (through -lnone) or low-mem
mode (through -l0), we first try regular mode, until we run into the low-mem
limit, and then start over and continue in low-mem mode:
...
$ time.sh dwz cc1 -o 1
maxmem:	994704
real:	15.28
user:	14.31
sys:	0.96
...
This gives a longer execution time than enforced low-mem, and doesn't reduce
memory usage as much as enforced low-mem.  The default low-mem limit is 10
million DIEs, and the number of DIEs in cc1 is 10.2 million, so it takes a
long time in regular mode to find out that we're hitting the limit.

We might employ a strategy where we count the DIEs before entering either
low-mem or regular mode, but that itself again adds a penalty of ~5% in case
we don't run into the low-mem limit.

Instead, use the estimated number of DIEs to determine whether we might run
into the low-mem limit.  Only if so, count the DIEs to verify that the limit
is really hit, and in that case switch to low-mem mode right away rather than
first attempting regular mode.

This gives us an execution time and memory usage similar to enforced low-mem:
...
$ time.sh dwz cc1 -o 1
maxmem:	655256
real:	11.66
user:	11.01
sys:	0.64
...

Do the same for the max-die limit.

Committed to trunk.

Thanks,
- Tom

Avoid unnecessary .debug_info section parsing

2019-11-23  Tom de Vries  <tdevries@suse.de>

	* dwz.c (try_debug_info): New function.
	(read_debug_info): If estimated number of DIEs in .debug_info exceeds
	either low_mem_die_limit or max_die_limit, use try_debug_info to
	verify this.

---
 dwz.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 143 insertions(+)

diff --git a/dwz.c b/dwz.c
index 28728fb..b80375d 100644
--- a/dwz.c
+++ b/dwz.c
@@ -4880,6 +4880,139 @@ collapse_children (dw_cu_ref cu, dw_die_ref die)
       }
 }
 
+/* Count the DIEs in the .debug_info section without building DIE structures.
+   Return 0 if no limit is hit, 2 if low_mem_die_limit is hit, 1 otherwise.  */
+static int
+try_debug_info (DSO *dso)
+{
+  unsigned char *ptr, *endcu, *endsec;
+  unsigned int value;
+  htab_t abbrev = NULL;
+  unsigned int last_abbrev_offset = 0;
+  struct abbrev_tag tag, *t;
+  unsigned int ndies;
+  unsigned ret = 1;
+  int kind = DEBUG_INFO;
+
+  ndies = 0;
+  ptr = debug_sections[kind].data;
+  endsec = ptr + debug_sections[kind].size;
+  while (ptr < endsec)
+    {
+      unsigned int culen;
+      int cu_version;
+
+      if (ptr + (kind == DEBUG_TYPES ? 23 : 11) > endsec)
+	{
+	  error (0, 0, "%s: %s CU header too small", dso->filename,
+		 debug_sections[kind].name);
+	  goto fail;
+	}
+
+      endcu = ptr + 4;
+      culen = read_32 (ptr);
+      if (culen >= 0xfffffff0)
+	{
+	  error (0, 0, "%s: 64-bit DWARF not supported", dso->filename);
+	  goto fail;
+	}
+      endcu += culen;
+
+      if (endcu > endsec)
+	{
+	  error (0, 0, "%s: %s too small", dso->filename,
+		 debug_sections[kind].name);
+	  goto fail;
+	}
+
+      cu_version = read_16 (ptr);
+      if (cu_version < 2 || cu_version > 4)
+	{
+	  error (0, 0, "%s: DWARF version %d unhandled", dso->filename,
+		 cu_version);
+	  goto fail;
+	}
+
+      value = read_32 (ptr);
+      if (value >= debug_sections[DEBUG_ABBREV].size)
+	{
+	  if (debug_sections[DEBUG_ABBREV].data == NULL)
+	    error (0, 0, "%s: .debug_abbrev not present", dso->filename);
+	  else
+	    error (0, 0, "%s: DWARF CU abbrev offset too large",
+		   dso->filename);
+	  goto fail;
+	}
+
+      if (ptr_size == 0)
+	{
+	  ptr_size = read_8 (ptr);
+	  if (ptr_size != 4 && ptr_size != 8)
+	    {
+	      error (0, 0, "%s: Invalid DWARF pointer size %d",
+		     dso->filename, ptr_size);
+	      goto fail;
+	    }
+	}
+      else if (read_8 (ptr) != ptr_size)
+	{
+	  error (0, 0, "%s: DWARF pointer size differs between CUs",
+		 dso->filename);
+	  goto fail;
+	}
+
+      if (abbrev == NULL || value != last_abbrev_offset)
+	{
+	  if (abbrev)
+	    htab_delete (abbrev);
+	  abbrev
+	    = read_abbrev (dso, debug_sections[DEBUG_ABBREV].data + value);
+	  if (abbrev == NULL)
+	    goto fail;
+	}
+      last_abbrev_offset = value;
+
+      while (ptr < endcu)
+	{
+	  tag.entry = read_uleb128 (ptr);
+	  if (tag.entry == 0)
+	    continue;
+	  if (ndies == max_die_limit)
+	    {
+	      error (0, 0, "%s: Too many DIEs, not optimizing",
+		     dso->filename);
+	      goto fail;
+	    }
+	  /* If we reach the low-mem DIE limit in regular mode, return 2 to
+	     signal the dwz caller that it should retry with low_mem.  */
+	  if (likely (!low_mem) && ndies == low_mem_die_limit)
+	    {
+	      if (tracing)
+		fprintf (stderr, "Hit low-mem die-limit\n");
+	      ret = 2;
+	      goto fail;
+	    }
+	  ndies++;
+	  t = htab_find_with_hash (abbrev, &tag, tag.entry);
+	  if (t == NULL)
+	    {
+	      error (0, 0, "%s: Could not find DWARF abbreviation %d",
+		     dso->filename, tag.entry);
+	      goto fail;
+	    }
+	  ptr = skip_attrs_1 (cu_version, t, ptr);
+	}
+    }
+
+  ret = 0;
+
+ fail:
+  if (abbrev)
+    htab_delete (abbrev);
+
+  return ret;
+}
+
 /* First phase of the DWARF compression.  Parse .debug_info section
    (for kind == DEBUG_INFO) or .debug_types section (for kind == DEBUG_TYPES)
    for each CU in it construct internal representation for the CU
@@ -4908,6 +5041,16 @@ read_debug_info (DSO *dso, int kind)
   struct dw_cu cu_buf;
   struct dw_die die_buf;
 
+  unsigned int estimated_nr_dies = estimate_nr_dies ();
+  if (kind == DEBUG_INFO
+      && ((multifile_mode == 0 && estimated_nr_dies > max_die_limit)
+	  || (!low_mem && estimated_nr_dies > low_mem_die_limit)))
+    {
+      int try_ret = try_debug_info (dso);
+      if (try_ret != 0)
+	return try_ret;
+    }
+
   if (likely (!fi_multifile && kind != DEBUG_TYPES))
     {
       dup_htab = htab_try_create (100000, die_hash, die_eq, NULL);

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2019-11-24 21:48 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-01  0:00 [committed] Avoid unnecessary .debug_info section parsing Tom de Vries

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).